1 module arsd.bbcode;
2 
import std.string;
import std.array : replace;
import arsd.dom; // BBCode is really tied to HTML, so we depend on it
6 
/// Renders BBCode markup as plain text.
///
/// "\r\n" line endings are rewritten to "\n" first; the result is then parsed
/// with parseBbCode and flattened with the tree's toPlainText().
string bbCodeToText(string bbcode) {
	auto normalized = bbcode.replace("\r\n", "\n");
	return parseBbCode(normalized).toPlainText();
}
14 
/// Converts an HTML DOM subtree into BBCode markup.
///
/// Handled elements:
///   - text nodes: returned as-is with newlines replaced by spaces
///   - b / i / s / u: wrapped in the BBCode tag of the same name (inner text only)
///   - p: children converted recursively, surrounded by blank lines, with runs
///     of spaces squeezed to one
///   - br: a single newline
///   - blockquote: `[quote]`, or `[quote="name"]` when a `cite` child supplies
///     an attribution
///   - pre whose class contains "d_code": `[code]`
///   - anything else: the concatenation of its converted children
///
/// Returns: the BBCode text, with any four consecutive newlines reduced to two.
string htmlToBbCode(Element htmlRoot) {
	// Text nodes carry no markup; HTML treats their newlines as plain spaces.
	if(htmlRoot.nodeType == NodeType.Text)
		return htmlRoot.nodeValue().replace("\n", " ");

	string ret;

	switch(htmlRoot.tagName) {
		case "b":
		case "i":
		case "s":
		case "u": {
			ret ~= "[" ~ htmlRoot.tagName ~ "]";
			ret ~= htmlRoot.innerText;
			ret ~= "[/" ~ htmlRoot.tagName ~ "]";
			break;
		}
		case "p": {
			// whitespace is all the same for html......
			string para = "\n\n";
			foreach(c; htmlRoot.childNodes)
				para ~= htmlToBbCode(c);
			para ~= "\n\n";

			para = para.squeeze(" "); // in HTML, whitespace is collapsed

			ret ~= para;
			break;
		}
		case "br": {
			ret ~= "\n";
			break;
		}
		case "blockquote": {
			string attribute;
			string value;
			foreach(c; htmlRoot.childNodes) {
				// The braces matter: a <cite> child only supplies the
				// attribution, every other child contributes to the quote body.
				if(c.tagName == "cite") {
					attribute = c.innerText().replace(" wrote:", "");
					attribute = attribute.strip;
					// drop a trailing colon left over from "name:"
					if(attribute.length && attribute[$-1] == ':')
						attribute = attribute[0 .. $-1];
				} else {
					value ~= htmlToBbCode(c);
				}
			}

			attribute = attribute.strip;

			if(attribute.length)
				ret ~= "[quote=\"" ~ attribute ~ "\"]" ~ value ~ "[/quote]";
			else
				ret ~= "[quote]" ~ value ~ "[/quote]";
			break;
		}
		case "pre": {
			if(indexOf(htmlRoot.getAttribute("class"), "d_code") != -1) {
				ret ~= "[code]" ~ htmlRoot.innerText ~ "[/code]";
				break;
			}
			// any other <pre> is treated like a generic container
			goto default;
		}
		default: {
			foreach(c; htmlRoot.childNodes)
				ret ~= htmlToBbCode(c);
		}
	}

	// collapses the paragraphs splits...
	return ret.replace("\n\n\n\n", "\n\n");
}
88 
/// Parses BBCode text into a tree of BbCodeComponent nodes.
///
/// Params:
///   bbcode   = the text to parse
///   spawner  = called for every `[tag]` / `[tag=value]` found, with the tag
///              name, the attribute value (surrounding double quotes removed)
///              and the remaining input by reference. It returns the node for
///              the tag after consuming whatever input belongs to it, or null
///              to have the bracketed text kept as plain text. When null,
///              bbCodeDefaultSpawner is used.
///   consumed = optional out value. Receives the input length when the input
///              is parsed to its end by the tag loop; when parsing stops at a
///              trailing run of plain text (no further '[' or an unclosed
///              '['), it receives the number of characters that preceded that
///              run.
///
/// Returns: the root component holding the parsed children in input order.
BbCodeComponent parseBbCode(string bbcode, BbCodeComponent delegate(string, string, ref string) spawner = null, int* consumed = null) {
	if(spawner is null) {
		BbCodeComponent delegateWrapper(string a, string b, ref string c) {
			return bbCodeDefaultSpawner(a, b, c);
		}
		spawner = &delegateWrapper;
	}

	BbCodeComponent root = new BbCodeComponent;

	immutable originalLength = bbcode.length;

	// Stores whatever input is left as one plain-text node and ends the parse.
	BbCodeComponent finishWithPlainText() {
		root.appendChild(new BbCodePlainText(bbcode));
		if(consumed !is null)
			*consumed = cast(int) (originalLength - bbcode.length);
		return root;
	}

	// Every pass that does not return removes at least the "[...]" it just
	// examined from the front of bbcode, so the loop always terminates.
	while(bbcode.length) {
		auto open = bbcode.indexOf("[");
		if(open == -1)
			return finishWithPlainText(); // we're done, no more tags

		// text in front of the bracket is plain text
		root.appendChild(new BbCodePlainText(bbcode[0 .. open]));
		bbcode = bbcode[open .. $];

		auto close = bbcode.indexOf("]");
		if(close == -1) // never closes; not bbcode anymore
			return finishWithPlainText();

		auto equal = bbcode.indexOf("=");

		string tagName;
		string attributeValue;

		if(equal != -1 && equal < close) {
			// it has an attribute
			tagName = bbcode[1 .. equal]; // skip the [
			attributeValue = bbcode[equal + 1 .. close];

			// A quoted value loses its quotes. The length check keeps an
			// empty value ("[tag=]") or a lone quote from being sliced.
			if(attributeValue.length >= 2 && attributeValue[0] == '"' && attributeValue[$-1] == '"')
				attributeValue = attributeValue[1 .. $-1];
		} else {
			tagName = bbcode[1 .. close];
		}

		string storedData = bbcode[0 .. close + 1]; // the tag text, including the closing ]
		bbcode = bbcode[close + 1 .. $];

		auto com = spawner(tagName, attributeValue, bbcode);

		if(com is null)
			root.appendChild(new BbCodePlainText(storedData)); // assume plain text
		else
			root.appendChild(com);
	}

	if(consumed !is null)
		*consumed = cast(int) originalLength;

	return root;
}
159 
160 
/// Default tag factory used by parseBbCode.
///
/// Recognised tags:
///   - b, i, u, s: inline; the content up to the first matching closer is kept
///     as a single plain-text child (no nesting, no attribute allowed)
///   - hr: stand-alone tag, consumes nothing
///   - section, quote: nestable blocks whose content is parsed recursively
///   - code: content up to the first closer is stored verbatim
///
/// Params:
///   tag       = tag name without brackets
///   attribute = value after '=' in the opening tag, may be empty
///   data      = input following the opening tag; advanced past the closing
///               tag on success and left untouched on failure
///
/// Returns: the component for the tag, or null when the tag is unknown or
/// cannot be completed.
BbCodeComponent bbCodeDefaultSpawner(string tag, string attribute, ref string data) {
	string closingTag = "[/" ~ tag ~ "]";

	switch(tag) {
		// inline elements
		case "b", "i", "u", "s": {
			if(attribute.length)
				return null;

			auto end = data.indexOf(closingTag);
			if(end == -1) // they can't nest
				return null;

			// nor do they have children aside from text
			BbCodeTagWithChildren node;
			if(tag == "b")
				node = new BbCodeBold;
			else if(tag == "i")
				node = new BbCodeItalic;
			else
				node = new BbCodeTagWithChildren;

			assert(node !is null);

			node.tagName = tag;
			node.appendChild(new BbCodePlainText(data[0 .. end]));
			data = data[end + closingTag.length .. $];

			return node;
		}

		case "hr": {
			auto rule = new BbCodeHr;
			rule.tagName = "hr";
			return rule;
		}

		// nestable blocks
		case "section", "quote": {
			auto end = findNestedCloser(data, tag);
			if(end == -1)
				return null;

			auto inner = parseBbCode(data[0 .. end], null);
			data = data[end + closingTag.length .. $];

			BbCodeComponent block;
			if(tag == "quote")
				block = new BbCodeQuote;
			else if(tag == "section")
				block = new BbCodeSection;
			else
				assert(0);

			block.tagName = tag;
			block.attributeValue = attribute;

			// move the parsed children under the new block
			foreach(piece; inner.children)
				block.appendChild(piece);

			return block;
		}

		// non-parsed blocks
		case "code": {
			auto end = data.indexOf(closingTag);
			if(end == -1)
				return null;

			auto code = new BbCodeCode;
			code.tagName = tag;
			code.value = data[0 .. end];

			data = data[end + closingTag.length .. $];

			return code;
		}

		default:
			return null;
	}

	return null;
}
248 
/// Finds the closing tag that balances an opening `[tag]` the caller has
/// already consumed.
///
/// Nested openers inside `data` are counted so that their own closers are
/// skipped. Only `[tag]` and `[tag=...]` count as openers; a different tag
/// that merely starts with the same letters (for example `[quotes]` when
/// looking for `quote`) is ignored.
///
/// Params:
///   data = the text following the opening tag
///   tag  = tag name without brackets
///
/// Returns: the index in `data` where the matching `[/tag]` starts, or -1 if
/// the tag is never closed.
int findNestedCloser(string data, string tag) {
	string opener = "[" ~ tag;
	string closer = "[/" ~ tag ~ "]";

	int itemsOpen = 1; // the opener our caller already consumed
	size_t startingFrom = 0;

	while(true) {
		auto rest = data[startingFrom .. $];

		immutable closerAt = rest.indexOf(closer);
		if(closerAt == -1)
			return -1; // no closer means we're done

		// Look for a genuine opener in front of that closer. Indexing one
		// past the opener is always in range because the closer's '[' sits
		// at or after that position.
		ptrdiff_t openerAt = -1;
		auto window = rest[0 .. closerAt];
		size_t scan = 0;
		while(true) {
			immutable hit = window[scan .. $].indexOf(opener);
			if(hit == -1)
				break;
			immutable next = rest[scan + hit + opener.length];
			if(next == ']' || next == '=') {
				openerAt = scan + hit;
				break;
			}
			scan += hit + 1; // same prefix, different tag: keep looking
		}

		if(openerAt == -1) {
			// encountered a closing tag
			itemsOpen--;
			if(itemsOpen == 0)
				return cast(int) (startingFrom + closerAt); // it's our closer!

			// it closed a nested tag... let's skip past it
			startingFrom += closerAt + 1;
		} else {
			// encountered an opening tag
			itemsOpen++;
			startingFrom += openerAt + 1;
		}
	}
}
286 import std.stdio;
287 
/// A `[section]` block. Its attribute value, when present, is rendered as a
/// heading above the section's content in plain-text output.
class BbCodeSection : BbCodeTagWithChildren {
	override bool isBlock() { return true; }

	/// Plain-text rendering: optional heading followed by the children's text.
	/// The heading is underlined with '=' at nesting level 0, with '-' at
	/// level 1, and upper-cased without underline at deeper levels.
	override string toPlainText() {
		string heading;

		if(attributeValue.length) {
			immutable depth = nestingLevel();
			if(depth == 0)
				heading = attributeValue ~ "\n==================================\n";
			else if(depth == 1)
				heading = attributeValue ~ "\n----------------------------------\n";
			else
				heading = attributeValue.toUpper ~ "\n";
		}

		heading ~= super.toPlainText();
		return heading;
	}
}
312 
/// A `[code]` block. Its content is stored verbatim in `value` and is not
/// parsed for further tags.
class BbCodeCode : BbCodeComponent {
	string value;

	override bool isBlock() { return true; }

	/// Plain-text rendering: the raw content between two comment-style rules.
	override string toPlainText() {
		return "/* **************** */\n" ~ value ~ "\n/* **************** */";
	}
}
327 
/// A `[quote]` block, rendered email-style in plain text.
class BbCodeQuote : BbCodeTagWithChildren {
	override bool isBlock() { return true; }

	/// Plain-text rendering: an optional "<name> wrote:" line followed by the
	/// stripped content with a '>' marker in front of every line. The marker
	/// has one '>' plus one more per enclosing quote, then a space. Finally
	/// every "> >" in the result is reduced to ">".
	override string toPlainText() {
		string marker = ">";
		for(int remaining = nestingLevel(); remaining > 0; remaining--)
			marker ~= ">";
		marker ~= " ";

		string header = attributeValue.length ? attributeValue ~ " wrote:\n" : "";

		string content = marker ~ super.toPlainText().strip;
		string quoted = replace(content, "\n", "\n" ~ marker);

		return (header ~ quoted).replace("> >", ">");
	}
}
351 
/// `[b]` tag; plain-text output wraps the content in asterisks.
class BbCodeBold : BbCodeTagWithChildren {
	override string toPlainText() {
		string inner = super.toPlainText();
		return "*" ~ inner ~ "*";
	}
}
/// `[i]` tag; plain-text output wraps the content in slashes.
class BbCodeItalic : BbCodeTagWithChildren {
	override string toPlainText() {
		string inner = super.toPlainText();
		return "/" ~ inner ~ "/";
	}
}
362 
/// `[hr]` tag: a block-level horizontal rule.
class BbCodeHr : BbCodeNonClosedTag {
	override bool isBlock() { return true; }

	/// Plain-text rendering: a fixed line of dashes padded with spaces.
	override string toPlainText() {
		string rule =
`                   ----------------------------------                `;
		return rule;
	}
}
371 
/// Base class of every node in a parsed BBCode tree. The specific kinds of
/// tags are subclasses; an instance of this class itself acts as a plain
/// container for its children.
class BbCodeComponent {
	this() {
	}

	/// Counts the ancestors whose tagName equals this node's tagName.
	int nestingLevel() {
		int depth = 0;

		for(BbCodeComponent up = parent; up !is null; up = up.parent) {
			if(up.tagName == tagName)
				depth++;
		}

		return depth;
	}

	/// Value reported by the default isBlock().
	bool setBlock;

	/// Whether toPlainText() of the parent separates this node from its
	/// neighbours with a blank line.
	bool isBlock() { return setBlock; }

	invariant() {
		// no null entries in the child list
		foreach(kid; children)
			assert(kid !is null);
	}

	/// Adds c as the last child, sets its parent to this node, and returns c.
	BbCodeComponent appendChild(BbCodeComponent c) {
		children ~= c;
		c.parent = this;
		return c;
	}

	BbCodeComponent[] children;
	BbCodeComponent parent;
	string tagName;
	string attributeValue;

	/// outputs its own source code: here, the children's source concatenated
	override string toString() {
		string source;

		for(size_t i = 0; i < children.length; i++)
			source ~= children[i].toString();

		return source;
	}

	/// Outputs email style text.
	///
	/// Children are rendered in order. Before a block child, and before the
	/// child that follows a block, both the text so far and the child's text
	/// are stripped and a blank line is inserted between them.
	string toPlainText() {
		string ret;

		bool previousWasBlock = false;

		foreach(kid; children) {
			string piece = kid.toPlainText();
			immutable blockNow = kid.isBlock();

			if(blockNow || previousWasBlock) {
				piece = piece.strip;
				ret = ret.strip;

				ret ~= "\n\n";

				previousWasBlock = blockNow;
			}

			ret ~= piece;
		}

		return ret;
	}

	/// converts itself to an HTML element: a div holding the children's HTML
	Element toHtml(Document document) {
		Element container = document.createElement("div");

		for(size_t i = 0; i < children.length; i++)
			container.appendChild(children[i].toHtml(document));

		return container;
	}
}
463 
/// A run of literal text inside the bbcode. Anything that does not parse as
/// a tag falls back on this node type. It has no children and no tag name.
class BbCodePlainText : BbCodeComponent {
	string value;

	this(string txt) {
		value = txt;
	}

	invariant () {
		assert(this.children.length == 0);
		assert(this.tagName == "");
	}

	/// The source form of plain text is the text itself.
	override string toString() {
		return value;
	}

	/// Plain-text rendering: the value is split on "\n"; empty pieces become
	/// a newline and the others are passed through wrap(74). Each piece is
	/// made to end in a newline.
	override string toPlainText() {
		string ret;

		foreach(paragraph; value.split("\n")) {
			ret ~= paragraph.length ? paragraph.wrap(74) : "\n";

			// just a sanity check
			if(ret[$-1] != '\n')
				ret ~= '\n';
		}

		return ret;
	}

	/// HTML form: a text node holding the value.
	override Element toHtml(Document document) {
		return document.createTextNode(value);
	}
}
503 
/// A "tag" that doesn't close and has no children. For example, "[hr]".
class BbCodeNonClosedTag : BbCodeComponent {
	invariant() {
		assert(this.children.length == 0);
	}

	/// Source form: `[name]`, or `[name="value"]` when an attribute is set.
	override string toString() {
		string attr = attributeValue.length ? ("=\"" ~ attributeValue ~ "\"") : "";
		return "[" ~ tagName ~ attr ~ "]";
	}

	/// Override me! The default renders nothing.
	override string toPlainText() {
		return "";
	}

	/// Override me if the attribute is meaningful or the tag names don't match!
	/// The default creates an element named after the tag.
	override Element toHtml(Document document) {
		auto element = document.createElement(tagName);
		return element;
	}
}
526 
/// A tag that requires a closing tag and may have children, such as [p]child[/p]
class BbCodeTagWithChildren : BbCodeComponent {
	/// Source form: the opening tag (with `="value"` when an attribute is
	/// set), the children's source, then the closing tag.
	override string toString() {
		string attr = attributeValue.length ? ("=\"" ~ attributeValue ~ "\"") : "";
		string source = "[" ~ tagName ~ attr ~ "]";

		for(size_t i = 0; i < children.length; i++)
			source ~= children[i].toString();

		return source ~ "[/" ~ tagName ~ "]";
	}

	/// HTML form: an element named after the tag containing the children's HTML.
	override Element toHtml(Document document) {
		auto element = document.createElement(tagName);

		for(size_t i = 0; i < children.length; i++)
			element.appendChild(children[i].toHtml(document));

		return element;
	}
}
Suggestion Box / Bug Report