1 // FIXME: add classList. it is a live list and removes whitespace and duplicates when you use it. 2 // FIXME: xml namespace support??? 3 // FIXME: https://developer.mozilla.org/en-US/docs/Web/API/Element/insertAdjacentHTML 4 // FIXME: parentElement is parentNode that skips DocumentFragment etc but will be hard to work in with my compatibility... 5 6 // FIXME: the scriptable list is quite arbitrary 7 8 9 // xml entity references?! 10 11 /++ 12 This is an html DOM implementation, started with cloning 13 what the browser offers in Javascript, but going well beyond 14 it in convenience. 15 16 If you can do it in Javascript, you can probably do it with 17 this module, and much more. 18 19 --- 20 import arsd.dom; 21 22 void main() { 23 auto document = new Document("<html><p>paragraph</p></html>"); 24 writeln(document.querySelector("p")); 25 document.root.innerHTML = "<p>hey</p>"; 26 writeln(document); 27 } 28 --- 29 30 BTW: this file optionally depends on `arsd.characterencodings`, to 31 help it correctly read files from the internet. You should be able to 32 get characterencodings.d from the same place you got this file. 33 34 If you want it to stand alone, just always use the `Document.parseUtf8` 35 function or the constructor that takes a string. 36 37 Symbol_groups: 38 39 core_functionality = 40 41 These members provide core functionality. The members on these classes 42 will provide most of your direct interaction. 43 44 bonus_functionality = 45 46 These provide additional functionality for special use cases. 47 48 implementations = 49 50 These provide implementations of other functionality. 
+/
module arsd.dom;

// FIXME: support the css standard namespace thing in the selectors too

version(with_arsd_jsvar)
	import arsd.jsvar;
else {
	enum scriptable = "arsd_jsvar_compatible";
}

// Compile-time filter used by opDispatch: answers whether `name` is one of
// the attributes that may be accessed directly, without going through .attr.
// The comparison is exact (case-sensitive).
bool isConvenientAttribute(string name) {
	switch(name) {
		case "name", "id", "href", "value",
			"checked", "selected", "type",
			"src", "content", "pattern",
			"placeholder", "required", "alt",
			"rel",
			"method", "action", "enctype":
			return true;
		default:
			return false;
	}
}


// FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode
// FIXME: failing to close a paragraph sometimes messes things up too

// FIXME: it would be kinda cool to have some support for internal DTDs
// and maybe XPath as well, to some extent
/*
	we could do
	meh this sux

	auto xpath = XPath(element);

	// get the first p
	xpath.p[0].a["href"]
*/


/// The main document interface, including a html parser.
/// Group: core_functionality
class Document : FileResource {
	/// Convenience method for web scraping: performs a GET on `url` and parses
	/// the response body into a new Document. Requires [arsd.http2] to be
	/// included in the build as well as [arsd.characterencodings].
	///
	/// With `strictMode` set, the body goes through `parse` as case-sensitive,
	/// strict markup using the charset reported by the response; otherwise it
	/// is handed to `parseGarbage`.
	static Document fromUrl()(string url, bool strictMode = false) {
		import arsd.http2;

		auto client = new HttpClient();
		auto response = client.navigateTo(Uri(url), HttpVerb.GET).waitForCompletion();

		auto document = new Document();
		if(!strictMode)
			document.parseGarbage(cast(string) response.content);
		else
			document.parse(cast(string) response.content, true, true, response.contentTypeCharset);

		return document;
	}

	///.
this(string data, bool caseSensitive = false, bool strict = false) {
	// the string constructor is a thin wrapper over the UTF-8 parser
	parseUtf8(data, caseSensitive, strict);
}

/**
	Creates an empty document. It has *nothing* in it at all.
*/
this() {

}

/// Experimental: index the document with a css selector to get a collection
/// that forwards calls to every element it holds and returns itself, so
/// calls can be chained.
///
/// Example: document["p"].innerText("hello").addClass("modified");
///
/// Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClass("modified"); }
///
/// Note: always use function calls (not property syntax) and don't use toString in there for best results.
///
/// Indexing can be repeated, e.g. document["p"]["b"], though the selector
/// string can already express that. Maybe a custom filter function could
/// go here some day.
ElementCollection opIndex(string selector) {
	// start from a collection rooted at the document root, then narrow it
	return ElementCollection(this.root)[selector];
}

// backing store for the contentType property pair
string _contentType = "text/html; charset=utf-8";

/// If you're using this for some other kind of XML, you can
/// set the content type here. The new value is returned.
///
/// Note: this has no impact on the function of this class.
/// It is only used if the document is sent via a protocol like HTTP.
///
/// This may be called by parse() if it recognizes the data. Otherwise,
/// if you don't set it, it assumes text/html; charset=utf-8.
@property string contentType(string mimeType) {
	return _contentType = mimeType;
}

/// implementing the FileResource interface, useful for sending via
/// http automatically. A Document has no file name, so this is always null.
@property string filename() const {
	return null;
}

/// implementing the FileResource interface, useful for sending via
/// http automatically.
override @property string contentType() const {
	return _contentType;
}

/// implementing the FileResource interface; it calls toString and
/// reinterprets the resulting text as immutable bytes.
override immutable(ubyte)[] getData() const {
	auto text = this.toString();
	return cast(immutable(ubyte)[]) text;
}


// Concatenates any consecutive text nodes
// (not implemented; kept as a plain comment so it does not leak into the
// documentation of the next declaration)
/*
void normalize() {

}
*/

/// Installs parseSaw* delegates that always answer true, so comments, asp
/// code, php code, <? instructions and <! instructions are added to the dom
/// tree when the parser sees them. This overwrites any delegates you set
/// earlier, and anything you set afterwards overwrites these.
/// Call this before calling parse().

/// Note this will also preserve the prolog and doctype from the original file, if there was one.
void enableAddingSpecialTagsToDom() {
	bool delegate(string) keepNode = (string unused) => true;

	parseSawComment = keepNode;
	parseSawAspCode = keepNode;
	parseSawPhpCode = keepNode;
	parseSawQuestionInstruction = keepNode;
	parseSawBangInstruction = keepNode;
}

/// If the parser sees a html comment, it will call this callback
/// <!-- comment --> will call parseSawComment(" comment ")
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawComment;

/// If the parser sees <% asp code... %>, it will call this callback.
/// It will be passed "% asp code... %" or "%= asp code .. %"
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawAspCode;

/// If the parser sees <?php php code... ?>, it will call this callback.
/// It will be passed "?php php code... ?" or "?= php code .. ?"
/// Note: dom.d cannot identify the other php <? code ?> short format.
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawPhpCode;

/// if it sees a <?xxx> that is not php or asp
/// it calls this function with the contents.
/// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo")
/// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>.
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawQuestionInstruction;

/// if it sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment),
/// it calls this function with the contents.
/// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo")
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawBangInstruction;

/// Given the kind of garbage you find on the Internet, try to make sense of it.
/// Equivalent to document.parse(data, false, false, null);
/// (Case-insensitive, non-strict, determine character encoding from the data.)

/// NOTE: this makes no attempt at added security.
///
/// It is a template so it lazily imports characterencodings.
void parseGarbage()(string data) {
	parse(data, false, false, null);
}

/// Parses well-formed UTF-8, case-sensitive, XML or XHTML
/// Will throw exceptions on things like unclosed tags.
void parseStrict(string data) {
	parseStream(toUtf8Stream(data), true, true);
}

/// Parses well-formed UTF-8 in loose mode (by default). Tries to correct
/// tag soup, but does NOT try to correct bad character encodings.
///
/// They will still throw an exception.
void parseUtf8(string data, bool caseSensitive = false, bool strict = false) {
	parseStream(toUtf8Stream(data), caseSensitive, strict);
}

// Works out which character encoding `rawdata` is in, converts it to UTF-8
// when needed, and wraps the result in a Utf8Stream.
//
// Params:
//   rawdata      = the document bytes, typed as a string
//   dataEncoding = the encoding name if the caller knows it; null means
//                  "detect it from the data"
//   strict       = if true, an undeterminable encoding throws MarkupException
//                  and conversion errors propagate; if false, Windows 1252
//                  is used as the fallback in both situations
//
// this is a template so we get lazy import behavior
Utf8Stream handleDataEncoding()(in string rawdata, string dataEncoding, bool strict) {
	import arsd.characterencodings;
	import std.string : strip;

	// gotta determine the data encoding. If you know it, pass it in above to skip all this.
	if(dataEncoding is null) {
		dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata);
		// it can't tell... probably a random 8 bit encoding. Let's check the document itself.
		// Now, XML and HTML can both list encoding in the document, but we can't really parse
		// it here without changing a lot of code until we know the encoding. So I'm going to
		// do some hackish string checking.
		if(dataEncoding is null) {
			auto dataAsBytes = cast(immutable(ubyte)[]) rawdata;

			// the quote character that opened the value, or 0 if the value is unquoted
			ubyte quote = 0;

			// first, look for an XML prolog
			auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\"");
			if(idx != -1) {
				idx += "encoding=\"".length;
				quote = '"';
				// we're probably past the prolog if it's this far in; we might be looking at
				// content. Forget about it.
				if(idx > 100)
					idx = -1;
			}
			// if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5)..
			if(idx == -1) {
				quote = 0;
				idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset=");
				if(idx != -1) {
					idx += "charset=".length;
					// the data may end right after "charset=", so check bounds before peeking;
					// the value may be opened by either kind of quote
					if(idx < dataAsBytes.length && (dataAsBytes[idx] == '"' || dataAsBytes[idx] == '\'')) {
						quote = dataAsBytes[idx];
						idx++;
					}
				}
			}

			// found something in either branch...
			if(idx != -1) {
				// A quoted value runs to its matching quote. An unquoted one (e.g. inside
				// content="text/html; charset=x" or <meta charset=x>) ends at whitespace
				// or any character that cannot be part of an encoding label.
				bool endsName(ubyte c) {
					if(quote != 0)
						return c == quote;
					switch(c) {
						case '"', '\'', '>', ';', '/', ' ', '\t', '\n', '\r':
							return true;
						default:
							return false;
					}
				}

				// read till the terminator or about 12 chars, whichever comes first...
				auto end = idx;
				while(end < dataAsBytes.length && end - idx < 12 && !endsName(dataAsBytes[end]))
					end++;

				// an empty label is the same as no label at all
				auto found = strip(cast(string) dataAsBytes[idx .. end]);
				if(found.length)
					dataEncoding = found;
			}
			// otherwise, we just don't know.
		}
	}

	if(dataEncoding is null) {
		if(strict)
			throw new MarkupException("I couldn't figure out the encoding of this document.");
		else {
			// if we really don't know by here, it means we already tried UTF-8,
			// looked for utf 16 and 32 byte order marks, and looked for xml or meta
			// tags... let's assume it's Windows-1252, since that's probably the most
			// common aside from utf that wouldn't be labeled.
			dataEncoding = "Windows 1252";
		}
	}

	// and now, go ahead and convert it.

	string data;

	if(!strict) {
		// if we're in non-strict mode, we need to check
		// the document for mislabeling too; sometimes
		// web documents will say they are utf-8, but aren't
		// actually properly encoded. If it fails to validate,
		// we'll assume it's actually Windows encoding - the most
		// likely candidate for mislabeled garbage.
		dataEncoding = dataEncoding.toLower();
		dataEncoding = dataEncoding.replace(" ", "");
		dataEncoding = dataEncoding.replace("-", "");
		dataEncoding = dataEncoding.replace("_", "");
		if(dataEncoding == "utf8") {
			try {
				validate(rawdata);
			} catch(UTFException e) {
				dataEncoding = "Windows 1252";
			}
		}
	}

	// Only the exact spelling "UTF-8" skips conversion. After the non-strict
	// normalization above the name is lower-case without dashes, so in that
	// mode the data always goes through convertToUtf8.
	if(dataEncoding != "UTF-8") {
		if(strict)
			data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
		else {
			try {
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
			} catch(Exception e) {
				// unknown or failing encoding: best effort with the usual suspect
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252");
			}
		}
	} else
		data = rawdata;

	return toUtf8Stream(data);
}

// Wraps already-UTF-8 text in the stream type the parser consumes. When
// Utf8Stream is just an alias of string the text is returned as-is.
private
Utf8Stream toUtf8Stream(in string rawdata) {
	string data = rawdata;
	static if(is(Utf8Stream == string))
		return data;
	else
		return new Utf8Stream(data);
}

/++
	List of elements that can be assumed to be self-closed
	in this document. The default for a Document are a hard-coded
	list of ones appropriate for HTML. For [XmlDocument], it defaults
	to empty. You can modify this after construction but before parsing.

	History:
		Added February 8, 2021 (included in dub release 9.2)
+/
string[] selfClosedElements = htmlSelfClosedElements;

/**
	Take XMLish data and try to make the DOM tree out of it.
368 369 The goal isn't to be perfect, but to just be good enough to 370 approximate Javascript's behavior. 371 372 If strict, it throws on something that doesn't make sense. 373 (Examples: mismatched tags. It doesn't validate!) 374 If not strict, it tries to recover anyway, and only throws 375 when something is REALLY unworkable. 376 377 If strict is false, it uses a magic list of tags that needn't 378 be closed. If you are writing a document specifically for this, 379 try to avoid such - use self closed tags at least. Easier to parse. 380 381 The dataEncoding argument can be used to pass a specific 382 charset encoding for automatic conversion. If null (which is NOT 383 the default!), it tries to determine from the data itself, 384 using the xml prolog or meta tags. If it still cannot tell, strict mode throws and non-strict mode falls back to Windows-1252. 385 386 If the encoding it settles on is wrong, it can throw on non-ascii 387 characters! 388 389 390 Note that it previously assumed the data was encoded as UTF-8, which 391 is why the dataEncoding argument defaults to that. 392 393 So it shouldn't break backward compatibility. 394 395 But, if you want the best behavior on wild data - figuring it out from the document 396 instead of assuming - you'll probably want to change that argument to null. 397 398 This is a template so it lazily imports arsd.characterencodings, which is required 399 to fix up data encodings. 400 401 If you are sure the encoding is good, try parseUtf8 or parseStrict to avoid the 402 dependency. If it is data from the Internet though, a random website, the encoding 403 is often a lie. This function, if dataEncoding == null, can correct for that, or 404 you can try parseGarbage. In those cases, arsd.characterencodings is required to 405 compile. 
406 */ 407 void parse()(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") { 408 auto data = handleDataEncoding(rawdata, dataEncoding, strict); 409 parseStream(data, caseSensitive, strict); 410 } 411 412 // note: this work best in strict mode, unless data is just a simple string wrapper 413 void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false) { 414 // FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler 415 // of my big app. 416 417 assert(data !is null); 418 419 // go through character by character. 420 // if you see a <, consider it a tag. 421 // name goes until the first non tagname character 422 // then see if it self closes or has an attribute 423 424 // if not in a tag, anything not a tag is a big text 425 // node child. It ends as soon as it sees a < 426 427 // Whitespace in text or attributes is preserved, but not between attributes 428 429 // & and friends are converted when I know them, left the same otherwise 430 431 432 // this it should already be done correctly.. 
so I'm leaving it off to net a ~10% speed boost on my typical test file (really) 433 //validate(data); // it *must* be UTF-8 for this to work correctly 434 435 sizediff_t pos = 0; 436 437 clear(); 438 439 loose = !caseSensitive; 440 441 bool sawImproperNesting = false; 442 bool paragraphHackfixRequired = false; 443 444 int getLineNumber(sizediff_t p) { 445 int line = 1; 446 foreach(c; data[0..p]) 447 if(c == '\n') 448 line++; 449 return line; 450 } 451 452 void parseError(string message) { 453 throw new MarkupException(format("char %d (line %d): %s", pos, getLineNumber(pos), message)); 454 } 455 456 bool eatWhitespace() { 457 bool ateAny = false; 458 while(pos < data.length && data[pos].isSimpleWhite) { 459 pos++; 460 ateAny = true; 461 } 462 return ateAny; 463 } 464 465 string readTagName() { 466 // remember to include : for namespaces 467 // basically just keep going until >, /, or whitespace 468 auto start = pos; 469 while(data[pos] != '>' && data[pos] != '/' && !data[pos].isSimpleWhite) 470 { 471 pos++; 472 if(pos == data.length) { 473 if(strict) 474 throw new Exception("tag name incomplete when file ended"); 475 else 476 break; 477 } 478 } 479 480 if(!caseSensitive) 481 return toLower(data[start..pos]); 482 else 483 return data[start..pos]; 484 } 485 486 string readAttributeName() { 487 // remember to include : for namespaces 488 // basically just keep going until >, /, or whitespace 489 auto start = pos; 490 while(data[pos] != '>' && data[pos] != '/' && data[pos] != '=' && !data[pos].isSimpleWhite) 491 { 492 if(data[pos] == '<') { 493 if(strict) 494 throw new MarkupException("The character < can never appear in an attribute name. Line " ~ to!string(getLineNumber(pos))); 495 else 496 break; // e.g. <a href="something" <img src="poo" /></a>. 
The > should have been after the href, but some shitty files don't do that right and the browser handles it, so we will too, by pretending the > was indeed there 497 } 498 pos++; 499 if(pos == data.length) { 500 if(strict) 501 throw new Exception("unterminated attribute name"); 502 else 503 break; 504 } 505 } 506 507 if(!caseSensitive) 508 return toLower(data[start..pos]); 509 else 510 return data[start..pos]; 511 } 512 513 string readAttributeValue() { 514 if(pos >= data.length) { 515 if(strict) 516 throw new Exception("no attribute value before end of file"); 517 else 518 return null; 519 } 520 switch(data[pos]) { 521 case '\'': 522 case '"': 523 auto started = pos; 524 char end = data[pos]; 525 pos++; 526 auto start = pos; 527 while(pos < data.length && data[pos] != end) 528 pos++; 529 if(strict && pos == data.length) 530 throw new MarkupException("Unclosed attribute value, started on char " ~ to!string(started)); 531 string v = htmlEntitiesDecode(data[start..pos], strict); 532 pos++; // skip over the end 533 return v; 534 default: 535 if(strict) 536 parseError("Attributes must be quoted"); 537 // read until whitespace or terminator (/> or >) 538 auto start = pos; 539 while( 540 pos < data.length && 541 data[pos] != '>' && 542 // unquoted attributes might be urls, so gotta be careful with them and self-closed elements 543 !(data[pos] == '/' && pos + 1 < data.length && data[pos+1] == '>') && 544 !data[pos].isSimpleWhite) 545 pos++; 546 547 string v = htmlEntitiesDecode(data[start..pos], strict); 548 // don't skip the end - we'll need it later 549 return v; 550 } 551 } 552 553 TextNode readTextNode() { 554 auto start = pos; 555 while(pos < data.length && data[pos] != '<') { 556 pos++; 557 } 558 559 return TextNode.fromUndecodedString(this, data[start..pos]); 560 } 561 562 // this is obsolete! 
563 RawSource readCDataNode() { 564 auto start = pos; 565 while(pos < data.length && data[pos] != '<') { 566 pos++; 567 } 568 569 return new RawSource(this, data[start..pos]); 570 } 571 572 573 struct Ele { 574 int type; // element or closing tag or nothing 575 /* 576 type == 0 means regular node, self-closed (element is valid) 577 type == 1 means closing tag (payload is the tag name, element may be valid) 578 type == 2 means you should ignore it completely 579 type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not 580 type == 4 means the document was totally empty 581 */ 582 Element element; // for type == 0 or type == 3 583 string payload; // for type == 1 584 } 585 // recursively read a tag 586 Ele readElement(string[] parentChain = null) { 587 // FIXME: this is the slowest function in this module, by far, even in strict mode. 588 // Loose mode should perform decently, but strict mode is the important one. 
589 if(!strict && parentChain is null) 590 parentChain = []; 591 592 static string[] recentAutoClosedTags; 593 594 if(pos >= data.length) 595 { 596 if(strict) { 597 throw new MarkupException("Gone over the input (is there no root element or did it never close?), chain: " ~ to!string(parentChain)); 598 } else { 599 if(parentChain.length) 600 return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended 601 else 602 return Ele(4); // signal emptiness upstream 603 } 604 } 605 606 if(data[pos] != '<') { 607 return Ele(0, readTextNode(), null); 608 } 609 610 enforce(data[pos] == '<'); 611 pos++; 612 if(pos == data.length) { 613 if(strict) 614 throw new MarkupException("Found trailing < at end of file"); 615 // if not strict, we'll just skip the switch 616 } else 617 switch(data[pos]) { 618 // I don't care about these, so I just want to skip them 619 case '!': // might be a comment, a doctype, or a special instruction 620 pos++; 621 622 // FIXME: we should store these in the tree too 623 // though I like having it stripped out tbh. 624 625 if(pos == data.length) { 626 if(strict) 627 throw new MarkupException("<! opened at end of file"); 628 } else if(data[pos] == '-' && (pos + 1 < data.length) && data[pos+1] == '-') { 629 // comment 630 pos += 2; 631 632 // FIXME: technically, a comment is anything 633 // between -- and -- inside a <!> block. 
634 // so in <!-- test -- lol> , the " lol" is NOT a comment 635 // and should probably be handled differently in here, but for now 636 // I'll just keep running until --> since that's the common way 637 638 auto commentStart = pos; 639 while(pos+3 < data.length && data[pos..pos+3] != "-->") 640 pos++; 641 642 auto end = commentStart; 643 644 if(pos + 3 >= data.length) { 645 if(strict) 646 throw new MarkupException("unclosed comment"); 647 end = data.length; 648 pos = data.length; 649 } else { 650 end = pos; 651 assert(data[pos] == '-'); 652 pos++; 653 assert(data[pos] == '-'); 654 pos++; 655 assert(data[pos] == '>'); 656 pos++; 657 } 658 659 if(parseSawComment !is null) 660 if(parseSawComment(data[commentStart .. end])) { 661 return Ele(3, new HtmlComment(this, data[commentStart .. end]), null); 662 } 663 } else if(pos + 7 <= data.length && data[pos..pos + 7] == "[CDATA[") { 664 pos += 7; 665 666 auto cdataStart = pos; 667 668 ptrdiff_t end = -1; 669 typeof(end) cdataEnd; 670 671 if(pos < data.length) { 672 // cdata isn't allowed to nest, so this should be generally ok, as long as it is found 673 end = data[pos .. $].indexOf("]]>"); 674 } 675 676 if(end == -1) { 677 if(strict) 678 throw new MarkupException("Unclosed CDATA section"); 679 end = pos; 680 cdataEnd = pos; 681 } else { 682 cdataEnd = pos + end; 683 pos = cdataEnd + 3; 684 } 685 686 return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null); 687 } else { 688 auto start = pos; 689 while(pos < data.length && data[pos] != '>') 690 pos++; 691 692 auto bangEnds = pos; 693 if(pos == data.length) { 694 if(strict) 695 throw new MarkupException("unclosed processing instruction (<!xxx>)"); 696 } else pos++; // skipping the > 697 698 if(parseSawBangInstruction !is null) 699 if(parseSawBangInstruction(data[start .. bangEnds])) { 700 // FIXME: these should be able to modify the parser state, 701 // doing things like adding entities, somehow. 702 703 return Ele(3, new BangInstruction(this, data[start .. 
bangEnds]), null); 704 } 705 } 706 707 /* 708 if(pos < data.length && data[pos] == '>') 709 pos++; // skip the > 710 else 711 assert(!strict); 712 */ 713 break; 714 case '%': 715 case '?': 716 /* 717 Here's what we want to support: 718 719 <% asp code %> 720 <%= asp code %> 721 <?php php code ?> 722 <?= php code ?> 723 724 The contents don't really matter, just if it opens with 725 one of the above for, it ends on the two char terminator. 726 727 <?something> 728 this is NOT php code 729 because I've seen this in the wild: <?EM-dummyText> 730 731 This could be php with shorttags which would be cut off 732 prematurely because if(a >) - that > counts as the close 733 of the tag, but since dom.d can't tell the difference 734 between that and the <?EM> real world example, it will 735 not try to look for the ?> ending. 736 737 The difference between this and the asp/php stuff is that it 738 ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end 739 on >. 740 */ 741 742 char end = data[pos]; 743 auto started = pos; 744 bool isAsp = end == '%'; 745 int currentIndex = 0; 746 bool isPhp = false; 747 bool isEqualTag = false; 748 int phpCount = 0; 749 750 more: 751 pos++; // skip the start 752 if(pos == data.length) { 753 if(strict) 754 throw new MarkupException("Unclosed <"~end~" by end of file"); 755 } else { 756 currentIndex++; 757 if(currentIndex == 1 && data[pos] == '=') { 758 if(!isAsp) 759 isPhp = true; 760 isEqualTag = true; 761 goto more; 762 } 763 if(currentIndex == 1 && data[pos] == 'p') 764 phpCount++; 765 if(currentIndex == 2 && data[pos] == 'h') 766 phpCount++; 767 if(currentIndex == 3 && data[pos] == 'p' && phpCount == 2) 768 isPhp = true; 769 770 if(data[pos] == '>') { 771 if((isAsp || isPhp) && data[pos - 1] != end) 772 goto more; 773 // otherwise we're done 774 } else 775 goto more; 776 } 777 778 //writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]); 779 auto code = data[started .. 
pos]; 780 781 782 assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length)); 783 if(pos < data.length) 784 pos++; // get past the > 785 786 if(isAsp && parseSawAspCode !is null) { 787 if(parseSawAspCode(code)) { 788 return Ele(3, new AspCode(this, code), null); 789 } 790 } else if(isPhp && parseSawPhpCode !is null) { 791 if(parseSawPhpCode(code)) { 792 return Ele(3, new PhpCode(this, code), null); 793 } 794 } else if(!isAsp && !isPhp && parseSawQuestionInstruction !is null) { 795 if(parseSawQuestionInstruction(code)) { 796 return Ele(3, new QuestionInstruction(this, code), null); 797 } 798 } 799 break; 800 case '/': // closing an element 801 pos++; // skip the start 802 auto p = pos; 803 while(pos < data.length && data[pos] != '>') 804 pos++; 805 //writefln("</%s>", data[p..pos]); 806 if(pos == data.length && data[pos-1] != '>') { 807 if(strict) 808 throw new MarkupException("File ended before closing tag had a required >"); 809 else 810 data ~= ">"; // just hack it in 811 } 812 pos++; // skip the '>' 813 814 string tname = data[p..pos-1]; 815 if(!caseSensitive) 816 tname = tname.toLower(); 817 818 return Ele(1, null, tname); // closing tag reports itself here 819 case ' ': // assume it isn't a real element... 820 if(strict) { 821 parseError("bad markup - improperly placed <"); 822 assert(0); // parseError always throws 823 } else 824 return Ele(0, TextNode.fromUndecodedString(this, "<"), null); 825 default: 826 827 if(!strict) { 828 // what about something that kinda looks like a tag, but isn't? 829 auto nextTag = data[pos .. $].indexOf("<"); 830 auto closeTag = data[pos .. $].indexOf(">"); 831 if(closeTag != -1 && nextTag != -1) 832 if(nextTag < closeTag) { 833 // since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically 834 835 auto equal = data[pos .. 
$].indexOf("=\""); 836 if(equal != -1 && equal < closeTag) { 837 // this MIGHT be ok, soldier on 838 } else { 839 // definitely no good, this must be a (horribly distorted) text node 840 pos++; // skip the < we're on - don't want text node to end prematurely 841 auto node = readTextNode(); 842 node.contents = "<" ~ node.contents; // put this back 843 return Ele(0, node, null); 844 } 845 } 846 } 847 848 string tagName = readTagName(); 849 string[string] attributes; 850 851 Ele addTag(bool selfClosed) { 852 if(selfClosed) 853 pos++; 854 else { 855 if(!strict) 856 if(tagName.isInArray(selfClosedElements)) 857 // these are de-facto self closed 858 selfClosed = true; 859 } 860 861 import std.algorithm.comparison; 862 863 if(strict) { 864 enforce(data[pos] == '>', format("got %s when expecting > (possible missing attribute name)\nContext:\n%s", data[pos], data[max(0, pos - 100) .. min(data.length, pos + 100)])); 865 } else { 866 // if we got here, it's probably because a slash was in an 867 // unquoted attribute - don't trust the selfClosed value 868 if(!selfClosed) 869 selfClosed = tagName.isInArray(selfClosedElements); 870 871 while(pos < data.length && data[pos] != '>') 872 pos++; 873 874 if(pos >= data.length) { 875 // the tag never closed 876 assert(data.length != 0); 877 pos = data.length - 1; // rewinding so it hits the end at the bottom.. 
878 } 879 } 880 881 auto whereThisTagStarted = pos; // for better error messages 882 883 pos++; 884 885 auto e = createElement(tagName); 886 e.attributes = attributes; 887 version(dom_node_indexes) { 888 if(e.dataset.nodeIndex.length == 0) 889 e.dataset.nodeIndex = to!string(&(e.attributes)); 890 } 891 e.selfClosed = selfClosed; 892 e.parseAttributes(); 893 894 895 // HACK to handle script and style as a raw data section as it is in HTML browsers 896 if(tagName == "script" || tagName == "style") { 897 if(!selfClosed) { 898 string closer = "</" ~ tagName ~ ">"; 899 ptrdiff_t ending; 900 if(pos >= data.length) 901 ending = -1; 902 else 903 ending = indexOf(data[pos..$], closer); 904 905 ending = indexOf(data[pos..$], closer, 0, (loose ? CaseSensitive.no : CaseSensitive.yes)); 906 /* 907 if(loose && ending == -1 && pos < data.length) 908 ending = indexOf(data[pos..$], closer.toUpper()); 909 */ 910 if(ending == -1) { 911 if(strict) 912 throw new Exception("tag " ~ tagName ~ " never closed"); 913 else { 914 // let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit. 915 if(pos < data.length) { 916 e = new TextNode(this, data[pos .. $]); 917 pos = data.length; 918 } 919 } 920 } else { 921 ending += pos; 922 e.innerRawSource = data[pos..ending]; 923 pos = ending + closer.length; 924 } 925 } 926 return Ele(0, e, null); 927 } 928 929 bool closed = selfClosed; 930 931 void considerHtmlParagraphHack(Element n) { 932 assert(!strict); 933 if(e.tagName == "p" && e.tagName == n.tagName) { 934 // html lets you write <p> para 1 <p> para 1 935 // but in the dom tree, they should be siblings, not children. 
936 paragraphHackfixRequired = true; 937 } 938 } 939 940 //writef("<%s>", tagName); 941 while(!closed) { 942 Ele n; 943 if(strict) 944 n = readElement(); 945 else 946 n = readElement(parentChain ~ tagName); 947 948 if(n.type == 4) return n; // the document is empty 949 950 if(n.type == 3 && n.element !is null) { 951 // special node, append if possible 952 if(e !is null) 953 e.appendChild(n.element); 954 else 955 piecesBeforeRoot ~= n.element; 956 } else if(n.type == 0) { 957 if(!strict) 958 considerHtmlParagraphHack(n.element); 959 e.appendChild(n.element); 960 } else if(n.type == 1) { 961 bool found = false; 962 if(n.payload != tagName) { 963 if(strict) 964 parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, tagName, getLineNumber(whereThisTagStarted))); 965 else { 966 sawImproperNesting = true; 967 // this is so we don't drop several levels of awful markup 968 if(n.element) { 969 if(!strict) 970 considerHtmlParagraphHack(n.element); 971 e.appendChild(n.element); 972 n.element = null; 973 } 974 975 // is the element open somewhere up the chain? 976 foreach(i, parent; parentChain) 977 if(parent == n.payload) { 978 recentAutoClosedTags ~= tagName; 979 // just rotating it so we don't inadvertently break stuff with vile crap 980 if(recentAutoClosedTags.length > 4) 981 recentAutoClosedTags = recentAutoClosedTags[1 .. $]; 982 983 n.element = e; 984 return n; 985 } 986 987 // if not, this is a text node; we can't fix it up... 
988 989 // If it's already in the tree somewhere, assume it is closed by algorithm 990 // and we shouldn't output it - odds are the user just flipped a couple tags 991 foreach(ele; e.tree) { 992 if(ele.tagName == n.payload) { 993 found = true; 994 break; 995 } 996 } 997 998 foreach(ele; recentAutoClosedTags) { 999 if(ele == n.payload) { 1000 found = true; 1001 break; 1002 } 1003 } 1004 1005 if(!found) // if not found in the tree though, it's probably just text 1006 e.appendChild(TextNode.fromUndecodedString(this, "</"~n.payload~">")); 1007 } 1008 } else { 1009 if(n.element) { 1010 if(!strict) 1011 considerHtmlParagraphHack(n.element); 1012 e.appendChild(n.element); 1013 } 1014 } 1015 1016 if(n.payload == tagName) // in strict mode, this is always true 1017 closed = true; 1018 } else { /*throw new Exception("wtf " ~ tagName);*/ } 1019 } 1020 //writef("</%s>\n", tagName); 1021 return Ele(0, e, null); 1022 } 1023 1024 // if a tag was opened but not closed by end of file, we can arrive here 1025 if(!strict && pos >= data.length) 1026 return addTag(false); 1027 //else if(strict) assert(0); // should be caught before 1028 1029 switch(data[pos]) { 1030 default: assert(0); 1031 case '/': // self closing tag 1032 return addTag(true); 1033 case '>': 1034 return addTag(false); 1035 case ' ': 1036 case '\t': 1037 case '\n': 1038 case '\r': 1039 // there might be attributes... 1040 moreAttributes: 1041 eatWhitespace(); 1042 1043 // same deal as above the switch.... 
1044 if(!strict && pos >= data.length) 1045 return addTag(false); 1046 1047 if(strict && pos >= data.length) 1048 throw new MarkupException("tag open, didn't find > before end of file"); 1049 1050 switch(data[pos]) { 1051 case '/': // self closing tag 1052 return addTag(true); 1053 case '>': // closed tag; open -- we now read the contents 1054 return addTag(false); 1055 default: // it is an attribute 1056 string attrName = readAttributeName(); 1057 string attrValue = attrName; 1058 1059 bool ateAny = eatWhitespace(); 1060 if(strict && ateAny) 1061 throw new MarkupException("inappropriate whitespace after attribute name"); 1062 1063 if(pos >= data.length) { 1064 if(strict) 1065 assert(0, "this should have thrown in readAttributeName"); 1066 else { 1067 data ~= ">"; 1068 goto blankValue; 1069 } 1070 } 1071 if(data[pos] == '=') { 1072 pos++; 1073 1074 ateAny = eatWhitespace(); 1075 if(strict && ateAny) 1076 throw new MarkupException("inappropriate whitespace after attribute equals"); 1077 1078 attrValue = readAttributeValue(); 1079 1080 eatWhitespace(); 1081 } 1082 1083 blankValue: 1084 1085 if(strict && attrName in attributes) 1086 throw new MarkupException("Repeated attribute: " ~ attrName); 1087 1088 if(attrName.strip().length) 1089 attributes[attrName] = attrValue; 1090 else if(strict) throw new MarkupException("wtf, zero length attribute name"); 1091 1092 if(!strict && pos < data.length && data[pos] == '<') { 1093 // this is the broken tag that doesn't have a > at the end 1094 data = data[0 .. pos] ~ ">" ~ data[pos.. $]; 1095 // let's insert one as a hack 1096 goto case '>'; 1097 } 1098 1099 goto moreAttributes; 1100 } 1101 } 1102 } 1103 1104 return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly. 1105 //assert(0); 1106 } 1107 1108 eatWhitespace(); 1109 Ele r; 1110 do { 1111 r = readElement(); // there SHOULD only be one element... 
1112 1113 if(r.type == 3 && r.element !is null) 1114 piecesBeforeRoot ~= r.element; 1115 1116 if(r.type == 4) 1117 break; // the document is completely empty... 1118 } while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node 1119 1120 root = r.element; 1121 1122 if(!strict) // in strict mode, we'll just ignore stuff after the xml 1123 while(r.type != 4) { 1124 r = readElement(); 1125 if(r.type != 4 && r.type != 2) { // if not empty and not ignored 1126 if(r.element !is null) 1127 piecesAfterRoot ~= r.element; 1128 } 1129 } 1130 1131 if(root is null) 1132 { 1133 if(strict) 1134 assert(0, "empty document should be impossible in strict mode"); 1135 else 1136 parseUtf8(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do 1137 } 1138 1139 if(paragraphHackfixRequired) { 1140 assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag... 1141 1142 // in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml). 1143 // It's hard to handle above though because my code sucks. So, we'll fix it here. 1144 1145 // Where to insert based on the parent (for mixed closed/unclosed <p> tags). See #120 1146 // Kind of inefficient because we can't detect when we recurse back out of a node. 1147 Element[Element] insertLocations; 1148 auto iterator = root.tree; 1149 foreach(ele; iterator) { 1150 if(ele.parentNode is null) 1151 continue; 1152 1153 if(ele.tagName == "p" && ele.parentNode.tagName == ele.tagName) { 1154 auto shouldBePreviousSibling = ele.parentNode; 1155 auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder... 
if (auto p = holder in insertLocations) {
	shouldBePreviousSibling = *p;
	assert(shouldBePreviousSibling.parentNode is holder);
}
ele = holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree());
insertLocations[holder] = ele;
iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up.
}
}
}
}

/* end massive parse function */

/// Gets the <title> element's innerText, if one exists
///
/// Returns: the inner text of the first element whose tag name is `title`
/// (as located by `findFirst`), or an empty string when there is none.
@property string title() {
	bool doesItMatch(Element e) {
		return (e.tagName == "title");
	}

	auto e = findFirst(&doesItMatch);
	if(e)
		return e.innerText();
	return "";
}

/// Sets the title of the page, creating a <title> element if needed.
///
/// An existing `title` element is reused. Otherwise a new one is created and
/// appended to the first element returned by `getElementsByTagName("head")`.
@property void title(string t) {
	bool doesItMatch(Element e) {
		return (e.tagName == "title");
	}

	auto e = findFirst(&doesItMatch);

	if(!e) {
		e = createElement("title");
		auto heads = getElementsByTagName("head");
		// NOTE(review): when no <head> exists the new element is never attached
		// to the tree, so the text assigned below will not show up in the document.
		if(heads.length)
			heads[0].appendChild(e);
	}

	if(e)
		e.innerText = t;
}

// FIXME: would it work to alias root this; ???? might be a good idea
/// These functions all forward to the root element. See the documentation in the Element class.
Element getElementById(string id) {
	// plain delegation: the lookup itself lives on Element
	return root.getElementById(id);
}

/// ditto
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
	if( is(SomeElementType : Element))
	out(ret) { assert(ret !is null); }
body {
	// file and line are handed through unchanged to the root's implementation
	return root.requireElementById!(SomeElementType)(id, file, line);
}

/// ditto
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if( is(SomeElementType : Element))
	out(ret) { assert(ret !is null); }
body {
	// a failed dynamic cast is reported exactly like a missing element
	if(auto match = cast(SomeElementType) querySelector(selector))
		return match;
	throw new ElementNotFoundException(SomeElementType.stringof, selector, this.root, file, line);
}

// Non-throwing variant of requireSelector: the (possibly null) match is
// wrapped in a MaybeNullElement instead of being checked here.
final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
{
	auto match = cast(SomeElementType) querySelector(selector);
	return MaybeNullElement!SomeElementType(match);
}

/// ditto
@scriptable
Element querySelector(string selector) {
	// same leading-part adjustment as Document.querySelectorAll; see the
	// explanation there
	auto parsed = Selector(selector);//, !loose);
	foreach(ref component; parsed.components) {
		if(component.parts.length && component.parts[0].separation == 0)
			component.parts[0].separation = -1;
	}

	// only the first lazily produced match is wanted
	foreach(first; parsed.getMatchingElementsLazy(this.root))
		return first;

	return null;
}

/// ditto
@scriptable
Element[] querySelectorAll(string selector) {
	// In standards-compliant code, the document is slightly magical
	// in that it is a pseudoelement at top level. It should actually
	// match the root as one of its children.
	//
	// In versions of dom.d before Dec 29 2019, this worked because
	// querySelectorAll was willing to return itself. With that bug fix
	// (search "arbitrary id asduiwh" in this file for associated unittest)
	// this would have failed. Hence adding back the root if it matches the
	// selector itself.
	//
	// I'd love to do this better later.

	auto parsed = Selector(selector);//, !loose);
	foreach(ref component; parsed.components) {
		if(component.parts.length && component.parts[0].separation == 0)
			component.parts[0].separation = -1;
	}
	return parsed.getMatchingElements(this.root);
}

/// ditto
deprecated("use querySelectorAll instead")
Element[] getElementsBySelector(string selector) {
	return root.getElementsBySelector(selector);
}

/// ditto
@scriptable
Element[] getElementsByTagName(string tag) {
	return root.getElementsByTagName(tag);
}

/// ditto
@scriptable
Element[] getElementsByClassName(string tag) {
	return root.getElementsByClassName(tag);
}

/** FIXME: btw, this could just be a lazy range...... */
Element getFirstElementByTagName(string tag) {
	// loose documents compare against the lowercased name
	if(loose)
		tag = tag.toLower();
	return findFirst((Element candidate) => candidate.tagName == tag);
}

/// This returns the <body> element, if there is one. (It is different than Javascript, where it is called 'body', because body is a keyword in D.)
Element mainBody() {
	return getFirstElementByTagName("body");
}

/// this uses a weird thing... it's [name=] if no colon and
/// [property=] if colon
string getMeta(string name) {
	string thing = name.indexOf(":") != -1 ? "property" : "name";
	auto meta = querySelector("head meta["~thing~"="~name~"]");
	return meta is null ? null : meta.content;
}

/// Sets a meta tag in the document header. It is kinda hacky to work easily for both Facebook open graph and traditional html meta tags.
void setMeta(string name, string value) {
	string thing = name.indexOf(":") != -1 ? "property" : "name";
	auto meta = querySelector("head meta["~thing~"="~name~"]");

	// no such tag yet: add one to <head> (requireSelector throws without a head)
	if(meta is null) {
		meta = requireSelector("head").addChild("meta");
		meta.setAttribute(thing, name);
	}

	meta.content = value;
}

///.
Form[] forms() {
	auto found = getElementsByTagName("form");
	return cast(Form[]) found;
}

///.
Form createForm()
	out(ret) {
		assert(ret !is null);
	}
body {
	auto made = createElement("form");
	return cast(Form) made;
}

///.
Element createElement(string name) {
	// loose documents store lowercased tag names
	if(loose)
		name = name.toLower();

	auto created = Element.make(name, null, null, selfClosedElements);
	created.parentDocument = this;
	return created;

	// return new Element(this, name, null, selfClosed);
}

///.
Element createFragment() {
	return new DocumentFragment(this);
}

///.
Element createTextNode(string content) {
	return new TextNode(this, content);
}


///.
Element findFirst(bool delegate(Element) doesItMatch) {
	Element found;

	// depth-first, parent before children; stops at the first hit
	bool visit(Element node) {
		if(doesItMatch(node)) {
			found = node;
			return true;
		}

		foreach(kid; node.children)
			if(visit(kid))
				return true;

		return false;
	}

	if(root !is null)
		visit(root);

	return found;
}

///.
void clear() {
	root = null;
	loose = false;
}

///.
void setProlog(string d) {
	// remember that the caller chose this, so `prolog` prefers it
	prologWasSet = true;
	_prolog = d;
}

///.
private string _prolog = "<!DOCTYPE html>\n";
private bool prologWasSet = false; // set to true if the user changed it

// Text emitted before the root element when serializing.
//
// Returns `_prolog` when it was set through `setProlog` or when nothing was
// kept in `piecesBeforeRoot`; otherwise each kept piece's `toString()`
// followed by a newline.
@property string prolog() const {
	// if the user explicitly changed it, do what they want
	// or if we didn't keep/find stuff from the document itself,
	// we'll use the builtin one as a default.
	if(prologWasSet || piecesBeforeRoot.length == 0)
		return _prolog;

	string p;
	foreach(e; piecesBeforeRoot)
		p ~= e.toString() ~ "\n";
	return p;
}

///.
override string toString() const {
	// root is null after clear(); emit just the prolog instead of
	// dereferencing a null reference
	if(root is null)
		return prolog;
	return prolog ~ root.toString();
}

/++
	Writes it out with whitespace for easier eyeball debugging

	Do NOT use for anything other than eyeball debugging,
	because whitespace may be significant content in XML.
+/
string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
	import std.string;
	string s = prolog.strip;

	/*
	if(insertComments) s ~= "<!--";
	s ~= "\n";
	if(insertComments) s ~= "-->";
	*/

	// same null-root guard as toString
	if(root !is null)
		s ~= root.toPrettyString(insertComments, indentationLevel, indentWith);
	foreach(a; piecesAfterRoot)
		s ~= a.toPrettyString(insertComments, indentationLevel, indentWith);
	return s;
}

///.
Element root;

/// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s
Element[] piecesBeforeRoot;

/// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it
Element[] piecesAfterRoot;

///.
bool loose;



// what follows are for mutation events that you can observe
void delegate(DomMutationEvent)[] eventObservers;

// Calls every registered observer, in array order, with the given event.
void dispatchMutationEvent(DomMutationEvent e) {
	foreach(observer; eventObservers) {
		observer(e);
	}
}
}

/// This represents almost everything in the DOM.
/// Group: core_functionality
class Element {
/// Returns a collection of elements by selector.
/// See: [Document.opIndex]
ElementCollection opIndex(string selector) {
	// start from a collection holding only this element, then narrow it
	auto collection = ElementCollection(this);
	return collection[selector];
}

/++
	Returns the child node with the particular index.

	Be aware that child nodes include text nodes, including
	whitespace-only nodes.
+/
Element opIndex(size_t index) {
	// out-of-range indexes yield null rather than a range error
	return index < children.length ? this.children[index] : null;
}

/// Calls getElementById, but throws instead of returning null if the element is not found. You can also ask for a specific subclass of Element to dynamically cast to, which also throws if it cannot be done.
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
	if(
		is(SomeElementType : Element)
	)
	out(ret) {
		assert(ret !is null);
	}
body {
	// a null result covers both "no such id" and "wrong dynamic type"
	if(auto found = cast(SomeElementType) getElementById(id))
		return found;
	throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, this, file, line);
}

/// ditto but with selectors instead of ids
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(
		is(SomeElementType : Element)
	)
	out(ret) {
		assert(ret !is null);
	}
body {
	if(auto found = cast(SomeElementType) querySelector(selector))
		return found;
	throw new ElementNotFoundException(SomeElementType.stringof, selector, this, file, line);
}


/++
	If a matching selector is found, it returns that Element. Otherwise, the returned object returns null for all methods.
+/
final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
{
	// the wrapper receives the cast result as-is, null included
	auto candidate = cast(SomeElementType) querySelector(selector);
	return MaybeNullElement!SomeElementType(candidate);
}



/// get all the classes on this element
@property string[] classes() {
	// splits on single spaces only
	string all = className;
	return all.split(" ");
}

/// Adds a string to the class attribute. The class attribute is used a lot in CSS.
@scriptable
Element addClass(string c) {
	if(hasClass(c))
		return this; // don't add it twice

	// the first class is stored bare; later ones are appended after a space
	string existing = getAttribute("class");
	setAttribute("class", existing.length ? existing ~ " " ~ c : c);

	return this;
}

/// Removes a particular class name.
@scriptable
Element removeClass(string c) {
	if(!hasClass(c))
		return this;

	// rebuild the list from everything except the class being removed
	string kept;
	foreach(name; classes) {
		if(name != c) {
			if(kept.length)
				kept ~= " ";
			kept ~= name;
		}
	}

	className = kept.strip();

	return this;
}

/// Returns whether the given class appears in this element.
bool hasClass(string c) {
	string all = className;

	// cheap rejection first: not even a substring means not a class
	if(all.indexOf(c) == -1)
		return false;

	// a substring hit is not enough; it has to be a whole space-separated entry
	foreach(entry; all.split(" ")) {
		if(entry == c)
			return true;
	}
	return false;

	/*
	int rightSide = idx + c.length;

	bool checkRight() {
		if(rightSide == cn.length)
			return true; // it's the only class
		else if(iswhite(cn[rightSide]))
			return true;
		return false; // this is a substring of something else..
	}

	if(idx == 0) {
		return checkRight();
	} else {
		if(!iswhite(cn[idx - 1]))
			return false; // substring
		return checkRight();
	}

	assert(0);
	*/
}


/* *******************************
          DOM Mutation
*********************************/
/// convenience function to quickly add a tag with some text or
/// other relevant info (for example, it's a src for an <img> element
/// instead of inner text)
Element addChild(string tagName, string childInfo = null, string childInfo2 = null)
	in {
		assert(tagName !is null);
	}
	out(e) {
		//assert(e.parentNode is this);
		//assert(e.parentDocument is this.parentDocument);
	}
body {
	// FIXME (maybe): if the thing is self closed, we might want to go ahead and
	// return the parent. That will break existing code though.
	auto created = Element.make(tagName, childInfo, childInfo2);
	return appendChild(created);
}

/// Another convenience function. Adds a child directly after the current one, returning
/// the new child.
///
/// Between this, addChild, and parentNode, you can build a tree as a single expression.
Element addSibling(string tagName, string childInfo = null, string childInfo2 = null)
	in {
		assert(tagName !is null);
		assert(parentNode !is null);
	}
	out(e) {
		assert(e.parentNode is this.parentNode);
		assert(e.parentDocument is this.parentDocument);
	}
body {
	auto created = Element.make(tagName, childInfo, childInfo2);
	return parentNode.insertAfter(this, created);
}

///
Element addSibling(Element e) {
	// inserted into our parent, right after this element
	return parentNode.insertAfter(this, e);
}

///
Element addChild(Element e) {
	return this.appendChild(e);
}

/// Convenience function to append text intermixed with other children.
/// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), ".");
/// or div.addChildren("Hello, ", user.name, "!");

/// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping.
///
/// Each argument is handled by its static type: anything implicitly
/// convertible to `Element` is appended as a child, any string type is
/// appended as text, and every other type is rejected at compile time.
void addChildren(T...)(T t) {
	import std.conv : to;
	import std.traits : isSomeString;

	foreach(item; t) {
		// `item` is a value, so the checks must look at typeof(item); testing
		// the value itself is always false and rejected every argument
		static if(is(typeof(item) : Element))
			appendChild(item);
		else static if (isSomeString!(typeof(item)))
			appendText(to!string(item));
		else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren");
	}
}

/// Adds a new `tagName` child that wraps `firstChild`, and returns the new child.
/// `info2` is passed to [Element.make] as its second info string.
Element addChild(string tagName, Element firstChild, string info2 = null)
in {
	assert(firstChild !is null);
}
out(ret) {
	assert(ret !is null);
	assert(ret.parentNode is this);
	assert(firstChild.parentNode is ret);

	assert(ret.parentDocument is this.parentDocument);
	//assert(firstChild.parentDocument is this.parentDocument);
}
body {
	// the empty (non-null) first info string keeps make's second-info handling active
	auto e = Element.make(tagName, "", info2);
	e.appendChild(firstChild);
	this.appendChild(e);
	return e;
}

/// Adds a new `tagName` child whose content is parsed from `innerHtml`, and returns it.
Element addChild(string tagName, in Html innerHtml, string info2 = null)
in {
}
out(ret) {
	assert(ret !is null);
	assert((cast(DocumentFragment) this !is null) || (ret.parentNode is this), ret.toString);// e.parentNode ? e.parentNode.toString : "null");
	assert(ret.parentDocument is this.parentDocument);
}
body {
	auto e = Element.make(tagName, "", info2);
	// attach first, then fill in the markup
	this.appendChild(e);
	e.innerHTML = innerHtml.source;
	return e;
}


/// Appends each element of `children`, in order, via `appendChild`.
void appendChildren(Element[] children) {
	foreach(ele; children)
		appendChild(ele);
}

///.
void reparent(Element newParent)
	in {
		assert(newParent !is null);
		assert(parentNode !is null);
	}
	out {
		assert(this.parentNode is newParent);
		//assert(isInArray(this, newParent.children));
	}
body {
	// leave the old parent before joining the new one
	parentNode.removeChild(this);
	newParent.appendChild(this);
}

/**
	Strips this tag out of the document, putting its inner html
	as children of the parent.

	For example, given: `<p>hello <b>there</b></p>`, if you
	call `stripOut` on the `b` element, you'll be left with
	`<p>hello there</p>`.

	The idea here is to make it easy to get rid of garbage
	markup you aren't interested in.
*/
void stripOut()
	in {
		assert(parentNode !is null);
	}
	out {
		assert(parentNode is null);
		assert(children.length == 0);
	}
body {
	// detach the children from this element first
	foreach(kid; children)
		kid.parentNode = null; // remove the parent

	// then either splice them into our place or simply drop ourselves
	if(children.length == 0)
		parentNode.removeChild(this);
	else
		parentNode.replaceChild(this, this.children);

	this.children.length = 0; // we reparented them all above
}

/// shorthand for `this.parentNode.removeChild(this)` with `parentNode` `null` check
/// if the element already isn't in a tree, it does nothing.
Element removeFromTree()
	in {

	}
	out(var) {
		assert(this.parentNode is null);
		assert(var is this);
	}
body {
	if(this.parentNode !is null)
		this.parentNode.removeChild(this);

	return this;
}

/++
	Wraps this element inside the given element.
	It's like `this.replaceWith(what); what.appendChild(this);`

	Given: `<b>cool</b>`, if you call `b.wrapIn(new Link("site.com", "my site is "));`
	you'll end up with: `<a href="site.com">my site is <b>cool</b></a>`.
+/
Element wrapIn(Element what)
	in {
		assert(what !is null);
	}
	out(ret) {
		assert(this.parentNode is what);
		assert(ret is what);
	}
body {
	// the wrapper takes our slot in the tree, then adopts us
	this.replaceWith(what);
	what.appendChild(this);

	return what;
}

/// Replaces this element with something else in the tree.
Element replaceWith(Element e)
	in {
		assert(this.parentNode !is null);
	}
body {
	// the replacement is pulled out of wherever it currently lives first
	e.removeFromTree();
	this.parentNode.replaceChild(this, e);
	return e;
}

/**
	Splits the className into an array of each class given
*/
string[] classNames() const {
	auto all = className();
	return all.split(" ");
}

/**
	Fetches the first consecutive text nodes concatenated together.


	`firstInnerText` of `<example>some text<span>more text</span></example>` is `some text`. It stops at the first child tag encountered.

	See_also: [directText], [innerText]
*/
string firstInnerText() const {
	string text;
	foreach(kid; children) {
		// the first non-text child ends the run
		if(kid.nodeType != NodeType.Text)
			break;

		text ~= kid.nodeValue();
	}
	return text;
}


/**
	Returns the text directly under this element.


	Unlike [innerText], it does not recurse, and unlike [firstInnerText], it continues
	past child tags. So, `<example>some <b>bold</b> text</example>`
	will return `some  text` because it only gets the text, skipping non-text children.

	See_also: [firstInnerText], [innerText]
*/
@property string directText() {
	string text;
	foreach(kid; children)
		if(kid.nodeType == NodeType.Text)
			text ~= kid.nodeValue();

	return text;
}

/**
	Sets the direct text, without modifying other child nodes.


	Unlike [innerText], this does *not* remove existing elements in the element.

	It only replaces the first text node it sees.

	If there are no text nodes, it calls [appendText].

	So, given `<div><img />text here</div>`, it will keep the `<img />`, and replace the `text here`.
*/
@property void directText(string text) {
	foreach(kid; children) {
		if(kid.nodeType != NodeType.Text)
			continue;

		// first text child found: overwrite it and stop
		auto textNode = cast(TextNode) kid;
		textNode.contents = text;
		return;
	}

	appendText(text);
}

// do nothing, this is primarily a virtual hook
// for links and forms
void setValue(string field, string value) { }


// this is a thing so i can remove observer support if it gets slow
// I have not implemented all these yet
private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) {
	// without a document there is nobody to notify
	if(parentDocument is null)
		return;

	DomMutationEvent event;
	event.target = this;
	event.operation = operation;
	event.relatedString = s1;
	event.relatedString2 = s2;
	event.related = r;
	event.related2 = r2;

	parentDocument.dispatchMutationEvent(event);
}

// putting all the members up front

// this ought to be private. don't use it directly.
Element[] children;

/// The name of the tag. Remember, changing this doesn't change the dynamic type of the object.
string tagName;

/// This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead.
string[string] attributes;

/// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here.
/// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list.
private bool selfClosed;

/// Get the parent Document object that contains this element.
/// It may be null, so remember to check for that.
Document parentDocument;

///.
inout(Element) parentNode() inout {
	auto p = _parentNode;

	// a DocumentFragment parent is skipped; the fragment's own parent is reported instead
	if(cast(DocumentFragment) p)
		return p._parentNode;

	return p;
}

//protected
Element parentNode(Element e) {
	return _parentNode = e;
}

private Element _parentNode;

// the next few methods are for implementing interactive kind of things
private CssStyle _computedStyle;

// these are here for event handlers. Don't forget that this library never fires events.
// (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.)
EventHandler[][string] bubblingEventHandlers;
EventHandler[][string] capturingEventHandlers;
EventHandler[string] defaultEventHandlers;

// Registers `handler` for `event` in the capturing table when `useCapture`
// is set, otherwise in the bubbling table. A leading "on" is stripped from
// names longer than two characters, so "onclick" and "click" share a list.
void addEventListener(string event, EventHandler handler, bool useCapture = false) {
	if(event.length > 2 && event[0..2] == "on")
		event = event[2 .. $];

	if(useCapture)
		capturingEventHandlers[event] ~= handler;
	else
		bubblingEventHandlers[event] ~= handler;
}


// and now methods

/++
	Convenience function to try to do the right thing for HTML. This is the main way I create elements.

	The meaning of `childInfo` and `childInfo2` depends on the tag; for most tags they are
	the inner text and the class name. See the second `switch` in the body for the special cases.

	History:
		On February 8, 2021, the `selfClosedElements` parameter was added. Previously, it used a private
		immutable global list for HTML. It still defaults to the same list, but you can change it now via
		the parameter.
+/
static Element make(string tagName, string childInfo = null, string childInfo2 = null, const string[] selfClosedElements = htmlSelfClosedElements) {
	bool selfClosed = tagName.isInArray(selfClosedElements);

	Element e;
	// want to create the right kind of object for the given tag...
	switch(tagName) {
		case "#text":
			// text nodes are returned right away; none of the setup below applies
			e = new TextNode(null, childInfo);
			return e;
		// break;
		case "table":
			e = new Table(null);
		break;
		case "a":
			e = new Link(null);
		break;
		case "form":
			e = new Form(null);
		break;
		case "tr":
			e = new TableRow(null);
		break;
		case "td", "th":
			e = new TableCell(null, tagName);
		break;
		default:
			e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere
	}

	// make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too
	e.tagName = tagName;
	e.selfClosed = selfClosed;

	// note: a null childInfo skips this whole switch, childInfo2 included
	if(childInfo !is null)
		switch(tagName) {
			/* html5 convenience tags */
			case "audio":
				if(childInfo.length)
					e.addChild("source", childInfo);
				if(childInfo2 !is null)
					e.appendText(childInfo2);
			break;
			case "source":
				e.src = childInfo;
				if(childInfo2 !is null)
					e.type = childInfo2;
			break;
			/* regular html 4 stuff */
			case "img":
				e.src = childInfo;
				if(childInfo2 !is null)
					e.alt = childInfo2;
			break;
			case "link":
				e.href = childInfo;
				if(childInfo2 !is null)
					e.rel = childInfo2;
			break;
			case "option":
				e.innerText = childInfo;
				if(childInfo2 !is null)
					e.value = childInfo2;
			break;
			case "input":
				e.type = "hidden";
				e.name = childInfo;
				if(childInfo2 !is null)
					e.value = childInfo2;
			break;
			case "button":
				e.innerText = childInfo;
				if(childInfo2 !is null)
					e.type = childInfo2;
			break;
			case "a":
				e.innerText = childInfo;
				if(childInfo2 !is null)
					e.href = childInfo2;
			break;
			case "script":
			case "style":
				e.innerRawSource = childInfo;
			break;
			case "meta":
				e.name = childInfo;
				if(childInfo2 !is null)
					e.content = childInfo2;
			break;
			/* generically, assume we were passed text and perhaps class */
			default:
				e.innerText = childInfo;
				if(childInfo2.length)
					e.className = childInfo2;
		}

	return e;
}

// Overload that fills the new element from parsed html instead of plain text.
static Element make(string tagName, in Html innerHtml, string childInfo2 = null) {
	// FIXME: childInfo2 is ignored when info1 is null
	// the empty slice is non-null, so the main overload still applies childInfo2
	auto m = Element.make(tagName, "not null"[0..0], childInfo2);
	m.innerHTML = innerHtml.source;
	return m;
}

// Overload that appends `child` to the new element.
// NOTE(review): the first info string passed on is null, so per the FIXME
// above childInfo2 has no effect here.
static Element make(string tagName, Element child, string childInfo2 = null) {
	auto m = Element.make(tagName, cast(string) null, childInfo2);
	m.appendChild(child);
	return m;
}


/// Generally, you don't want to call this yourself - use Element.make or document.createElement instead.
this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) {
	parentDocument = _parentDocument;
	tagName = _tagName;
	if(_attributes !is null)
		attributes = _attributes;
	selfClosed = _selfClosed;

	version(dom_node_indexes)
		this.dataset.nodeIndex = to!string(&(this.attributes));

	assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid");
}

/++
	Convenience constructor when you don't care about the parentDocument. Note this might break things on the document.
	Note also that without a parent document, elements are always in strict, case-sensitive mode.

	History:
		On February 8, 2021, the `selfClosedElements` parameter was added. It defaults to the same behavior as
		before: using the hard-coded list of HTML elements, but it can now be overridden. If you use
		[Document.createElement], it will use the list set for the current document. Otherwise, you can pass
		something here if you like.
+/
this(string _tagName, string[string] _attributes = null, const string[] selfClosedElements = htmlSelfClosedElements) {
	tagName = _tagName;
	if(_attributes !is null)
		attributes = _attributes;
	selfClosed = tagName.isInArray(selfClosedElements);

	// this is meant to reserve some memory. It makes a small, but consistent improvement.
	//children.length = 8;
	//children.length = 0;

	version(dom_node_indexes)
		this.dataset.nodeIndex = to!string(&(this.attributes));
}

private this(Document _parentDocument) {
	parentDocument = _parentDocument;

	version(dom_node_indexes)
		this.dataset.nodeIndex = to!string(&(this.attributes));
}


/* *******************************
       Navigating the DOM
*********************************/

/// Returns the first child of this element. If it has no children, returns null.
/// Remember, text nodes are children too.
@property Element firstChild() {
	return children.length ? children[0] : null;
}

///
@property Element lastChild() {
	return children.length ? children[$ - 1] : null;
}

/// UNTESTED
/// the next element you would encounter if you were reading it in the source
///
/// That is: the first child if there is one, else the next sibling, else the
/// next sibling of the closest ancestor that has one. Returns null when
/// nothing follows this element.
Element nextInSource() {
	auto n = firstChild;
	if(n is null)
		n = nextSibling();
	if(n is null) {
		auto p = this.parentNode;
		while(p !is null && n is null) {
			n = p.nextSibling;
			// climb one level each round; without this step the loop never
			// ended when the parent had no following sibling
			p = p.parentNode;
		}
	}

	return n;
}

/// UNTESTED
/// ditto
///
/// Returns the previous sibling if there is one, otherwise the parent
/// (null for an element without a parent).
Element previousInSource() {
	auto p = previousSibling;
	// a first child is preceded in the source by its parent's opening tag;
	// the parent's last child comes later in the source, or is this element itself
	if(p is null)
		p = parentNode;
	return p;
}

///.
@property Element previousElementSibling() {
	return previousSibling("*");
}

///.
@property Element previousSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;
	Element ps = null;
	// remember the last acceptable node seen before reaching ourselves
	foreach(e; this.parentNode.childNodes) {
		if(e is this)
			break;
		if(tagName == "*" && e.nodeType != NodeType.Text) {
			ps = e;
		} else if(tagName is null || e.tagName == tagName)
			ps = e;
	}

	return ps;
}

/// Returns the closest following sibling that is not a text node, or null if there is none.
@property Element nextElementSibling() {
	return nextSibling("*");
}

/++
	Returns the closest following sibling, or null if there is none (or no parent).

	Params:
		tagName = null accepts any node (text included), `"*"` accepts any non-text node,
			anything else must equal the sibling's tag name.
+/
@property Element nextSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;
	Element ns = null;
	bool mightBe = false; // becomes true once we have passed ourselves in the sibling list
	foreach(e; this.parentNode.childNodes) {
		if(e is this) {
			mightBe = true;
			continue;
		}
		if(mightBe) {
			if(tagName == "*" && e.nodeType != NodeType.Text) {
				ns = e;
				break;
			}
			if(tagName is null || e.tagName == tagName) {
				ns = e;
				break;
			}
		}
	}

	return ns;
}


/++
	Gets the nearest node, going up the `parentNode` chain, with the given tag name.

	Params:
		tagName = the tag name to look for. When null it is inferred from `T` for
			[Form] (`"form"`), [Table] (`"table"`) and [Link] (`"a"`); for any other
			`T` it stays null and the immediate parent is taken.

	Returns:
		The ancestor found. With the default `T == Element` this is null when no
		ancestor matches.

	Throws:
		[ElementNotFoundException] when `T` is a subclass and the ancestor found is
		missing or is not an instance of `T`.
+/
T getParent(T = Element)(string tagName = null) if(is(T : Element)) {
	if(tagName is null) {
		static if(is(T == Form))
			tagName = "form";
		else static if(is(T == Table))
			tagName = "table";
		else static if(is(T == Link))
			tagName = "a"; // this used to be `tagName == "a"`, a comparison that assigned nothing
	}

	auto par = this.parentNode;
	while(par !is null) {
		if(tagName is null || par.tagName == tagName)
			break;
		par = par.parentNode;
	}

	static if(!is(T == Element)) {
		auto t = cast(T) par;
		if(t is null)
			throw new ElementNotFoundException("", tagName ~ " parent not found", this);
	} else
		auto t = par;

	return t;
}

/// Returns the first element in this element's `tree` whose `id` equals the given one, or null if there is none.
Element getElementById(string id) {
	// FIXME: I use this function a lot, and it's kinda slow
	// not terribly slow, but not great.
	foreach(e; tree)
		if(e.id == id)
			return e;
	return null;
}

/++
	Returns a child element that matches the given `selector`.

	Note: you can give multiple selectors, separated by commas.
	It will return the first match it finds.

	Returns null if nothing in this element's `tree` matches.
+/
@scriptable
Element querySelector(string selector) {
	Selector s = Selector(selector);
	foreach(ele; tree)
		if(s.matchesElement(ele))
			return ele;
	return null;
}

/// a more standards-compliant alias for getElementsBySelector
@scriptable
Element[] querySelectorAll(string selector) {
	return getElementsBySelector(selector);
}

/// If the element matches the given selector. Previously known as `matchesSelector`.
@scriptable
bool matches(string selector) {
	/+
	bool caseSensitiveTags = true;
	if(parentDocument && parentDocument.loose)
		caseSensitiveTags = false;
	+/

	Selector s = Selector(selector);
	return s.matchesElement(this);
}

/// Returns itself or the closest parent that matches the given selector, or null if none found
/// See_also: https://developer.mozilla.org/en-US/docs/Web/API/Element/closest
@scriptable
Element closest(string selector) {
	Element e = this;
	while(e !is null) {
		if(e.matches(selector))
			return e;
		e = e.parentNode;
	}
	return null;
}

/**
	Returns elements that match the given CSS selector

	* -- all, default if nothing else is there

	tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector

	It is all additive

	OP

	space = descendant
	>     = direct descendant
	+     = sibling (E+F Matches any F element immediately preceded by a sibling element E)

	[foo]        Foo is present as an attribute
	[foo="warning"]   Matches any E element whose "foo" attribute value is exactly equal to "warning".
2328 E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning" 2329 E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en". 2330 2331 [item$=sdas] ends with 2332 [item^-sdsad] begins with 2333 2334 Quotes are optional here. 2335 2336 Pseudos: 2337 :first-child 2338 :last-child 2339 :link (same as a[href] for our purposes here) 2340 2341 2342 There can be commas separating the selector. A comma separated list result is OR'd onto the main. 2343 2344 2345 2346 This ONLY cares about elements. text, etc, are ignored 2347 2348 2349 There should be two functions: given element, does it match the selector? and given a selector, give me all the elements 2350 */ 2351 Element[] getElementsBySelector(string selector) { 2352 // FIXME: this function could probably use some performance attention 2353 // ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app. 2354 2355 2356 bool caseSensitiveTags = true; 2357 if(parentDocument && parentDocument.loose) 2358 caseSensitiveTags = false; 2359 2360 Element[] ret; 2361 foreach(sel; parseSelectorString(selector, caseSensitiveTags)) 2362 ret ~= sel.getElements(this); 2363 return ret; 2364 } 2365 2366 /// . 2367 Element[] getElementsByClassName(string cn) { 2368 // is this correct? 2369 return getElementsBySelector("." ~ cn); 2370 } 2371 2372 ///. 2373 Element[] getElementsByTagName(string tag) { 2374 if(parentDocument && parentDocument.loose) 2375 tag = tag.toLower(); 2376 Element[] ret; 2377 foreach(e; tree) 2378 if(e.tagName == tag) 2379 ret ~= e; 2380 return ret; 2381 } 2382 2383 2384 /* ******************************* 2385 Attributes 2386 *********************************/ 2387 2388 /** 2389 Gets the given attribute value, or null if the 2390 attribute is not set. 
2391 2392 Note that the returned string is decoded, so it no longer contains any xml entities. 2393 */ 2394 @scriptable 2395 string getAttribute(string name) const { 2396 if(parentDocument && parentDocument.loose) 2397 name = name.toLower(); 2398 auto e = name in attributes; 2399 if(e) 2400 return *e; 2401 else 2402 return null; 2403 } 2404 2405 /** 2406 Sets an attribute. Returns this for easy chaining 2407 */ 2408 @scriptable 2409 Element setAttribute(string name, string value) { 2410 if(parentDocument && parentDocument.loose) 2411 name = name.toLower(); 2412 2413 // I never use this shit legitimately and neither should you 2414 auto it = name.toLower(); 2415 if(it == "href" || it == "src") { 2416 auto v = value.strip().toLower(); 2417 if(v.startsWith("vbscript:")) 2418 value = value[9..$]; 2419 if(v.startsWith("javascript:")) 2420 value = value[11..$]; 2421 } 2422 2423 attributes[name] = value; 2424 2425 sendObserverEvent(DomMutationOperations.setAttribute, name, value); 2426 2427 return this; 2428 } 2429 2430 /** 2431 Returns if the attribute exists. 2432 */ 2433 @scriptable 2434 bool hasAttribute(string name) { 2435 if(parentDocument && parentDocument.loose) 2436 name = name.toLower(); 2437 2438 if(name in attributes) 2439 return true; 2440 else 2441 return false; 2442 } 2443 2444 /** 2445 Removes the given attribute from the element. 2446 */ 2447 @scriptable 2448 Element removeAttribute(string name) 2449 out(ret) { 2450 assert(ret is this); 2451 } 2452 body { 2453 if(parentDocument && parentDocument.loose) 2454 name = name.toLower(); 2455 if(name in attributes) 2456 attributes.remove(name); 2457 2458 sendObserverEvent(DomMutationOperations.removeAttribute, name); 2459 return this; 2460 } 2461 2462 /** 2463 Gets the class attribute's contents. Returns 2464 an empty string if it has no class. 2465 */ 2466 @property string className() const { 2467 auto c = getAttribute("class"); 2468 if(c is null) 2469 return ""; 2470 return c; 2471 } 2472 2473 ///. 
@property Element className(string c) {
	this.setAttribute("class", c);
	return this;
}

/**
	Object-style access to the common HTML attributes accepted by `isConvenientAttribute`.

	Called with a non-null argument it sets the attribute first; either way it
	returns the attribute's current value.

	---
	auto element = Element.make("a");
	element.href = "cool.html"; // this is the same as element.setAttribute("href", "cool.html");
	string where = element.href; // same as element.getAttribute("href");
	---

*/
@property string opDispatch(string name)(string v = null) if(isConvenientAttribute(name)) {
	// pure getter use: nothing to store
	if(v is null)
		return getAttribute(name);

	setAttribute(name, v);
	return getAttribute(name);
}

/**
	Old access to attributes. Use [attrs] instead.

	DEPRECATED: generally open opDispatch caused a lot of unforeseen trouble with compile time duck typing and UFCS extensions.
	so I want to remove it. A small whitelist of attributes is still allowed, but others are not.

	Instead, use element.attrs.attribute, element.attrs["attribute"],
	or element.getAttribute("attribute")/element.setAttribute("attribute").

	Any name outside the whitelist is rejected at compile time.
*/
@property string opDispatch(string name)(string v = null) if(!isConvenientAttribute(name)) {
	static assert(0, "Don't use " ~ name ~ " direct on Element, instead use element.attrs.attributeName");
}

/*
// this would be nice for convenience, but it broke the getter above.
@property void opDispatch(string name)(bool boolean) if(name != "popFront") {
	if(boolean)
		setAttribute(name, name);
	else
		removeAttribute(name);
}
*/

/**
	Read-only view of this element's children (text nodes included).
*/
@property const(Element[]) childNodes() const {
	return this.children;
}

/// Mutable counterpart of the const overload; hands back the children array itself.
@property Element[] childNodes() { // FIXME: the above should be inout
	return this.children;
}

/++
	HTML5's dataset property. It is an alternate view into attributes with the data- prefix.
	Given `<a data-my-property="cool" />`, we get `assert(a.dataset.myProperty == "cool");`
+/
@property DataSet dataset() {
	auto view = DataSet(this);
	return view;
}

/++
	Dot and index style access to this element's attributes.
	---
	ele.attrs.largeSrc = "foo"; // same as ele.setAttribute("largeSrc", "foo")
	---
+/
@property AttributeSet attrs() {
	auto view = AttributeSet(this);
	return view;
}

/++
	String and object style (like in Javascript) access to the style attribute.

	---
	element.style.color = "red"; // translates into setting `color: red;` in the `style` attribute
	---
+/
@property ElementStyle style() {
	auto view = ElementStyle(this);
	return view;
}

/++
	Replaces the whole style attribute with the string `s`, then returns the style view.
+/
@property ElementStyle style(string s) {
	setAttribute("style", s);
	return this.style();
}

// Currently a no-op: the whole body is commented out.
private void parseAttributes(string[] whichOnes = null) {
/+
	if(whichOnes is null)
		whichOnes = attributes.keys;
	foreach(attr; whichOnes) {
		switch(attr) {
			case "id":

			break;
			case "class":

			break;
			case "style":

			break;
			default:
				// we don't care about it
		}
	}
+/
}


// if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there.
/// Don't use this.
@property CssStyle computedStyle() {
	// built once, then served from the cache
	if(_computedStyle !is null)
		return _computedStyle;

	string css = this.getAttribute("style");

	// legacy presentational html attributes are folded in as if they were css declarations
	if(this.hasAttribute("width"))
		css ~= "; width: " ~ this.attrs.width;
	if(this.hasAttribute("height"))
		css ~= "; height: " ~ this.attrs.height;
	if(this.hasAttribute("bgcolor"))
		css ~= "; background-color: " ~ this.attrs.bgcolor;
	// `text` only means a color on <body>
	if(this.tagName == "body" && this.hasAttribute("text"))
		css ~= "; color: " ~ this.attrs.text;
	if(this.hasAttribute("color"))
		css ~= "; color: " ~ this.attrs.color;

	_computedStyle = new CssStyle(null, css); // gives at least something to work with
	return _computedStyle;
}

/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
version(browser) {
	void* expansionHook; ///ditto
	int offsetWidth; ///ditto
	int offsetHeight; ///ditto
	int offsetLeft; ///ditto
	int offsetTop; ///ditto
	Element offsetParent; ///ditto
	bool hasLayout; ///ditto
	int zIndex; ///ditto

	///ditto
	int absoluteLeft() {
		// our own offset plus that of every element up the offsetParent chain
		int total = offsetLeft;
		for(auto ancestor = offsetParent; ancestor; ancestor = ancestor.offsetParent)
			total += ancestor.offsetLeft;
		return total;
	}

	///ditto
	int absoluteTop() {
		// our own offset plus that of every element up the offsetParent chain
		int total = offsetTop;
		for(auto ancestor = offsetParent; ancestor; ancestor = ancestor.offsetParent)
			total += ancestor.offsetTop;
		return total;
	}
}

// Back to the regular dom functions

public:


/* *******************************
          DOM Mutation
*********************************/

/// Removes all inner content from the tag; all child text and elements are gone.
void removeAllChildren()
	out {
		assert(this.children.length == 0);
	}
body {
	// Detach before dropping, like removeChild and removeChildren do. Otherwise a dropped
	// node keeps naming this element as its parent, and a later appendChild of that node
	// elsewhere asks us to removeChild something we no longer hold ("no such child").
	foreach(child; children)
		if(child !is null && child.parentNode is this)
			child.parentNode = null;

	children = null;
}

/// Inserts `e` into this element's parent, right after this element. Returns `e`.
/// History: added June 13, 2020
Element appendSibling(Element e) {
	parentNode.insertAfter(this, e);
	return e;
}

/// Inserts `e` into this element's parent, right before this element. Returns `e`.
/// History: added June 13, 2020
Element prependSibling(Element e) {
	parentNode.insertBefore(this, e);
	return e;
}


/++
	Appends the given element to this one. If it already has a parent, it is removed from that tree and moved to this one.

	Appending clears `selfClosed`, and a [DocumentFragment] contributes its children
	rather than itself. Observers are notified with `DomMutationOperations.appendChild`.

	Returns: the element passed in.

	See_also: https://developer.mozilla.org/en-US/docs/Web/API/Node/appendChild

	History:
		Prior to 1 Jan 2020 (git tag v4.4.1 and below), it required that the given element must not have a parent already. This was in violation of standard, so it changed the behavior to remove it from the existing parent and instead move it here.
+/
Element appendChild(Element e)
	in {
		assert(e !is null);
	}
	out (ret) {
		assert((cast(DocumentFragment) this !is null) || (e.parentNode is this), e.toString);// e.parentNode ? e.parentNode.toString : "null");
		assert(e.parentDocument is this.parentDocument);
		assert(e is ret);
	}
body {
	if(e.parentNode !is null)
		e.parentNode.removeChild(e);

	selfClosed = false;
	e.parentNode = this;
	e.parentDocument = this.parentDocument;
	if(auto frag = cast(DocumentFragment) e)
		children ~= frag.children;
	else
		children ~= e;

	sendObserverEvent(DomMutationOperations.appendChild, null, null, e);

	return e;
}

/++
	Inserts `what` into this element's children, right before the existing child `where`.

	`what` must not have a parent yet. A [DocumentFragment] contributes its children
	rather than itself.

	Returns: `what`.
+/
Element insertBefore(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);

		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
body {
	foreach(i, e; children) {
		if(e is where) {
			if(auto frag = cast(DocumentFragment) what)
				children = children[0..i] ~ frag.children ~ children[i..$];
			else
				children = children[0..i] ~ what ~ children[i..$];
			what.parentDocument = this.parentDocument;
			what.parentNode = this;
			return what;
		}
	}

	// `where` was not among our children: nothing is inserted. With contracts enabled
	// the out contract above reports it. (An unreachable assert(0) used to follow.)
	return what;
}

/++
	Inserts `what` into this element's children, right after the existing child `where`.

	`what` must not have a parent yet. A [DocumentFragment] contributes its children
	rather than itself.

	Returns: `what`.
+/
Element insertAfter(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);
		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
body {
	foreach(i, e; children) {
		if(e is where) {
			if(auto frag = cast(DocumentFragment) what)
				children = children[0 .. i + 1] ~ frag.children ~ children[i + 1 .. $];
			else
				children = children[0 .. i + 1] ~ what ~ children[i + 1 .. $];
			what.parentNode = this;
			what.parentDocument = this.parentDocument;
			return what;
		}
	}

	// `where` was not among our children: nothing is inserted. With contracts enabled
	// the out contract above reports it. (An unreachable assert(0) used to follow.)
	return what;
}

/// swaps one child for a new thing. Returns the old child which is now parentless.
Element swapNode(Element child, Element replacement)
	in {
		assert(child !is null);
		assert(replacement !is null);
		assert(child.parentNode is this);
	}
	out(ret) {
		assert(ret is child);
		assert(ret.parentNode is null);
		assert(replacement.parentNode is this);
		assert(replacement.parentDocument is this.parentDocument);
	}
body {
	// `c` is a reference into the children array, so assigning to it replaces the slot
	foreach(ref c; this.children)
		if(c is child) {
			c.parentNode = null;
			c = replacement;
			c.parentNode = this;
			c.parentDocument = this.parentDocument;
			return child;
		}
	assert(0);
}


/++
	Appends the given text to the node, as a new [TextNode]. Returns this.


	Calling `e.appendText(" hi")` on `<example>text <b>bold</b></example>`
	yields `<example>text <b>bold</b> hi</example>`.

	See_Also:
		[firstInnerText], [directText], [innerText], [appendChild]
+/
@scriptable
Element appendText(string text) {
	Element e = new TextNode(parentDocument, text);
	appendChild(e);
	return this;
}

/++
	Returns child elements which are of a tag type (excludes text, comments, etc.).


	childElements of `<example>text <b>bold</b></example>` is just the `<b>` tag.

	Params:
		tagName = filter results to only the child elements with the given tag name.
+/
@property Element[] childElements(string tagName = null) {
	Element[] ret;
	foreach(c; children)
		if(c.nodeType == 1 && (tagName is null || c.tagName == tagName))
			ret ~= c;
	return ret;
}

/++
	Appends the given html to the element, returning the elements appended


	This is similar to `element.innerHTML += "html string";` in Javascript.
+/
@scriptable
Element[] appendHtml(string html) {
	// parse inside a throwaway root, then move the parsed nodes over
	Document d = new Document("<root>" ~ html ~ "</root>");
	return stealChildren(d.root);
}


/// Inserts `child` into this element's children, directly after the existing child `where`.
void insertChildAfter(Element child, Element where)
	in {
		assert(child !is null);
		assert(where !is null);
		assert(where.parentNode is this);
		assert(!selfClosed);
		//assert(isInArray(where, children));
	}
	out {
		assert(child.parentNode is this);
		assert(where.parentNode is this);
		//assert(isInArray(where, children));
		//assert(isInArray(child, children));
	}
body {
	foreach(idx, existing; children) {
		if(existing !is where)
			continue;

		// splice right behind the anchor; a fragment contributes its children instead of itself
		const after = idx + 1;
		auto frag = cast(DocumentFragment) child;
		if(frag !is null)
			children = children[0 .. after] ~ child.children ~ children[after .. $];
		else
			children = children[0 .. after] ~ child ~ children[after .. $];

		child.parentNode = this;
		child.parentDocument = this.parentDocument;
		break;
	}
}

/++
	Moves every child of `e` over to `this`, so that `e` ends up with no children.

	Params:
		e = the element whose children you want to steal
		position = an existing child element in `this` before which you want the stolen children to be inserted. If `null`, it will append the stolen children at the end of our current children.

	Returns:
		The children that were taken from `e`.

	NOTE(review): when `position` is given but is not one of our children, the stolen
	nodes are not inserted anywhere, yet `e` is still emptied.
+/
Element[] stealChildren(Element e, Element position = null)
	in {
		assert(!selfClosed);
		assert(e !is null);
		//if(position !is null)
			//assert(isInArray(position, children));
	}
	out (ret) {
		assert(e.children.length == 0);
		// all the parentNode is this checks fail because DocumentFragments do not appear in the parent tree, they are invisible...
		version(none)
		debug foreach(child; ret) {
			assert(child.parentNode is this);
			assert(child.parentDocument is this.parentDocument);
		}
	}
body {
	// re-home the nodes first
	foreach(stolen; e.children) {
		stolen.parentNode = this;
		stolen.parentDocument = this.parentDocument;
	}

	if(position !is null) {
		// splice in front of the requested child
		foreach(idx, existing; children) {
			if(existing is position) {
				children = children[0 .. idx] ~ e.children ~ children[idx .. $];
				break;
			}
		}
	} else {
		children ~= e.children;
	}

	auto ret = e.children[];
	e.children.length = 0;

	return ret;
}

/// Makes the given element the first entry of our children list. It must not have a parent already.
Element prependChild(Element e)
	in {
		assert(e.parentNode is null);
		assert(!selfClosed);
	}
	out {
		assert(e.parentNode is this);
		assert(e.parentDocument is this.parentDocument);
		assert(children[0] is e);
	}
body {
	e.parentDocument = this.parentDocument;
	e.parentNode = this;

	// a fragment contributes its children instead of itself
	auto frag = cast(DocumentFragment) e;
	if(frag is null)
		children = e ~ children;
	else
		children = e.children ~ children;

	return e;
}


/**
	Serializes all the children, in order, into `where` and returns just the part
	that was written by this call - text that could be pasted into an XML file.

	Returns an empty string if the children array is null.
*/
@property string innerHTML(Appender!string where = appender!string()) const {
	if(children is null)
		return "";

	// the appender may already hold data; only what we add belongs to the result
	const begin = where.data.length;

	foreach(child; children) {
		assert(child !is null);
		child.writeToAppender(where);
	}

	return where.data[begin .. $];
}

/**
	Takes some html and replaces the element's children with the tree made from the string.
*/
@property Element innerHTML(string html, bool strict = false) {
	if(html.length == 0) {
		// I often say innerHTML = ""; as a shortcut to clear it out,
		// so let's optimize that slightly.
		removeAllChildren();
		return this;
	}

	selfClosed = false;

	// the html is parsed inside a throwaway wrapper element of a scratch document
	auto doc = new Document();
	doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document

	// The old children leave the tree now, so they must stop naming us as their parent,
	// the same as removeChild/removeChildren guarantee. Done after parsing so that a
	// parse failure leaves the current children untouched.
	foreach(old; children)
		if(old !is null && old.parentNode is this)
			old.parentNode = null;

	children = doc.root.children;
	foreach(c; children) {
		c.parentNode = this;
		c.parentDocument = this.parentDocument;
	}

	reparentTreeDocuments();

	// the scratch document must not keep listing nodes that are ours now
	doc.root.children = null;

	return this;
}

/// ditto
@property Element innerHTML(Html html) {
	return this.innerHTML = html.source;
}

// Points every node in this element's `tree` at this element's parentDocument.
private void reparentTreeDocuments() {
	foreach(c; this.tree)
		c.parentDocument = this.parentDocument;
}

/**
	Replaces this node with the given html string, which is parsed

	Note: this invalidates the this reference, since it is removed
	from the tree.

	Returns the new children that replace this.
*/
@property Element[] outerHTML(string html) {
	auto doc = new Document();
	doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness

	// adopt the parsed nodes as our own children first...
	children = doc.root.children;
	foreach(c; children) {
		c.parentNode = this;
		c.parentDocument = this.parentDocument;
	}


	reparentTreeDocuments();


	// ...then take ourselves out of the tree
	stripOut();

	return doc.root.children;
}

/++
	Returns all the html for this element, including the tag itself.

	This is equivalent to calling toString().
+/
@property string outerHTML() {
	return this.toString();
}

/// This sets the inner content of the element *without* trying to parse it.
/// You can inject any code in there; this serves as an escape hatch from the dom.
///
/// The only times you might actually need it are for < style > and < script > tags in html.
/// Other than that, innerHTML and/or innerText should do the job.
///
/// All existing children are replaced by a single [RawSource] node.
@property void innerRawSource(string rawSource) {
	// the old children leave the tree; do not let them keep naming us as their parent
	foreach(old; children)
		if(old !is null && old.parentNode is this)
			old.parentNode = null;

	children.length = 0;
	auto rs = new RawSource(parentDocument, rawSource);
	rs.parentNode = this;

	children ~= rs;
}

/++
	Replaces the child `find` with `replace`, which must not have a parent yet.

	Returns: `replace`.

	Throws: Exception if `find` is not one of this element's children.
+/
Element replaceChild(Element find, Element replace)
	in {
		assert(find !is null);
		assert(replace !is null);
		assert(replace.parentNode is null);
	}
	out(ret) {
		assert(ret is replace);
		assert(replace.parentNode is this);
		assert(replace.parentDocument is this.parentDocument);
		assert(find.parentNode is null);
	}
body {
	// FIXME
	//if(auto frag = cast(DocumentFragment) replace)
		//return this.replaceChild(frag, replace.children);
	for(int i = 0; i < children.length; i++) {
		if(children[i] is find) {
			replace.parentNode = this;
			children[i].parentNode = null;
			children[i] = replace;
			replace.parentDocument = this.parentDocument;
			return replace;
		}
	}

	throw new Exception("no such child");
}

/**
	Replaces the given element with a whole group.

	None of the replacements may have a parent yet. An empty group simply removes `find`.

	Throws: Exception if `find` is not one of this element's children.
*/
void replaceChild(Element find, Element[] replace)
	in {
		assert(find !is null);
		assert(replace !is null);
		assert(find.parentNode is this);
		debug foreach(r; replace)
			assert(r.parentNode is null);
	}
	out {
		assert(find.parentNode is null);
		assert(children.length >= replace.length);
		debug foreach(child; children)
			assert(child !is find);
		debug foreach(r; replace)
			assert(r.parentNode is this);
	}
body {
	if(replace.length == 0) {
		removeChild(find);
		return;
	}
	assert(replace.length);
	for(int i = 0; i < children.length; i++) {
		if(children[i] is find) {
			children[i].parentNode = null; // this element should now be dead
			// the first replacement takes over the slot...
			children[i] = replace[0];
			foreach(e; replace) {
				e.parentNode = this;
				e.parentDocument = this.parentDocument;
			}

			// ...and the rest are spliced in right behind it
			children = .insertAfter(children, i, replace[1..$]);

			return;
		}
	}

	throw new Exception("no such child");
}


/**
	Removes the given child from this list.

	Returns the removed element.

	Throws: Exception if `c` is not one of this element's children.
*/
Element removeChild(Element c)
	in {
		assert(c !is null);
		assert(c.parentNode is this);
	}
	out {
		debug foreach(child; children)
			assert(child !is c);
		assert(c.parentNode is null);
	}
body {
	foreach(i, e; children) {
		if(e is c) {
			children = children[0..i] ~ children [i+1..$];
			c.parentNode = null;
			return c;
		}
	}

	throw new Exception("no such child");
}

/// This removes all the children from this element, returning the old list.
Element[] removeChildren()
	out (ret) {
		assert(children.length == 0);
		debug foreach(r; ret)
			assert(r.parentNode is null);
	}
body {
	// hand back a copy, so the caller's list is independent of our own array
	Element[] oldChildren = children.dup;
	foreach(c; oldChildren)
		c.parentNode = null;

	children.length = 0;

	return oldChildren;
}

/**
	Fetch the inside text, with all tags stripped out.

	<p>cool <b>api</b> & code dude<p>
	innerText of that is "cool api & code dude".

	This does not match what real innerText does!
	http://perfectionkills.com/the-poor-misunderstood-innerText/

	It is more like textContent.
*/
@scriptable
@property string innerText() const {
	string s;
	foreach(child; children) {
		// text nodes contribute their value; everything else is descended into
		if(child.nodeType != NodeType.Text)
			s ~= child.innerText;
		else
			s ~= child.nodeValue();
	}
	return s;
}

///
alias textContent = innerText;

/**
	Sets the inside text, replacing all children. You don't
	have to worry about entity encoding.

	The element ends up with exactly one [TextNode] child and is no longer self-closed.
*/
@scriptable
@property void innerText(string text) {
	selfClosed = false;

	// The old children leave the tree here, so they must stop naming us as their parent
	// (removeChild/removeChildren guarantee the same). Otherwise re-appending one of them
	// elsewhere asks us to removeChild a node we no longer hold ("no such child").
	foreach(old; children)
		if(old !is null && old.parentNode is this)
			old.parentNode = null;

	Element e = new TextNode(parentDocument, text);
	e.parentNode = this;
	children = [e];
}

/**
	Strips this node out of the document, replacing it with the given text
*/
@property void outerText(string text) {
	parentNode.replaceChild(this, new TextNode(parentDocument, text));
}

/**
	Same result as innerText; the tag with all inner tags stripped out
*/
@property string outerText() const {
	return innerText;
}


/* *******************************
          Miscellaneous
*********************************/

/// This is a full clone of the element. Alias for cloneNode(true) now. Don't extend it.
@property Element cloned()
/+
	out(ret) {
		// FIXME: not sure why these fail...
		assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length));
		assert(ret.tagName == this.tagName);
	}
body {
+/
{
	return this.cloneNode(true);
}

/// Clones the node. If deepClone is true, clone all inner tags too. If false, only do this tag (and its attributes), but it will have no contents.
Element cloneNode(bool deepClone) {
	auto e = Element.make(this.tagName);
	e.parentDocument = this.parentDocument;
	e.attributes = this.attributes.aadup; // the clone gets its own attribute table
	e.selfClosed = this.selfClosed;

	if(deepClone) {
		foreach(child; children) {
			e.appendChild(child.cloneNode(true));
		}
	}


	return e;
}

/// W3C DOM interface. Only really meaningful on [TextNode] instances, but the interface is present on the base class.
/// This base implementation returns an empty string.
string nodeValue() const {
	return "";
}

// should return int
/// The node type. This base implementation returns 1.
@property int nodeType() const {
	return 1;
}


invariant () {
	assert(tagName.indexOf(" ") == -1);

	if(children !is null)
	debug foreach(child; children) {
	//	assert(parentNode !is null);
		assert(child !is null);
	//	assert(child.parentNode is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parentNode is null ? "null" : child.parentNode.tagName));
		assert(child !is this);
		//assert(child !is parentNode);
	}

	/+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out
	if(parentNode !is null) {
		// if you have a parent, you should share the same parentDocument; this is appendChild()'s job
		auto lol = cast(TextNode) this;
		assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents);
	}
	+/
	//assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required
	// reason is so you can create these without needing a reference to the document
}

/**
	Turns the whole element, including tag, attributes, and children, into a string which could be pasted into
	an XML file.
*/
override string toString() const {
	return writeToAppender();
}

/++
	Builds the line break plus indentation that goes in front of a pretty-printed tag.

	Params:
		insertComments = wrap the inserted whitespace in `<!--` and `-->`
		indentationLevel = how many times `indentWith` is repeated after the newline
		indentWith = one level of indentation; if null, nothing is produced and null is returned
+/
protected string toPrettyStringIndent(bool insertComments, int indentationLevel, string indentWith) const {
	if(indentWith is null)
		return null;
	string s;

	if(insertComments) s ~= "<!--";
	s ~= "\n";
	foreach(indent; 0 .. indentationLevel)
		s ~= indentWith;
	if(insertComments) s ~= "-->";

	return s;
}

/++
	Writes out with formatting. Be warned: formatting changes the contents. Use ONLY
	for eyeball debugging.

	Params:
		insertComments = wrap the inserted whitespace in html comments
		indentationLevel = the nesting depth this element is written at
		indentWith = the string used for one level of indentation
+/
string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {

	// first step is to concatenate any consecutive text nodes to simplify
	// the white space analysis. this changes the tree! but i'm allowed since
	// the comment always says it changes the comments
	//
	// actually i'm not allowed cuz it is const so i will cheat and lie
	/+
	TextNode lastTextChild = null;
	for(int a = 0; a < this.children.length; a++) {
		auto child = this.children[a];
		if(auto tn = cast(TextNode) child) {
			if(lastTextChild) {
				lastTextChild.contents ~= tn.contents;
				for(int b = a; b < this.children.length - 1; b++)
					this.children[b] = this.children[b + 1];
				this.children = this.children[0 .. $-1];
			} else {
				lastTextChild = tn;
			}
		} else {
			lastTextChild = null;
		}
	}
	+/

	// local working list that shadows the member: consecutive text nodes are merged
	// into fresh temporary TextNodes, so the real tree is left alone
	const(Element)[] children;

	TextNode lastTextChild = null;
	for(int a = 0; a < this.children.length; a++) {
		auto child = this.children[a];
		if(auto tn = cast(const(TextNode)) child) {
			if(lastTextChild !is null) {
				lastTextChild.contents ~= tn.contents;
			} else {
				lastTextChild = new TextNode("");
				lastTextChild.parentNode = cast(Element) this;
				lastTextChild.contents ~= tn.contents;
				children ~= lastTextChild;
			}
		} else {
			lastTextChild = null;
			children ~= child;
		}
	}

	string s = toPrettyStringIndent(insertComments, indentationLevel, indentWith);

	s ~= "<";
	s ~= tagName;

	// i sort these for consistent output. might be more legible
	// but especially it keeps it the same for diff purposes.
	import std.algorithm : sort;
	auto keys = sort(attributes.keys);
	foreach(n; keys) {
		auto v = attributes[n];
		s ~= " ";
		s ~= n;
		s ~= "=\"";
		s ~= htmlEntitiesEncode(v);
		s ~= "\"";
	}

	// a self-closed tag is finished here; no children and no closing tag are written
	if(selfClosed){
		s ~= " />";
		return s;
	}

	s ~= ">";

	// for simple `<collection><item>text</item><item>text</item></collection>`, let's
	// just keep them on the same line
	if(tagName.isInArray(inlineElements) || allAreInlineHtml(children)) {
		foreach(child; children) {
			s ~= child.toString();//toPrettyString(false, 0, null);
		}
	} else {
		foreach(child; children) {
			assert(child !is null);

			s ~= child.toPrettyString(insertComments, indentationLevel + 1, indentWith);
		}

		s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
	}

	s ~= "</";
	s ~= tagName;
	s ~= ">";

	return s;
}

/+
/// Writes out the opening tag only, if applicable.
3425 string writeTagOnly(Appender!string where = appender!string()) const { 3426 +/ 3427 3428 /// This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time. 3429 /// Note: the ordering of attributes in the string is undefined. 3430 /// Returns the string it creates. 3431 string writeToAppender(Appender!string where = appender!string()) const { 3432 assert(tagName !is null); 3433 3434 where.reserve((this.children.length + 1) * 512); 3435 3436 auto start = where.data.length; 3437 3438 where.put("<"); 3439 where.put(tagName); 3440 3441 import std.algorithm : sort; 3442 auto keys = sort(attributes.keys); 3443 foreach(n; keys) { 3444 auto v = attributes[n]; // I am sorting these for convenience with another project. order of AAs is undefined, so I'm allowed to do it.... and it is still undefined, I might change it back later. 3445 //assert(v !is null); 3446 where.put(" "); 3447 where.put(n); 3448 where.put("=\""); 3449 htmlEntitiesEncode(v, where); 3450 where.put("\""); 3451 } 3452 3453 if(selfClosed){ 3454 where.put(" />"); 3455 return where.data[start .. $]; 3456 } 3457 3458 where.put('>'); 3459 3460 innerHTML(where); 3461 3462 where.put("</"); 3463 where.put(tagName); 3464 where.put('>'); 3465 3466 return where.data[start .. $]; 3467 } 3468 3469 /** 3470 Returns a lazy range of all its children, recursively. 3471 */ 3472 @property ElementStream tree() { 3473 return new ElementStream(this); 3474 } 3475 3476 // I moved these from Form because they are generally useful. 3477 // Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here. 3478 /// Tags: HTML, HTML5 3479 // FIXME: add overloads for other label types... 
Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
	// checkbox and radio fields put the control first and the caption after it
	const captionLast = (type == "checkbox" || type == "radio");

	auto holder = this.addChild("label");

	if(!captionLast)
		holder.addChild("span", label);

	Element input;
	if(type == "textarea")
		input = holder.addChild("textarea")
			.setAttribute("name", name)
			.setAttribute("rows", "6");
	else
		input = holder.addChild("input")
			.setAttribute("name", name)
			.setAttribute("type", type);

	if(captionLast)
		holder.addChild("span", label);

	// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
	fieldOptions.applyToElement(input);
	return holder;
}

// Same as the string-label overload, except the caption is an existing
// Element that is appended to the new <label> ahead of the control.
// Returns the <label> element.
Element addField(Element label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto holder = this.addChild("label");
	holder.addChild(label);

	Element input;
	if(type == "textarea")
		input = holder.addChild("textarea")
			.setAttribute("name", name)
			.setAttribute("rows", "6");
	else
		input = holder.addChild("input")
			.setAttribute("name", name)
			.setAttribute("type", type);

	// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
	fieldOptions.applyToElement(input);
	return holder;
}

// Shorthand for a "text" field with explicit field options.
Element addField(string label, string name, FormFieldOptions fieldOptions) {
	return addField(label, name, "text", fieldOptions);
}

// Adds a <label> holding a caption <span> and a <select> with one <option>
// per entry of `options`. Each entry is passed to addChild("option", value, key).
// NOTE: fieldOptions is accepted but not applied by this overload (see FIXME).
// Returns the <label> element.
Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto holder = this.addChild("label");
	holder.addChild("span", label);
	auto sel = holder.addChild("select").setAttribute("name", name);

	foreach(k, opt; options)
		sel.addChild("option", opt, k);

	// FIXME: implement requirements somehow

	return holder;
}

// Appends <div class="submit-holder"> containing an <input type="submit">.
// A non-empty label becomes the button's value. Returns the holder <div>.
Element addSubmitButton(string label = null) {
	auto holder = this.addChild("div");
	holder.addClass("submit-holder");

	auto button = holder.addChild("input");
	button.type = "submit";
	if(label.length)
		button.value = label;

	return holder;
}

} // closes the class these members belong to (its header is above this chunk)

// FIXME: since Document loosens the input requirements, it should probably be the sub class...
/// Specializes Document for handling generic XML. (always uses strict mode, uses xml mime type and file header)
/// Group: core_functionality
class XmlDocument : Document {
	/// Parses `data` with parseStrict after clearing selfClosedElements and
	/// setting the xml content type and prolog.
	this(string data) {
		selfClosedElements = null;
		contentType = "text/xml; charset=utf-8";
		_prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n";

		parseStrict(data);
	}
}




import std.string;

/* domconvenience follows { */

/// finds comments that match the given txt. Case insensitive, strips whitespace.
/// Group: core_functionality
Element[] findComments(Document document, string txt) {
	return findComments(document.root, txt);
}

/// ditto
Element[] findComments(Element element, string txt) {
	// both sides are compared stripped and lower-cased
	const wanted = txt.strip().toLower();

	Element[] matches;
	foreach(comment; element.getElementsByTagName("#comment")) {
		if(comment.nodeValue().strip().toLower() == wanted)
			matches ~= comment;
	}

	return matches;
}

/// An option type that propagates null. See: [Element.optionSelector]
/// Group: implementations
struct MaybeNullElement(SomeElementType) {
	this(SomeElementType ele) {
		this.element = ele;
	}
	SomeElementType element;

	/// Forwards to the element, with a null check inserted that propagates null.
	///
	/// Only methods returning an Element (or subclass), a string, or void are
	/// supported; anything else trips the static assert.
	auto opDispatch(string method, T...)(T args) {
		alias type = typeof(__traits(getMember, element, method)(args));
		static if(is(type : Element)) {
			if(element is null)
				return MaybeNullElement!type(null);
			return __traits(getMember, element, method)(args);
		} else static if(is(type == string)) {
			if(element is null)
				return cast(string) null;
			return __traits(getMember, element, method)(args);
		} else static if(is(type == void)) {
			if(element is null)
				return;
			__traits(getMember, element, method)(args);
		} else {
			static assert(0);
		}
	}

	/// Allows implicit casting to the wrapped element.
	alias element this;
}

/++
	A collection of elements which forwards methods to the children.
+/
/// Group: implementations
struct ElementCollection {
	/// Wraps a single element.
	this(Element e) {
		elements = [e];
	}

	/// Collects everything under `e` that matches `selector` (via querySelectorAll).
	this(Element e, string selector) {
		elements = e.querySelectorAll(selector);
	}

	/// Wraps an existing array; the array is stored as given, not copied.
	this(Element[] e) {
		elements = e;
	}

	Element[] elements;
	//alias elements this; // let it implicitly convert to the underlying array

	/// Runs `selector` against every member and gathers all results into a new collection.
	ElementCollection opIndex(string selector) {
		ElementCollection found;
		foreach(member; elements)
			found.elements ~= member.getElementsBySelector(selector);
		return found;
	}

	/// The i-th element of the collection.
	Element opIndex(int i) {
		return elements[i];
	}

	/// if you slice it, give the underlying array for easy forwarding of the
	/// collection to range expecting algorithms or looping over.
	Element[] opSlice() {
		return elements;
	}

	/// And input range primitives so we can foreach over this
	void popFront() {
		elements = elements[1..$];
	}

	/// ditto
	Element front() {
		return elements[0];
	}

	/// ditto
	bool empty() {
		return elements.length == 0;
	}

	/++
		Collects strings from the collection, concatenating them together
		Kinda like running reduce and ~= on it.

		Note that `separator` is appended after every element, including the last one.

		---
		document["p"].collect!"innerText";
		---
	+/
	string collect(string method)(string separator = "") {
		string text;
		// the loop variable must stay named `e`: the mixin string refers to it
		foreach(e; elements)
			text ~= mixin("e." ~ method) ~ separator;
		return text;
	}

	/// Forward method calls to each individual [Element|element] of the collection
	/// returns this so it can be chained.
	ElementCollection opDispatch(string name, T...)(T t) {
		// the loop variable must stay named `e`: the mixin string refers to it
		foreach(e; elements)
			mixin("e." ~ name)(t);
		return this;
	}

	/++
		Calls [Element.wrapIn] on each member of the collection, but clones the argument `what` for each one.
		The clone is shallow (cloneNode(false)).
	+/
	ElementCollection wrapIn(Element what) {
		foreach(member; elements)
			member.wrapIn(what.cloneNode(false));

		return this;
	}

	/// Concatenates two ElementCollection together.
	ElementCollection opBinary(string op : "~")(ElementCollection rhs) {
		return ElementCollection(this.elements ~ rhs.elements);
	}
}


/// this puts in operators and opDispatch to handle string indexes and properties, forwarding to get and set functions.
/// The host type must provide `string get(string)` and `string set(string, string)`.
/// Group: implementations
mixin template JavascriptStyleDispatch() {
	/// `obj.name` reads through get; `obj.name = v` (any non-null v) writes through set.
	string opDispatch(string name)(string v = null) if(name != "popFront") { // popFront will make this look like a range. Do not want.
		return (v is null) ? get(name) : set(name, v);
	}

	/// `obj["key"]` reads through get.
	string opIndex(string key) const {
		return get(key);
	}

	/// `obj["field"] = value` writes through set.
	string opIndexAssign(string value, string field) {
		return set(field, value);
	}

	// FIXME: doesn't seem to work
	// NOTE(review): D rewrites `key in obj` to obj.opBinaryRight!"in"(key), so an
	// opBinary overload is never selected for it. It also relies on a `fields`
	// member that the host type would have to provide.
	string* opBinary(string op)(string key)  if(op == "in") {
		return key in fields;
	}
}

/// A proxy object to do the Element class' dataset property. See Element.dataset for more info.
///
/// Do not create this object directly.
/// Group: implementations
struct DataSet {
	/// Wraps the element whose `data-*` attributes are exposed.
	this(Element e) {
		this._element = e;
	}

	private Element _element;

	/// Stores `value` in the attribute "data-" ~ unCamelCase(name) and returns `value`.
	string set(string name, string value) {
		_element.setAttribute("data-" ~ unCamelCase(name), value);
		return value;
	}

	/// Reads the attribute "data-" ~ unCamelCase(name).
	string get(string name) const {
		return _element.getAttribute("data-" ~ unCamelCase(name));
	}

	///
	mixin JavascriptStyleDispatch!();
}

/// Proxy object for attributes which will replace the main opDispatch eventually
/// Group: implementations
struct AttributeSet {
	/// Wraps the element whose attributes are exposed.
	this(Element e) {
		this._element = e;
	}

	private Element _element;

	/// Sets the attribute `name` and returns `value`.
	string set(string name, string value) {
		_element.setAttribute(name, value);
		return value;
	}

	/// Reads the attribute `name`.
	string get(string name) const {
		return _element.getAttribute(name);
	}

	///
	mixin JavascriptStyleDispatch!();
}



/// for style, i want to be able to set it with a string like a plain attribute,
/// but also be able to do properties Javascript style.

/// Group: implementations
struct ElementStyle {
	this(Element parent) {
		_element = parent;
	}

	Element _element;

	// Reference to the element's raw "style" attribute string. If the element
	// has no such attribute yet, an empty one is created so that there is
	// something to reference (this is done even through a const view).
	@property ref inout(string) _attribute() inout {
		auto s = "style" in _element.attributes;
		if(s is null) {
			auto e = cast() _element; // const_cast
			e.attributes["style"] = ""; // we need something to reference
			s = cast(inout) ("style" in e.attributes);
		}

		assert(s !is null);
		return *s;
	}

	alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work.

	// Sets one css property and rewrites the whole style attribute from the
	// parsed rules. `name` is given camelCased ("cssFloat" maps to "float").
	// An empty `name` is ignored; an empty `value` removes the rule.
	// Returns `value`.
	string set(string name, string value) {
		if(name.length == 0)
			return value;

		name = (name == "cssFloat") ? "float" : unCamelCase(name);

		auto r = rules();
		r[name] = value;

		// start from a non-null empty string, exactly like assigning "" to the attribute
		string css = "";
		foreach(k, v; r) {
			if(v is null || v.length == 0) /* css can't do empty rules anyway so we'll use that to remove */
				continue;
			if(css.length)
				css ~= " ";
			css ~= k ~ ": " ~ v ~ ";";
		}
		_attribute = css;

		_element.setAttribute("style", _attribute); // this is to trigger the observer call

		return value;
	}

	// Looks up one css property by its camelCased name ("cssFloat" maps to
	// "float"). Returns null when the property is not set.
	string get(string name) const {
		name = (name == "cssFloat") ? "float" : unCamelCase(name);

		auto r = rules();
		if(auto found = name in r)
			return *found;
		return null;
	}

	// Parses the style attribute into a property -> value table by splitting
	// on ';' and then on the first ':'. A rule without ':' is stored with an
	// empty value; names and values are stripped of surrounding whitespace.
	string[string] rules() const {
		string[string] ret;
		foreach(rule;  _attribute.split(";")) {
			rule = rule.strip();
			if(rule.length == 0)
				continue;

			auto idx = rule.indexOf(":");
			if(idx == -1) {
				ret[rule] = "";
				continue;
			}

			ret[rule[0 .. idx].strip()] = rule[idx + 1 .. $].strip();
		}

		return ret;
	}

	mixin JavascriptStyleDispatch!();
}

/// Converts a camel cased propertyName to a css style dashed property-name
/// Every ASCII capital A-Z becomes '-' followed by its lower case letter; everything else is copied as is.
string unCamelCase(string a) {
	string ret;
	foreach(c; a) {
		if(c >= 'A' && c <= 'Z') {
			ret ~= '-';
			ret ~= cast(char)(c - 'A' + 'a');
		} else {
			ret ~= c;
		}
	}
	return ret;
}

/// Translates a css style property-name to a camel cased propertyName
/// Dashes are dropped and the character following a dash (or a run of dashes) is upper-cased.
string camelCase(string a) {
	string ret;
	bool justSawDash = false;
	foreach(c; a) {
		if(c == '-') {
			justSawDash = true;
			continue;
		}

		if(justSawDash) {
			justSawDash = false;
			ret ~= toUpper("" ~ c);
		} else {
			ret ~= c;
		}
	}
	return ret;
}









// domconvenience ends }











// @safe:

// NOTE: do *NOT* override toString on Element subclasses. It won't work.
// Instead, override writeToAppender();

// FIXME: should I keep processing instructions like <?blah ?> and <!-- blah --> (comments too lol)? I *want* them stripped out of most my output, but I want to be able to parse and create them too.

// Stripping them is useful for reading php as html.... but adding them
// is good for building php.

// I need to maintain compatibility with the way it is now too.

import std.string;
import std.exception;
import std.uri;
import std.array;
import std.range;

//import std.stdio;

// tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh
// that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's
// most likely a typo so I say kill kill kill.
/++
	This might belong in another module, but it represents a file with a mime type and some data.
	Document implements this interface with type = text/html (see Document.contentType for more info)
	and data = document.toString, so you can return Documents anywhere web.d expects FileResources.
+/
/// Group: bonus_functionality
interface FileResource {
	/// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png"
	@property string contentType() const;
	/// the data
	immutable(ubyte)[] getData() const;
	/++
		filename, return null if none

		History:
			Added December 25, 2020
	+/
	@property string filename() const;
}




/// Node type constants; only the text node value (3) is defined here.
/// Group: bonus_functionality
enum NodeType { Text = 3 }


/// You can use this to do an easy null check or a dynamic cast+null check on any element.
/// Throws: ElementNotFoundException (carrying the call site's file and line) when `e` is null or is not a `T`.
/// Group: core_functionality
T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element))
	in {}
	out(ret) { assert(ret !is null); }
body {
	auto converted = cast(T) e;
	if(converted is null)
		throw new ElementNotFoundException(T.stringof, "passed value", e, file, line);
	return converted;
}


/// An element named "#fragment" that writes only its children, never a tag of its own.
/// Group: core_functionality
class DocumentFragment : Element {
	/// Creates an empty fragment belonging to the given document.
	this(Document _parentDocument) {
		tagName = "#fragment";
		super(_parentDocument);
	}

	/++
		Creates a document fragment from the given HTML. Note that the HTML is assumed to close all tags contained inside it.

		Since: March 29, 2018 (or git tagged v2.1.0)
	+/
	this(Html html) {
		this(null);

		this.innerHTML = html.source;
	}

	/// Writes just the children (the fragment's innerHTML).
	override string writeToAppender(Appender!string where = appender!string()) const {
		return this.innerHTML(where);
	}

	/// Pretty-prints each child at the fragment's own indentation level.
	override string toPrettyString(bool insertComments, int indentationLevel, string indentWith) const {
		string pretty;
		foreach(child; children)
			pretty ~= child.toPrettyString(insertComments, indentationLevel, indentWith);
		return pretty;
	}

	/// DocumentFragments don't really exist in a dom, so they ignore themselves in parent nodes
	/*
	override inout(Element) parentNode() inout {
		return children.length ? children[0].parentNode : null;
	}
	*/
	override Element parentNode(Element p) {
		this._parentNode = p;
		// the children are handed the same parent, skipping the fragment itself
		foreach(child; children)
			child.parentNode = p;
		return p;
	}
}

/// Given text, encode all html entities on it - &amp;, &lt;, &gt;, and &quot;. This function also
/// encodes all 8 bit characters as entities, thus ensuring the resultant text will work
/// even if your charset isn't set right. You can suppress with by setting encodeNonAscii = false
///
/// The output parameter can be given to append to an existing buffer. You don't have to
/// pass one; regardless, the return value will be usable for you, with just the data encoded.
///
/// Params:
///	data = the text to encode
///	output = buffer that receives the encoded text (appended to whatever it already holds)
///	encodeNonAscii = when true, characters above 127 (and NUL) are written as numeric `&#N;` references
/// Returns: only the encoded form of `data`, not any earlier buffer contents.
/// Group: core_functionality
string htmlEntitiesEncode(string data, Appender!string output = appender!string(), bool encodeNonAscii = true) {
	// the numeric-reference branch below names std.conv.to by its full path
	static import std.conv;

	// if there's no entities, we can save a lot of time by not bothering with the
	// decoding loop. This check cuts the net toString time by better than half in my test.
	// let me know if it made your tests worse though, since if you use an entity in just about
	// every location, the check will add time... but I suspect the average experience is like mine
	// since the check gives up as soon as it can anyway.

	bool shortcut = true;
	foreach(char c; data) {
		// non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it.
		if(c == '<' || c == '>' || c == '"' || c == '&' || (encodeNonAscii && cast(uint) c > 127)) {
			shortcut = false; // there's actual work to be done
			break;
		}
	}

	if(shortcut) {
		output.put(data);
		return data;
	}

	auto start = output.data.length;

	output.reserve(data.length + 64); // grab some extra space for the encoded entities

	foreach(dchar d; data) {
		switch(d) {
			case '&':
				output.put("&amp;");
			break;
			case '<':
				output.put("&lt;");
			break;
			case '>':
				output.put("&gt;");
			break;
			case '\"':
				output.put("&quot;");
			break;
//			case '\'':
//				output.put("&#39;"); // if you are in an attribute, it might be important to encode for the same reason as double quotes
			// FIXME: should I encode apostrophes too? as &#39;... I could also do space but if your html is so bad that it doesn't
			// quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh
			// idk about apostrophes though. Might be worth it, might not.
			default:
				if(!encodeNonAscii || (d < 128 && d > 0))
					output.put(d);
				else
					output.put("&#" ~ std.conv.to!string(cast(int) d) ~ ";");
		}
	}

	//assert(output !is null); // this fails on empty attributes.....
	return output.data[start .. $];

//	data = data.replace("\u00a0", "&#160;");
}

/// An alias for htmlEntitiesEncode; it works for xml too
/// Group: core_functionality
string xmlEntitiesEncode(string data) {
	return htmlEntitiesEncode(data);
}

/// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters.
4113 /// Group: core_functionality 4114 dchar parseEntity(in dchar[] entity) { 4115 switch(entity[1..$-1]) { 4116 case "quot": 4117 return '"'; 4118 case "apos": 4119 return '\''; 4120 case "lt": 4121 return '<'; 4122 case "gt": 4123 return '>'; 4124 case "amp": 4125 return '&'; 4126 // the next are html rather than xml 4127 4128 // Retrieved from https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references 4129 // Only entities that resolve to U+0009 ~ U+1D56B are stated. 4130 case "Tab": return '\u0009'; 4131 case "NewLine": return '\u000A'; 4132 case "excl": return '\u0021'; 4133 case "QUOT": return '\u0022'; 4134 case "num": return '\u0023'; 4135 case "dollar": return '\u0024'; 4136 case "percnt": return '\u0025'; 4137 case "AMP": return '\u0026'; 4138 case "lpar": return '\u0028'; 4139 case "rpar": return '\u0029'; 4140 case "ast": case "midast": return '\u002A'; 4141 case "plus": return '\u002B'; 4142 case "comma": return '\u002C'; 4143 case "period": return '\u002E'; 4144 case "sol": return '\u002F'; 4145 case "colon": return '\u003A'; 4146 case "semi": return '\u003B'; 4147 case "LT": return '\u003C'; 4148 case "equals": return '\u003D'; 4149 case "GT": return '\u003E'; 4150 case "quest": return '\u003F'; 4151 case "commat": return '\u0040'; 4152 case "lsqb": case "lbrack": return '\u005B'; 4153 case "bsol": return '\u005C'; 4154 case "rsqb": case "rbrack": return '\u005D'; 4155 case "Hat": return '\u005E'; 4156 case "lowbar": case "UnderBar": return '\u005F'; 4157 case "grave": case "DiacriticalGrave": return '\u0060'; 4158 case "lcub": case "lbrace": return '\u007B'; 4159 case "verbar": case "vert": case "VerticalLine": return '\u007C'; 4160 case "rcub": case "rbrace": return '\u007D'; 4161 case "nbsp": case "NonBreakingSpace": return '\u00A0'; 4162 case "iexcl": return '\u00A1'; 4163 case "cent": return '\u00A2'; 4164 case "pound": return '\u00A3'; 4165 case "curren": return '\u00A4'; 4166 case "yen": return '\u00A5'; 4167 case 
"brvbar": return '\u00A6'; 4168 case "sect": return '\u00A7'; 4169 case "Dot": case "die": case "DoubleDot": case "uml": return '\u00A8'; 4170 case "copy": case "COPY": return '\u00A9'; 4171 case "ordf": return '\u00AA'; 4172 case "laquo": return '\u00AB'; 4173 case "not": return '\u00AC'; 4174 case "shy": return '\u00AD'; 4175 case "reg": case "circledR": case "REG": return '\u00AE'; 4176 case "macr": case "strns": return '\u00AF'; 4177 case "deg": return '\u00B0'; 4178 case "plusmn": case "pm": case "PlusMinus": return '\u00B1'; 4179 case "sup2": return '\u00B2'; 4180 case "sup3": return '\u00B3'; 4181 case "acute": case "DiacriticalAcute": return '\u00B4'; 4182 case "micro": return '\u00B5'; 4183 case "para": return '\u00B6'; 4184 case "middot": case "centerdot": case "CenterDot": return '\u00B7'; 4185 case "cedil": case "Cedilla": return '\u00B8'; 4186 case "sup1": return '\u00B9'; 4187 case "ordm": return '\u00BA'; 4188 case "raquo": return '\u00BB'; 4189 case "frac14": return '\u00BC'; 4190 case "frac12": case "half": return '\u00BD'; 4191 case "frac34": return '\u00BE'; 4192 case "iquest": return '\u00BF'; 4193 case "Agrave": return '\u00C0'; 4194 case "Aacute": return '\u00C1'; 4195 case "Acirc": return '\u00C2'; 4196 case "Atilde": return '\u00C3'; 4197 case "Auml": return '\u00C4'; 4198 case "Aring": case "angst": return '\u00C5'; 4199 case "AElig": return '\u00C6'; 4200 case "Ccedil": return '\u00C7'; 4201 case "Egrave": return '\u00C8'; 4202 case "Eacute": return '\u00C9'; 4203 case "Ecirc": return '\u00CA'; 4204 case "Euml": return '\u00CB'; 4205 case "Igrave": return '\u00CC'; 4206 case "Iacute": return '\u00CD'; 4207 case "Icirc": return '\u00CE'; 4208 case "Iuml": return '\u00CF'; 4209 case "ETH": return '\u00D0'; 4210 case "Ntilde": return '\u00D1'; 4211 case "Ograve": return '\u00D2'; 4212 case "Oacute": return '\u00D3'; 4213 case "Ocirc": return '\u00D4'; 4214 case "Otilde": return '\u00D5'; 4215 case "Ouml": return '\u00D6'; 4216 case "times": 
return '\u00D7'; 4217 case "Oslash": return '\u00D8'; 4218 case "Ugrave": return '\u00D9'; 4219 case "Uacute": return '\u00DA'; 4220 case "Ucirc": return '\u00DB'; 4221 case "Uuml": return '\u00DC'; 4222 case "Yacute": return '\u00DD'; 4223 case "THORN": return '\u00DE'; 4224 case "szlig": return '\u00DF'; 4225 case "agrave": return '\u00E0'; 4226 case "aacute": return '\u00E1'; 4227 case "acirc": return '\u00E2'; 4228 case "atilde": return '\u00E3'; 4229 case "auml": return '\u00E4'; 4230 case "aring": return '\u00E5'; 4231 case "aelig": return '\u00E6'; 4232 case "ccedil": return '\u00E7'; 4233 case "egrave": return '\u00E8'; 4234 case "eacute": return '\u00E9'; 4235 case "ecirc": return '\u00EA'; 4236 case "euml": return '\u00EB'; 4237 case "igrave": return '\u00EC'; 4238 case "iacute": return '\u00ED'; 4239 case "icirc": return '\u00EE'; 4240 case "iuml": return '\u00EF'; 4241 case "eth": return '\u00F0'; 4242 case "ntilde": return '\u00F1'; 4243 case "ograve": return '\u00F2'; 4244 case "oacute": return '\u00F3'; 4245 case "ocirc": return '\u00F4'; 4246 case "otilde": return '\u00F5'; 4247 case "ouml": return '\u00F6'; 4248 case "divide": case "div": return '\u00F7'; 4249 case "oslash": return '\u00F8'; 4250 case "ugrave": return '\u00F9'; 4251 case "uacute": return '\u00FA'; 4252 case "ucirc": return '\u00FB'; 4253 case "uuml": return '\u00FC'; 4254 case "yacute": return '\u00FD'; 4255 case "thorn": return '\u00FE'; 4256 case "yuml": return '\u00FF'; 4257 case "Amacr": return '\u0100'; 4258 case "amacr": return '\u0101'; 4259 case "Abreve": return '\u0102'; 4260 case "abreve": return '\u0103'; 4261 case "Aogon": return '\u0104'; 4262 case "aogon": return '\u0105'; 4263 case "Cacute": return '\u0106'; 4264 case "cacute": return '\u0107'; 4265 case "Ccirc": return '\u0108'; 4266 case "ccirc": return '\u0109'; 4267 case "Cdot": return '\u010A'; 4268 case "cdot": return '\u010B'; 4269 case "Ccaron": return '\u010C'; 4270 case "ccaron": return '\u010D'; 4271 case 
"Dcaron": return '\u010E'; 4272 case "dcaron": return '\u010F'; 4273 case "Dstrok": return '\u0110'; 4274 case "dstrok": return '\u0111'; 4275 case "Emacr": return '\u0112'; 4276 case "emacr": return '\u0113'; 4277 case "Edot": return '\u0116'; 4278 case "edot": return '\u0117'; 4279 case "Eogon": return '\u0118'; 4280 case "eogon": return '\u0119'; 4281 case "Ecaron": return '\u011A'; 4282 case "ecaron": return '\u011B'; 4283 case "Gcirc": return '\u011C'; 4284 case "gcirc": return '\u011D'; 4285 case "Gbreve": return '\u011E'; 4286 case "gbreve": return '\u011F'; 4287 case "Gdot": return '\u0120'; 4288 case "gdot": return '\u0121'; 4289 case "Gcedil": return '\u0122'; 4290 case "Hcirc": return '\u0124'; 4291 case "hcirc": return '\u0125'; 4292 case "Hstrok": return '\u0126'; 4293 case "hstrok": return '\u0127'; 4294 case "Itilde": return '\u0128'; 4295 case "itilde": return '\u0129'; 4296 case "Imacr": return '\u012A'; 4297 case "imacr": return '\u012B'; 4298 case "Iogon": return '\u012E'; 4299 case "iogon": return '\u012F'; 4300 case "Idot": return '\u0130'; 4301 case "imath": case "inodot": return '\u0131'; 4302 case "IJlig": return '\u0132'; 4303 case "ijlig": return '\u0133'; 4304 case "Jcirc": return '\u0134'; 4305 case "jcirc": return '\u0135'; 4306 case "Kcedil": return '\u0136'; 4307 case "kcedil": return '\u0137'; 4308 case "kgreen": return '\u0138'; 4309 case "Lacute": return '\u0139'; 4310 case "lacute": return '\u013A'; 4311 case "Lcedil": return '\u013B'; 4312 case "lcedil": return '\u013C'; 4313 case "Lcaron": return '\u013D'; 4314 case "lcaron": return '\u013E'; 4315 case "Lmidot": return '\u013F'; 4316 case "lmidot": return '\u0140'; 4317 case "Lstrok": return '\u0141'; 4318 case "lstrok": return '\u0142'; 4319 case "Nacute": return '\u0143'; 4320 case "nacute": return '\u0144'; 4321 case "Ncedil": return '\u0145'; 4322 case "ncedil": return '\u0146'; 4323 case "Ncaron": return '\u0147'; 4324 case "ncaron": return '\u0148'; 4325 case "napos": 
return '\u0149'; 4326 case "ENG": return '\u014A'; 4327 case "eng": return '\u014B'; 4328 case "Omacr": return '\u014C'; 4329 case "omacr": return '\u014D'; 4330 case "Odblac": return '\u0150'; 4331 case "odblac": return '\u0151'; 4332 case "OElig": return '\u0152'; 4333 case "oelig": return '\u0153'; 4334 case "Racute": return '\u0154'; 4335 case "racute": return '\u0155'; 4336 case "Rcedil": return '\u0156'; 4337 case "rcedil": return '\u0157'; 4338 case "Rcaron": return '\u0158'; 4339 case "rcaron": return '\u0159'; 4340 case "Sacute": return '\u015A'; 4341 case "sacute": return '\u015B'; 4342 case "Scirc": return '\u015C'; 4343 case "scirc": return '\u015D'; 4344 case "Scedil": return '\u015E'; 4345 case "scedil": return '\u015F'; 4346 case "Scaron": return '\u0160'; 4347 case "scaron": return '\u0161'; 4348 case "Tcedil": return '\u0162'; 4349 case "tcedil": return '\u0163'; 4350 case "Tcaron": return '\u0164'; 4351 case "tcaron": return '\u0165'; 4352 case "Tstrok": return '\u0166'; 4353 case "tstrok": return '\u0167'; 4354 case "Utilde": return '\u0168'; 4355 case "utilde": return '\u0169'; 4356 case "Umacr": return '\u016A'; 4357 case "umacr": return '\u016B'; 4358 case "Ubreve": return '\u016C'; 4359 case "ubreve": return '\u016D'; 4360 case "Uring": return '\u016E'; 4361 case "uring": return '\u016F'; 4362 case "Udblac": return '\u0170'; 4363 case "udblac": return '\u0171'; 4364 case "Uogon": return '\u0172'; 4365 case "uogon": return '\u0173'; 4366 case "Wcirc": return '\u0174'; 4367 case "wcirc": return '\u0175'; 4368 case "Ycirc": return '\u0176'; 4369 case "ycirc": return '\u0177'; 4370 case "Yuml": return '\u0178'; 4371 case "Zacute": return '\u0179'; 4372 case "zacute": return '\u017A'; 4373 case "Zdot": return '\u017B'; 4374 case "zdot": return '\u017C'; 4375 case "Zcaron": return '\u017D'; 4376 case "zcaron": return '\u017E'; 4377 case "fnof": return '\u0192'; 4378 case "imped": return '\u01B5'; 4379 case "gacute": return '\u01F5'; 4380 case 
"jmath": return '\u0237'; 4381 case "circ": return '\u02C6'; 4382 case "caron": case "Hacek": return '\u02C7'; 4383 case "breve": case "Breve": return '\u02D8'; 4384 case "dot": case "DiacriticalDot": return '\u02D9'; 4385 case "ring": return '\u02DA'; 4386 case "ogon": return '\u02DB'; 4387 case "tilde": case "DiacriticalTilde": return '\u02DC'; 4388 case "dblac": case "DiacriticalDoubleAcute": return '\u02DD'; 4389 case "DownBreve": return '\u0311'; 4390 case "Alpha": return '\u0391'; 4391 case "Beta": return '\u0392'; 4392 case "Gamma": return '\u0393'; 4393 case "Delta": return '\u0394'; 4394 case "Epsilon": return '\u0395'; 4395 case "Zeta": return '\u0396'; 4396 case "Eta": return '\u0397'; 4397 case "Theta": return '\u0398'; 4398 case "Iota": return '\u0399'; 4399 case "Kappa": return '\u039A'; 4400 case "Lambda": return '\u039B'; 4401 case "Mu": return '\u039C'; 4402 case "Nu": return '\u039D'; 4403 case "Xi": return '\u039E'; 4404 case "Omicron": return '\u039F'; 4405 case "Pi": return '\u03A0'; 4406 case "Rho": return '\u03A1'; 4407 case "Sigma": return '\u03A3'; 4408 case "Tau": return '\u03A4'; 4409 case "Upsilon": return '\u03A5'; 4410 case "Phi": return '\u03A6'; 4411 case "Chi": return '\u03A7'; 4412 case "Psi": return '\u03A8'; 4413 case "Omega": case "ohm": return '\u03A9'; 4414 case "alpha": return '\u03B1'; 4415 case "beta": return '\u03B2'; 4416 case "gamma": return '\u03B3'; 4417 case "delta": return '\u03B4'; 4418 case "epsi": case "epsilon": return '\u03B5'; 4419 case "zeta": return '\u03B6'; 4420 case "eta": return '\u03B7'; 4421 case "theta": return '\u03B8'; 4422 case "iota": return '\u03B9'; 4423 case "kappa": return '\u03BA'; 4424 case "lambda": return '\u03BB'; 4425 case "mu": return '\u03BC'; 4426 case "nu": return '\u03BD'; 4427 case "xi": return '\u03BE'; 4428 case "omicron": return '\u03BF'; 4429 case "pi": return '\u03C0'; 4430 case "rho": return '\u03C1'; 4431 case "sigmav": case "varsigma": case "sigmaf": return '\u03C2'; 4432 
case "sigma": return '\u03C3'; 4433 case "tau": return '\u03C4'; 4434 case "upsi": case "upsilon": return '\u03C5'; 4435 case "phi": return '\u03C6'; 4436 case "chi": return '\u03C7'; 4437 case "psi": return '\u03C8'; 4438 case "omega": return '\u03C9'; 4439 case "thetav": case "vartheta": case "thetasym": return '\u03D1'; 4440 case "Upsi": case "upsih": return '\u03D2'; 4441 case "straightphi": case "phiv": case "varphi": return '\u03D5'; 4442 case "piv": case "varpi": return '\u03D6'; 4443 case "Gammad": return '\u03DC'; 4444 case "gammad": case "digamma": return '\u03DD'; 4445 case "kappav": case "varkappa": return '\u03F0'; 4446 case "rhov": case "varrho": return '\u03F1'; 4447 case "epsiv": case "varepsilon": case "straightepsilon": return '\u03F5'; 4448 case "bepsi": case "backepsilon": return '\u03F6'; 4449 case "IOcy": return '\u0401'; 4450 case "DJcy": return '\u0402'; 4451 case "GJcy": return '\u0403'; 4452 case "Jukcy": return '\u0404'; 4453 case "DScy": return '\u0405'; 4454 case "Iukcy": return '\u0406'; 4455 case "YIcy": return '\u0407'; 4456 case "Jsercy": return '\u0408'; 4457 case "LJcy": return '\u0409'; 4458 case "NJcy": return '\u040A'; 4459 case "TSHcy": return '\u040B'; 4460 case "KJcy": return '\u040C'; 4461 case "Ubrcy": return '\u040E'; 4462 case "DZcy": return '\u040F'; 4463 case "Acy": return '\u0410'; 4464 case "Bcy": return '\u0411'; 4465 case "Vcy": return '\u0412'; 4466 case "Gcy": return '\u0413'; 4467 case "Dcy": return '\u0414'; 4468 case "IEcy": return '\u0415'; 4469 case "ZHcy": return '\u0416'; 4470 case "Zcy": return '\u0417'; 4471 case "Icy": return '\u0418'; 4472 case "Jcy": return '\u0419'; 4473 case "Kcy": return '\u041A'; 4474 case "Lcy": return '\u041B'; 4475 case "Mcy": return '\u041C'; 4476 case "Ncy": return '\u041D'; 4477 case "Ocy": return '\u041E'; 4478 case "Pcy": return '\u041F'; 4479 case "Rcy": return '\u0420'; 4480 case "Scy": return '\u0421'; 4481 case "Tcy": return '\u0422'; 4482 case "Ucy": return '\u0423'; 
4483 case "Fcy": return '\u0424'; 4484 case "KHcy": return '\u0425'; 4485 case "TScy": return '\u0426'; 4486 case "CHcy": return '\u0427'; 4487 case "SHcy": return '\u0428'; 4488 case "SHCHcy": return '\u0429'; 4489 case "HARDcy": return '\u042A'; 4490 case "Ycy": return '\u042B'; 4491 case "SOFTcy": return '\u042C'; 4492 case "Ecy": return '\u042D'; 4493 case "YUcy": return '\u042E'; 4494 case "YAcy": return '\u042F'; 4495 case "acy": return '\u0430'; 4496 case "bcy": return '\u0431'; 4497 case "vcy": return '\u0432'; 4498 case "gcy": return '\u0433'; 4499 case "dcy": return '\u0434'; 4500 case "iecy": return '\u0435'; 4501 case "zhcy": return '\u0436'; 4502 case "zcy": return '\u0437'; 4503 case "icy": return '\u0438'; 4504 case "jcy": return '\u0439'; 4505 case "kcy": return '\u043A'; 4506 case "lcy": return '\u043B'; 4507 case "mcy": return '\u043C'; 4508 case "ncy": return '\u043D'; 4509 case "ocy": return '\u043E'; 4510 case "pcy": return '\u043F'; 4511 case "rcy": return '\u0440'; 4512 case "scy": return '\u0441'; 4513 case "tcy": return '\u0442'; 4514 case "ucy": return '\u0443'; 4515 case "fcy": return '\u0444'; 4516 case "khcy": return '\u0445'; 4517 case "tscy": return '\u0446'; 4518 case "chcy": return '\u0447'; 4519 case "shcy": return '\u0448'; 4520 case "shchcy": return '\u0449'; 4521 case "hardcy": return '\u044A'; 4522 case "ycy": return '\u044B'; 4523 case "softcy": return '\u044C'; 4524 case "ecy": return '\u044D'; 4525 case "yucy": return '\u044E'; 4526 case "yacy": return '\u044F'; 4527 case "iocy": return '\u0451'; 4528 case "djcy": return '\u0452'; 4529 case "gjcy": return '\u0453'; 4530 case "jukcy": return '\u0454'; 4531 case "dscy": return '\u0455'; 4532 case "iukcy": return '\u0456'; 4533 case "yicy": return '\u0457'; 4534 case "jsercy": return '\u0458'; 4535 case "ljcy": return '\u0459'; 4536 case "njcy": return '\u045A'; 4537 case "tshcy": return '\u045B'; 4538 case "kjcy": return '\u045C'; 4539 case "ubrcy": return '\u045E'; 4540 case 
"dzcy": return '\u045F'; 4541 case "ensp": return '\u2002'; 4542 case "emsp": return '\u2003'; 4543 case "emsp13": return '\u2004'; 4544 case "emsp14": return '\u2005'; 4545 case "numsp": return '\u2007'; 4546 case "puncsp": return '\u2008'; 4547 case "thinsp": case "ThinSpace": return '\u2009'; 4548 case "hairsp": case "VeryThinSpace": return '\u200A'; 4549 case "ZeroWidthSpace": case "NegativeVeryThinSpace": case "NegativeThinSpace": case "NegativeMediumSpace": case "NegativeThickSpace": return '\u200B'; 4550 case "zwnj": return '\u200C'; 4551 case "zwj": return '\u200D'; 4552 case "lrm": return '\u200E'; 4553 case "rlm": return '\u200F'; 4554 case "hyphen": case "dash": return '\u2010'; 4555 case "ndash": return '\u2013'; 4556 case "mdash": return '\u2014'; 4557 case "horbar": return '\u2015'; 4558 case "Verbar": case "Vert": return '\u2016'; 4559 case "lsquo": case "OpenCurlyQuote": return '\u2018'; 4560 case "rsquo": case "rsquor": case "CloseCurlyQuote": return '\u2019'; 4561 case "lsquor": case "sbquo": return '\u201A'; 4562 case "ldquo": case "OpenCurlyDoubleQuote": return '\u201C'; 4563 case "rdquo": case "rdquor": case "CloseCurlyDoubleQuote": return '\u201D'; 4564 case "ldquor": case "bdquo": return '\u201E'; 4565 case "dagger": return '\u2020'; 4566 case "Dagger": case "ddagger": return '\u2021'; 4567 case "bull": case "bullet": return '\u2022'; 4568 case "nldr": return '\u2025'; 4569 case "hellip": case "mldr": return '\u2026'; 4570 case "permil": return '\u2030'; 4571 case "pertenk": return '\u2031'; 4572 case "prime": return '\u2032'; 4573 case "Prime": return '\u2033'; 4574 case "tprime": return '\u2034'; 4575 case "bprime": case "backprime": return '\u2035'; 4576 case "lsaquo": return '\u2039'; 4577 case "rsaquo": return '\u203A'; 4578 case "oline": case "OverBar": return '\u203E'; 4579 case "caret": return '\u2041'; 4580 case "hybull": return '\u2043'; 4581 case "frasl": return '\u2044'; 4582 case "bsemi": return '\u204F'; 4583 case "qprime": 
return '\u2057'; 4584 case "MediumSpace": return '\u205F'; 4585 case "NoBreak": return '\u2060'; 4586 case "ApplyFunction": case "af": return '\u2061'; 4587 case "InvisibleTimes": case "it": return '\u2062'; 4588 case "InvisibleComma": case "ic": return '\u2063'; 4589 case "euro": return '\u20AC'; 4590 case "tdot": case "TripleDot": return '\u20DB'; 4591 case "DotDot": return '\u20DC'; 4592 case "Copf": case "complexes": return '\u2102'; 4593 case "incare": return '\u2105'; 4594 case "gscr": return '\u210A'; 4595 case "hamilt": case "HilbertSpace": case "Hscr": return '\u210B'; 4596 case "Hfr": case "Poincareplane": return '\u210C'; 4597 case "quaternions": case "Hopf": return '\u210D'; 4598 case "planckh": return '\u210E'; 4599 case "planck": case "hbar": case "plankv": case "hslash": return '\u210F'; 4600 case "Iscr": case "imagline": return '\u2110'; 4601 case "image": case "Im": case "imagpart": case "Ifr": return '\u2111'; 4602 case "Lscr": case "lagran": case "Laplacetrf": return '\u2112'; 4603 case "ell": return '\u2113'; 4604 case "Nopf": case "naturals": return '\u2115'; 4605 case "numero": return '\u2116'; 4606 case "copysr": return '\u2117'; 4607 case "weierp": case "wp": return '\u2118'; 4608 case "Popf": case "primes": return '\u2119'; 4609 case "rationals": case "Qopf": return '\u211A'; 4610 case "Rscr": case "realine": return '\u211B'; 4611 case "real": case "Re": case "realpart": case "Rfr": return '\u211C'; 4612 case "reals": case "Ropf": return '\u211D'; 4613 case "rx": return '\u211E'; 4614 case "trade": case "TRADE": return '\u2122'; 4615 case "integers": case "Zopf": return '\u2124'; 4616 case "mho": return '\u2127'; 4617 case "Zfr": case "zeetrf": return '\u2128'; 4618 case "iiota": return '\u2129'; 4619 case "bernou": case "Bernoullis": case "Bscr": return '\u212C'; 4620 case "Cfr": case "Cayleys": return '\u212D'; 4621 case "escr": return '\u212F'; 4622 case "Escr": case "expectation": return '\u2130'; 4623 case "Fscr": case "Fouriertrf": 
return '\u2131'; 4624 case "phmmat": case "Mellintrf": case "Mscr": return '\u2133'; 4625 case "order": case "orderof": case "oscr": return '\u2134'; 4626 case "alefsym": case "aleph": return '\u2135'; 4627 case "beth": return '\u2136'; 4628 case "gimel": return '\u2137'; 4629 case "daleth": return '\u2138'; 4630 case "CapitalDifferentialD": case "DD": return '\u2145'; 4631 case "DifferentialD": case "dd": return '\u2146'; 4632 case "ExponentialE": case "exponentiale": case "ee": return '\u2147'; 4633 case "ImaginaryI": case "ii": return '\u2148'; 4634 case "frac13": return '\u2153'; 4635 case "frac23": return '\u2154'; 4636 case "frac15": return '\u2155'; 4637 case "frac25": return '\u2156'; 4638 case "frac35": return '\u2157'; 4639 case "frac45": return '\u2158'; 4640 case "frac16": return '\u2159'; 4641 case "frac56": return '\u215A'; 4642 case "frac18": return '\u215B'; 4643 case "frac38": return '\u215C'; 4644 case "frac58": return '\u215D'; 4645 case "frac78": return '\u215E'; 4646 case "larr": case "leftarrow": case "LeftArrow": case "slarr": case "ShortLeftArrow": return '\u2190'; 4647 case "uarr": case "uparrow": case "UpArrow": case "ShortUpArrow": return '\u2191'; 4648 case "rarr": case "rightarrow": case "RightArrow": case "srarr": case "ShortRightArrow": return '\u2192'; 4649 case "darr": case "downarrow": case "DownArrow": case "ShortDownArrow": return '\u2193'; 4650 case "harr": case "leftrightarrow": case "LeftRightArrow": return '\u2194'; 4651 case "varr": case "updownarrow": case "UpDownArrow": return '\u2195'; 4652 case "nwarr": case "UpperLeftArrow": case "nwarrow": return '\u2196'; 4653 case "nearr": case "UpperRightArrow": case "nearrow": return '\u2197'; 4654 case "searr": case "searrow": case "LowerRightArrow": return '\u2198'; 4655 case "swarr": case "swarrow": case "LowerLeftArrow": return '\u2199'; 4656 case "nlarr": case "nleftarrow": return '\u219A'; 4657 case "nrarr": case "nrightarrow": return '\u219B'; 4658 case "rarrw": case 
"rightsquigarrow": return '\u219D'; 4659 case "Larr": case "twoheadleftarrow": return '\u219E'; 4660 case "Uarr": return '\u219F'; 4661 case "Rarr": case "twoheadrightarrow": return '\u21A0'; 4662 case "Darr": return '\u21A1'; 4663 case "larrtl": case "leftarrowtail": return '\u21A2'; 4664 case "rarrtl": case "rightarrowtail": return '\u21A3'; 4665 case "LeftTeeArrow": case "mapstoleft": return '\u21A4'; 4666 case "UpTeeArrow": case "mapstoup": return '\u21A5'; 4667 case "map": case "RightTeeArrow": case "mapsto": return '\u21A6'; 4668 case "DownTeeArrow": case "mapstodown": return '\u21A7'; 4669 case "larrhk": case "hookleftarrow": return '\u21A9'; 4670 case "rarrhk": case "hookrightarrow": return '\u21AA'; 4671 case "larrlp": case "looparrowleft": return '\u21AB'; 4672 case "rarrlp": case "looparrowright": return '\u21AC'; 4673 case "harrw": case "leftrightsquigarrow": return '\u21AD'; 4674 case "nharr": case "nleftrightarrow": return '\u21AE'; 4675 case "lsh": case "Lsh": return '\u21B0'; 4676 case "rsh": case "Rsh": return '\u21B1'; 4677 case "ldsh": return '\u21B2'; 4678 case "rdsh": return '\u21B3'; 4679 case "crarr": return '\u21B5'; 4680 case "cularr": case "curvearrowleft": return '\u21B6'; 4681 case "curarr": case "curvearrowright": return '\u21B7'; 4682 case "olarr": case "circlearrowleft": return '\u21BA'; 4683 case "orarr": case "circlearrowright": return '\u21BB'; 4684 case "lharu": case "LeftVector": case "leftharpoonup": return '\u21BC'; 4685 case "lhard": case "leftharpoondown": case "DownLeftVector": return '\u21BD'; 4686 case "uharr": case "upharpoonright": case "RightUpVector": return '\u21BE'; 4687 case "uharl": case "upharpoonleft": case "LeftUpVector": return '\u21BF'; 4688 case "rharu": case "RightVector": case "rightharpoonup": return '\u21C0'; 4689 case "rhard": case "rightharpoondown": case "DownRightVector": return '\u21C1'; 4690 case "dharr": case "RightDownVector": case "downharpoonright": return '\u21C2'; 4691 case "dharl": case 
"LeftDownVector": case "downharpoonleft": return '\u21C3'; 4692 case "rlarr": case "rightleftarrows": case "RightArrowLeftArrow": return '\u21C4'; 4693 case "udarr": case "UpArrowDownArrow": return '\u21C5'; 4694 case "lrarr": case "leftrightarrows": case "LeftArrowRightArrow": return '\u21C6'; 4695 case "llarr": case "leftleftarrows": return '\u21C7'; 4696 case "uuarr": case "upuparrows": return '\u21C8'; 4697 case "rrarr": case "rightrightarrows": return '\u21C9'; 4698 case "ddarr": case "downdownarrows": return '\u21CA'; 4699 case "lrhar": case "ReverseEquilibrium": case "leftrightharpoons": return '\u21CB'; 4700 case "rlhar": case "rightleftharpoons": case "Equilibrium": return '\u21CC'; 4701 case "nlArr": case "nLeftarrow": return '\u21CD'; 4702 case "nhArr": case "nLeftrightarrow": return '\u21CE'; 4703 case "nrArr": case "nRightarrow": return '\u21CF'; 4704 case "lArr": case "Leftarrow": case "DoubleLeftArrow": return '\u21D0'; 4705 case "uArr": case "Uparrow": case "DoubleUpArrow": return '\u21D1'; 4706 case "rArr": case "Rightarrow": case "Implies": case "DoubleRightArrow": return '\u21D2'; 4707 case "dArr": case "Downarrow": case "DoubleDownArrow": return '\u21D3'; 4708 case "hArr": case "Leftrightarrow": case "DoubleLeftRightArrow": case "iff": return '\u21D4'; 4709 case "vArr": case "Updownarrow": case "DoubleUpDownArrow": return '\u21D5'; 4710 case "nwArr": return '\u21D6'; 4711 case "neArr": return '\u21D7'; 4712 case "seArr": return '\u21D8'; 4713 case "swArr": return '\u21D9'; 4714 case "lAarr": case "Lleftarrow": return '\u21DA'; 4715 case "rAarr": case "Rrightarrow": return '\u21DB'; 4716 case "zigrarr": return '\u21DD'; 4717 case "larrb": case "LeftArrowBar": return '\u21E4'; 4718 case "rarrb": case "RightArrowBar": return '\u21E5'; 4719 case "duarr": case "DownArrowUpArrow": return '\u21F5'; 4720 case "loarr": return '\u21FD'; 4721 case "roarr": return '\u21FE'; 4722 case "hoarr": return '\u21FF'; 4723 case "forall": case "ForAll": return 
'\u2200'; 4724 case "comp": case "complement": return '\u2201'; 4725 case "part": case "PartialD": return '\u2202'; 4726 case "exist": case "Exists": return '\u2203'; 4727 case "nexist": case "NotExists": case "nexists": return '\u2204'; 4728 case "empty": case "emptyset": case "emptyv": case "varnothing": return '\u2205'; 4729 case "nabla": case "Del": return '\u2207'; 4730 case "isin": case "isinv": case "Element": case "in": return '\u2208'; 4731 case "notin": case "NotElement": case "notinva": return '\u2209'; 4732 case "niv": case "ReverseElement": case "ni": case "SuchThat": return '\u220B'; 4733 case "notni": case "notniva": case "NotReverseElement": return '\u220C'; 4734 case "prod": case "Product": return '\u220F'; 4735 case "coprod": case "Coproduct": return '\u2210'; 4736 case "sum": case "Sum": return '\u2211'; 4737 case "minus": return '\u2212'; 4738 case "mnplus": case "mp": case "MinusPlus": return '\u2213'; 4739 case "plusdo": case "dotplus": return '\u2214'; 4740 case "setmn": case "setminus": case "Backslash": case "ssetmn": case "smallsetminus": return '\u2216'; 4741 case "lowast": return '\u2217'; 4742 case "compfn": case "SmallCircle": return '\u2218'; 4743 case "radic": case "Sqrt": return '\u221A'; 4744 case "prop": case "propto": case "Proportional": case "vprop": case "varpropto": return '\u221D'; 4745 case "infin": return '\u221E'; 4746 case "angrt": return '\u221F'; 4747 case "ang": case "angle": return '\u2220'; 4748 case "angmsd": case "measuredangle": return '\u2221'; 4749 case "angsph": return '\u2222'; 4750 case "mid": case "VerticalBar": case "smid": case "shortmid": return '\u2223'; 4751 case "nmid": case "NotVerticalBar": case "nsmid": case "nshortmid": return '\u2224'; 4752 case "par": case "parallel": case "DoubleVerticalBar": case "spar": case "shortparallel": return '\u2225'; 4753 case "npar": case "nparallel": case "NotDoubleVerticalBar": case "nspar": case "nshortparallel": return '\u2226'; 4754 case "and": case "wedge": 
return '\u2227'; 4755 case "or": case "vee": return '\u2228'; 4756 case "cap": return '\u2229'; 4757 case "cup": return '\u222A'; 4758 case "int": case "Integral": return '\u222B'; 4759 case "Int": return '\u222C'; 4760 case "tint": case "iiint": return '\u222D'; 4761 case "conint": case "oint": case "ContourIntegral": return '\u222E'; 4762 case "Conint": case "DoubleContourIntegral": return '\u222F'; 4763 case "Cconint": return '\u2230'; 4764 case "cwint": return '\u2231'; 4765 case "cwconint": case "ClockwiseContourIntegral": return '\u2232'; 4766 case "awconint": case "CounterClockwiseContourIntegral": return '\u2233'; 4767 case "there4": case "therefore": case "Therefore": return '\u2234'; 4768 case "becaus": case "because": case "Because": return '\u2235'; 4769 case "ratio": return '\u2236'; 4770 case "Colon": case "Proportion": return '\u2237'; 4771 case "minusd": case "dotminus": return '\u2238'; 4772 case "mDDot": return '\u223A'; 4773 case "homtht": return '\u223B'; 4774 case "sim": case "Tilde": case "thksim": case "thicksim": return '\u223C'; 4775 case "bsim": case "backsim": return '\u223D'; 4776 case "ac": case "mstpos": return '\u223E'; 4777 case "acd": return '\u223F'; 4778 case "wreath": case "VerticalTilde": case "wr": return '\u2240'; 4779 case "nsim": case "NotTilde": return '\u2241'; 4780 case "esim": case "EqualTilde": case "eqsim": return '\u2242'; 4781 case "sime": case "TildeEqual": case "simeq": return '\u2243'; 4782 case "nsime": case "nsimeq": case "NotTildeEqual": return '\u2244'; 4783 case "cong": case "TildeFullEqual": return '\u2245'; 4784 case "simne": return '\u2246'; 4785 case "ncong": case "NotTildeFullEqual": return '\u2247'; 4786 case "asymp": case "ap": case "TildeTilde": case "approx": case "thkap": case "thickapprox": return '\u2248'; 4787 case "nap": case "NotTildeTilde": case "napprox": return '\u2249'; 4788 case "ape": case "approxeq": return '\u224A'; 4789 case "apid": return '\u224B'; 4790 case "bcong": case "backcong": 
return '\u224C'; 4791 case "asympeq": case "CupCap": return '\u224D'; 4792 case "bump": case "HumpDownHump": case "Bumpeq": return '\u224E'; 4793 case "bumpe": case "HumpEqual": case "bumpeq": return '\u224F'; 4794 case "esdot": case "DotEqual": case "doteq": return '\u2250'; 4795 case "eDot": case "doteqdot": return '\u2251'; 4796 case "efDot": case "fallingdotseq": return '\u2252'; 4797 case "erDot": case "risingdotseq": return '\u2253'; 4798 case "colone": case "coloneq": case "Assign": return '\u2254'; 4799 case "ecolon": case "eqcolon": return '\u2255'; 4800 case "ecir": case "eqcirc": return '\u2256'; 4801 case "cire": case "circeq": return '\u2257'; 4802 case "wedgeq": return '\u2259'; 4803 case "veeeq": return '\u225A'; 4804 case "trie": case "triangleq": return '\u225C'; 4805 case "equest": case "questeq": return '\u225F'; 4806 case "ne": case "NotEqual": return '\u2260'; 4807 case "equiv": case "Congruent": return '\u2261'; 4808 case "nequiv": case "NotCongruent": return '\u2262'; 4809 case "le": case "leq": return '\u2264'; 4810 case "ge": case "GreaterEqual": case "geq": return '\u2265'; 4811 case "lE": case "LessFullEqual": case "leqq": return '\u2266'; 4812 case "gE": case "GreaterFullEqual": case "geqq": return '\u2267'; 4813 case "lnE": case "lneqq": return '\u2268'; 4814 case "gnE": case "gneqq": return '\u2269'; 4815 case "Lt": case "NestedLessLess": case "ll": return '\u226A'; 4816 case "Gt": case "NestedGreaterGreater": case "gg": return '\u226B'; 4817 case "twixt": case "between": return '\u226C'; 4818 case "NotCupCap": return '\u226D'; 4819 case "nlt": case "NotLess": case "nless": return '\u226E'; 4820 case "ngt": case "NotGreater": case "ngtr": return '\u226F'; 4821 case "nle": case "NotLessEqual": case "nleq": return '\u2270'; 4822 case "nge": case "NotGreaterEqual": case "ngeq": return '\u2271'; 4823 case "lsim": case "LessTilde": case "lesssim": return '\u2272'; 4824 case "gsim": case "gtrsim": case "GreaterTilde": return '\u2273'; 4825 
case "nlsim": case "NotLessTilde": return '\u2274'; 4826 case "ngsim": case "NotGreaterTilde": return '\u2275'; 4827 case "lg": case "lessgtr": case "LessGreater": return '\u2276'; 4828 case "gl": case "gtrless": case "GreaterLess": return '\u2277'; 4829 case "ntlg": case "NotLessGreater": return '\u2278'; 4830 case "ntgl": case "NotGreaterLess": return '\u2279'; 4831 case "pr": case "Precedes": case "prec": return '\u227A'; 4832 case "sc": case "Succeeds": case "succ": return '\u227B'; 4833 case "prcue": case "PrecedesSlantEqual": case "preccurlyeq": return '\u227C'; 4834 case "sccue": case "SucceedsSlantEqual": case "succcurlyeq": return '\u227D'; 4835 case "prsim": case "precsim": case "PrecedesTilde": return '\u227E'; 4836 case "scsim": case "succsim": case "SucceedsTilde": return '\u227F'; 4837 case "npr": case "nprec": case "NotPrecedes": return '\u2280'; 4838 case "nsc": case "nsucc": case "NotSucceeds": return '\u2281'; 4839 case "sub": case "subset": return '\u2282'; 4840 case "sup": case "supset": case "Superset": return '\u2283'; 4841 case "nsub": return '\u2284'; 4842 case "nsup": return '\u2285'; 4843 case "sube": case "SubsetEqual": case "subseteq": return '\u2286'; 4844 case "supe": case "supseteq": case "SupersetEqual": return '\u2287'; 4845 case "nsube": case "nsubseteq": case "NotSubsetEqual": return '\u2288'; 4846 case "nsupe": case "nsupseteq": case "NotSupersetEqual": return '\u2289'; 4847 case "subne": case "subsetneq": return '\u228A'; 4848 case "supne": case "supsetneq": return '\u228B'; 4849 case "cupdot": return '\u228D'; 4850 case "uplus": case "UnionPlus": return '\u228E'; 4851 case "sqsub": case "SquareSubset": case "sqsubset": return '\u228F'; 4852 case "sqsup": case "SquareSuperset": case "sqsupset": return '\u2290'; 4853 case "sqsube": case "SquareSubsetEqual": case "sqsubseteq": return '\u2291'; 4854 case "sqsupe": case "SquareSupersetEqual": case "sqsupseteq": return '\u2292'; 4855 case "sqcap": case "SquareIntersection": return 
'\u2293'; 4856 case "sqcup": case "SquareUnion": return '\u2294'; 4857 case "oplus": case "CirclePlus": return '\u2295'; 4858 case "ominus": case "CircleMinus": return '\u2296'; 4859 case "otimes": case "CircleTimes": return '\u2297'; 4860 case "osol": return '\u2298'; 4861 case "odot": case "CircleDot": return '\u2299'; 4862 case "ocir": case "circledcirc": return '\u229A'; 4863 case "oast": case "circledast": return '\u229B'; 4864 case "odash": case "circleddash": return '\u229D'; 4865 case "plusb": case "boxplus": return '\u229E'; 4866 case "minusb": case "boxminus": return '\u229F'; 4867 case "timesb": case "boxtimes": return '\u22A0'; 4868 case "sdotb": case "dotsquare": return '\u22A1'; 4869 case "vdash": case "RightTee": return '\u22A2'; 4870 case "dashv": case "LeftTee": return '\u22A3'; 4871 case "top": case "DownTee": return '\u22A4'; 4872 case "bottom": case "bot": case "perp": case "UpTee": return '\u22A5'; 4873 case "models": return '\u22A7'; 4874 case "vDash": case "DoubleRightTee": return '\u22A8'; 4875 case "Vdash": return '\u22A9'; 4876 case "Vvdash": return '\u22AA'; 4877 case "VDash": return '\u22AB'; 4878 case "nvdash": return '\u22AC'; 4879 case "nvDash": return '\u22AD'; 4880 case "nVdash": return '\u22AE'; 4881 case "nVDash": return '\u22AF'; 4882 case "prurel": return '\u22B0'; 4883 case "vltri": case "vartriangleleft": case "LeftTriangle": return '\u22B2'; 4884 case "vrtri": case "vartriangleright": case "RightTriangle": return '\u22B3'; 4885 case "ltrie": case "trianglelefteq": case "LeftTriangleEqual": return '\u22B4'; 4886 case "rtrie": case "trianglerighteq": case "RightTriangleEqual": return '\u22B5'; 4887 case "origof": return '\u22B6'; 4888 case "imof": return '\u22B7'; 4889 case "mumap": case "multimap": return '\u22B8'; 4890 case "hercon": return '\u22B9'; 4891 case "intcal": case "intercal": return '\u22BA'; 4892 case "veebar": return '\u22BB'; 4893 case "barvee": return '\u22BD'; 4894 case "angrtvb": return '\u22BE'; 4895 case 
"lrtri": return '\u22BF'; 4896 case "xwedge": case "Wedge": case "bigwedge": return '\u22C0'; 4897 case "xvee": case "Vee": case "bigvee": return '\u22C1'; 4898 case "xcap": case "Intersection": case "bigcap": return '\u22C2'; 4899 case "xcup": case "Union": case "bigcup": return '\u22C3'; 4900 case "diam": case "diamond": case "Diamond": return '\u22C4'; 4901 case "sdot": return '\u22C5'; 4902 case "sstarf": case "Star": return '\u22C6'; 4903 case "divonx": case "divideontimes": return '\u22C7'; 4904 case "bowtie": return '\u22C8'; 4905 case "ltimes": return '\u22C9'; 4906 case "rtimes": return '\u22CA'; 4907 case "lthree": case "leftthreetimes": return '\u22CB'; 4908 case "rthree": case "rightthreetimes": return '\u22CC'; 4909 case "bsime": case "backsimeq": return '\u22CD'; 4910 case "cuvee": case "curlyvee": return '\u22CE'; 4911 case "cuwed": case "curlywedge": return '\u22CF'; 4912 case "Sub": case "Subset": return '\u22D0'; 4913 case "Sup": case "Supset": return '\u22D1'; 4914 case "Cap": return '\u22D2'; 4915 case "Cup": return '\u22D3'; 4916 case "fork": case "pitchfork": return '\u22D4'; 4917 case "epar": return '\u22D5'; 4918 case "ltdot": case "lessdot": return '\u22D6'; 4919 case "gtdot": case "gtrdot": return '\u22D7'; 4920 case "Ll": return '\u22D8'; 4921 case "Gg": case "ggg": return '\u22D9'; 4922 case "leg": case "LessEqualGreater": case "lesseqgtr": return '\u22DA'; 4923 case "gel": case "gtreqless": case "GreaterEqualLess": return '\u22DB'; 4924 case "cuepr": case "curlyeqprec": return '\u22DE'; 4925 case "cuesc": case "curlyeqsucc": return '\u22DF'; 4926 case "nprcue": case "NotPrecedesSlantEqual": return '\u22E0'; 4927 case "nsccue": case "NotSucceedsSlantEqual": return '\u22E1'; 4928 case "nsqsube": case "NotSquareSubsetEqual": return '\u22E2'; 4929 case "nsqsupe": case "NotSquareSupersetEqual": return '\u22E3'; 4930 case "lnsim": return '\u22E6'; 4931 case "gnsim": return '\u22E7'; 4932 case "prnsim": case "precnsim": return '\u22E8'; 4933 
case "scnsim": case "succnsim": return '\u22E9'; 4934 case "nltri": case "ntriangleleft": case "NotLeftTriangle": return '\u22EA'; 4935 case "nrtri": case "ntriangleright": case "NotRightTriangle": return '\u22EB'; 4936 case "nltrie": case "ntrianglelefteq": case "NotLeftTriangleEqual": return '\u22EC'; 4937 case "nrtrie": case "ntrianglerighteq": case "NotRightTriangleEqual": return '\u22ED'; 4938 case "vellip": return '\u22EE'; 4939 case "ctdot": return '\u22EF'; 4940 case "utdot": return '\u22F0'; 4941 case "dtdot": return '\u22F1'; 4942 case "disin": return '\u22F2'; 4943 case "isinsv": return '\u22F3'; 4944 case "isins": return '\u22F4'; 4945 case "isindot": return '\u22F5'; 4946 case "notinvc": return '\u22F6'; 4947 case "notinvb": return '\u22F7'; 4948 case "isinE": return '\u22F9'; 4949 case "nisd": return '\u22FA'; 4950 case "xnis": return '\u22FB'; 4951 case "nis": return '\u22FC'; 4952 case "notnivc": return '\u22FD'; 4953 case "notnivb": return '\u22FE'; 4954 case "barwed": case "barwedge": return '\u2305'; 4955 case "Barwed": case "doublebarwedge": return '\u2306'; 4956 case "lceil": case "LeftCeiling": return '\u2308'; 4957 case "rceil": case "RightCeiling": return '\u2309'; 4958 case "lfloor": case "LeftFloor": return '\u230A'; 4959 case "rfloor": case "RightFloor": return '\u230B'; 4960 case "drcrop": return '\u230C'; 4961 case "dlcrop": return '\u230D'; 4962 case "urcrop": return '\u230E'; 4963 case "ulcrop": return '\u230F'; 4964 case "bnot": return '\u2310'; 4965 case "profline": return '\u2312'; 4966 case "profsurf": return '\u2313'; 4967 case "telrec": return '\u2315'; 4968 case "target": return '\u2316'; 4969 case "ulcorn": case "ulcorner": return '\u231C'; 4970 case "urcorn": case "urcorner": return '\u231D'; 4971 case "dlcorn": case "llcorner": return '\u231E'; 4972 case "drcorn": case "lrcorner": return '\u231F'; 4973 case "frown": case "sfrown": return '\u2322'; 4974 case "smile": case "ssmile": return '\u2323'; 4975 case "cylcty": return 
'\u232D'; 4976 case "profalar": return '\u232E'; 4977 case "topbot": return '\u2336'; 4978 case "ovbar": return '\u233D'; 4979 case "solbar": return '\u233F'; 4980 case "angzarr": return '\u237C'; 4981 case "lmoust": case "lmoustache": return '\u23B0'; 4982 case "rmoust": case "rmoustache": return '\u23B1'; 4983 case "tbrk": case "OverBracket": return '\u23B4'; 4984 case "bbrk": case "UnderBracket": return '\u23B5'; 4985 case "bbrktbrk": return '\u23B6'; 4986 case "OverParenthesis": return '\u23DC'; 4987 case "UnderParenthesis": return '\u23DD'; 4988 case "OverBrace": return '\u23DE'; 4989 case "UnderBrace": return '\u23DF'; 4990 case "trpezium": return '\u23E2'; 4991 case "elinters": return '\u23E7'; 4992 case "blank": return '\u2423'; 4993 case "oS": case "circledS": return '\u24C8'; 4994 case "boxh": case "HorizontalLine": return '\u2500'; 4995 case "boxv": return '\u2502'; 4996 case "boxdr": return '\u250C'; 4997 case "boxdl": return '\u2510'; 4998 case "boxur": return '\u2514'; 4999 case "boxul": return '\u2518'; 5000 case "boxvr": return '\u251C'; 5001 case "boxvl": return '\u2524'; 5002 case "boxhd": return '\u252C'; 5003 case "boxhu": return '\u2534'; 5004 case "boxvh": return '\u253C'; 5005 case "boxH": return '\u2550'; 5006 case "boxV": return '\u2551'; 5007 case "boxdR": return '\u2552'; 5008 case "boxDr": return '\u2553'; 5009 case "boxDR": return '\u2554'; 5010 case "boxdL": return '\u2555'; 5011 case "boxDl": return '\u2556'; 5012 case "boxDL": return '\u2557'; 5013 case "boxuR": return '\u2558'; 5014 case "boxUr": return '\u2559'; 5015 case "boxUR": return '\u255A'; 5016 case "boxuL": return '\u255B'; 5017 case "boxUl": return '\u255C'; 5018 case "boxUL": return '\u255D'; 5019 case "boxvR": return '\u255E'; 5020 case "boxVr": return '\u255F'; 5021 case "boxVR": return '\u2560'; 5022 case "boxvL": return '\u2561'; 5023 case "boxVl": return '\u2562'; 5024 case "boxVL": return '\u2563'; 5025 case "boxHd": return '\u2564'; 5026 case "boxhD": return 
'\u2565'; 5027 case "boxHD": return '\u2566'; 5028 case "boxHu": return '\u2567'; 5029 case "boxhU": return '\u2568'; 5030 case "boxHU": return '\u2569'; 5031 case "boxvH": return '\u256A'; 5032 case "boxVh": return '\u256B'; 5033 case "boxVH": return '\u256C'; 5034 case "uhblk": return '\u2580'; 5035 case "lhblk": return '\u2584'; 5036 case "block": return '\u2588'; 5037 case "blk14": return '\u2591'; 5038 case "blk12": return '\u2592'; 5039 case "blk34": return '\u2593'; 5040 case "squ": case "square": case "Square": return '\u25A1'; 5041 case "squf": case "squarf": case "blacksquare": case "FilledVerySmallSquare": return '\u25AA'; 5042 case "EmptyVerySmallSquare": return '\u25AB'; 5043 case "rect": return '\u25AD'; 5044 case "marker": return '\u25AE'; 5045 case "fltns": return '\u25B1'; 5046 case "xutri": case "bigtriangleup": return '\u25B3'; 5047 case "utrif": case "blacktriangle": return '\u25B4'; 5048 case "utri": case "triangle": return '\u25B5'; 5049 case "rtrif": case "blacktriangleright": return '\u25B8'; 5050 case "rtri": case "triangleright": return '\u25B9'; 5051 case "xdtri": case "bigtriangledown": return '\u25BD'; 5052 case "dtrif": case "blacktriangledown": return '\u25BE'; 5053 case "dtri": case "triangledown": return '\u25BF'; 5054 case "ltrif": case "blacktriangleleft": return '\u25C2'; 5055 case "ltri": case "triangleleft": return '\u25C3'; 5056 case "loz": case "lozenge": return '\u25CA'; 5057 case "cir": return '\u25CB'; 5058 case "tridot": return '\u25EC'; 5059 case "xcirc": case "bigcirc": return '\u25EF'; 5060 case "ultri": return '\u25F8'; 5061 case "urtri": return '\u25F9'; 5062 case "lltri": return '\u25FA'; 5063 case "EmptySmallSquare": return '\u25FB'; 5064 case "FilledSmallSquare": return '\u25FC'; 5065 case "starf": case "bigstar": return '\u2605'; 5066 case "star": return '\u2606'; 5067 case "phone": return '\u260E'; 5068 case "female": return '\u2640'; 5069 case "male": return '\u2642'; 5070 case "spades": case "spadesuit": 
return '\u2660'; 5071 case "clubs": case "clubsuit": return '\u2663'; 5072 case "hearts": case "heartsuit": return '\u2665'; 5073 case "diams": case "diamondsuit": return '\u2666'; 5074 case "sung": return '\u266A'; 5075 case "flat": return '\u266D'; 5076 case "natur": case "natural": return '\u266E'; 5077 case "sharp": return '\u266F'; 5078 case "check": case "checkmark": return '\u2713'; 5079 case "cross": return '\u2717'; 5080 case "malt": case "maltese": return '\u2720'; 5081 case "sext": return '\u2736'; 5082 case "VerticalSeparator": return '\u2758'; 5083 case "lbbrk": return '\u2772'; 5084 case "rbbrk": return '\u2773'; 5085 case "bsolhsub": return '\u27C8'; 5086 case "suphsol": return '\u27C9'; 5087 case "lobrk": case "LeftDoubleBracket": return '\u27E6'; 5088 case "robrk": case "RightDoubleBracket": return '\u27E7'; 5089 case "lang": case "LeftAngleBracket": case "langle": return '\u27E8'; 5090 case "rang": case "RightAngleBracket": case "rangle": return '\u27E9'; 5091 case "Lang": return '\u27EA'; 5092 case "Rang": return '\u27EB'; 5093 case "loang": return '\u27EC'; 5094 case "roang": return '\u27ED'; 5095 case "xlarr": case "longleftarrow": case "LongLeftArrow": return '\u27F5'; 5096 case "xrarr": case "longrightarrow": case "LongRightArrow": return '\u27F6'; 5097 case "xharr": case "longleftrightarrow": case "LongLeftRightArrow": return '\u27F7'; 5098 case "xlArr": case "Longleftarrow": case "DoubleLongLeftArrow": return '\u27F8'; 5099 case "xrArr": case "Longrightarrow": case "DoubleLongRightArrow": return '\u27F9'; 5100 case "xhArr": case "Longleftrightarrow": case "DoubleLongLeftRightArrow": return '\u27FA'; 5101 case "xmap": case "longmapsto": return '\u27FC'; 5102 case "dzigrarr": return '\u27FF'; 5103 case "nvlArr": return '\u2902'; 5104 case "nvrArr": return '\u2903'; 5105 case "nvHarr": return '\u2904'; 5106 case "Map": return '\u2905'; 5107 case "lbarr": return '\u290C'; 5108 case "rbarr": case "bkarow": return '\u290D'; 5109 case "lBarr": 
return '\u290E'; 5110 case "rBarr": case "dbkarow": return '\u290F'; 5111 case "RBarr": case "drbkarow": return '\u2910'; 5112 case "DDotrahd": return '\u2911'; 5113 case "UpArrowBar": return '\u2912'; 5114 case "DownArrowBar": return '\u2913'; 5115 case "Rarrtl": return '\u2916'; 5116 case "latail": return '\u2919'; 5117 case "ratail": return '\u291A'; 5118 case "lAtail": return '\u291B'; 5119 case "rAtail": return '\u291C'; 5120 case "larrfs": return '\u291D'; 5121 case "rarrfs": return '\u291E'; 5122 case "larrbfs": return '\u291F'; 5123 case "rarrbfs": return '\u2920'; 5124 case "nwarhk": return '\u2923'; 5125 case "nearhk": return '\u2924'; 5126 case "searhk": case "hksearow": return '\u2925'; 5127 case "swarhk": case "hkswarow": return '\u2926'; 5128 case "nwnear": return '\u2927'; 5129 case "nesear": case "toea": return '\u2928'; 5130 case "seswar": case "tosa": return '\u2929'; 5131 case "swnwar": return '\u292A'; 5132 case "rarrc": return '\u2933'; 5133 case "cudarrr": return '\u2935'; 5134 case "ldca": return '\u2936'; 5135 case "rdca": return '\u2937'; 5136 case "cudarrl": return '\u2938'; 5137 case "larrpl": return '\u2939'; 5138 case "curarrm": return '\u293C'; 5139 case "cularrp": return '\u293D'; 5140 case "rarrpl": return '\u2945'; 5141 case "harrcir": return '\u2948'; 5142 case "Uarrocir": return '\u2949'; 5143 case "lurdshar": return '\u294A'; 5144 case "ldrushar": return '\u294B'; 5145 case "LeftRightVector": return '\u294E'; 5146 case "RightUpDownVector": return '\u294F'; 5147 case "DownLeftRightVector": return '\u2950'; 5148 case "LeftUpDownVector": return '\u2951'; 5149 case "LeftVectorBar": return '\u2952'; 5150 case "RightVectorBar": return '\u2953'; 5151 case "RightUpVectorBar": return '\u2954'; 5152 case "RightDownVectorBar": return '\u2955'; 5153 case "DownLeftVectorBar": return '\u2956'; 5154 case "DownRightVectorBar": return '\u2957'; 5155 case "LeftUpVectorBar": return '\u2958'; 5156 case "LeftDownVectorBar": return '\u2959'; 5157 case 
"LeftTeeVector": return '\u295A'; 5158 case "RightTeeVector": return '\u295B'; 5159 case "RightUpTeeVector": return '\u295C'; 5160 case "RightDownTeeVector": return '\u295D'; 5161 case "DownLeftTeeVector": return '\u295E'; 5162 case "DownRightTeeVector": return '\u295F'; 5163 case "LeftUpTeeVector": return '\u2960'; 5164 case "LeftDownTeeVector": return '\u2961'; 5165 case "lHar": return '\u2962'; 5166 case "uHar": return '\u2963'; 5167 case "rHar": return '\u2964'; 5168 case "dHar": return '\u2965'; 5169 case "luruhar": return '\u2966'; 5170 case "ldrdhar": return '\u2967'; 5171 case "ruluhar": return '\u2968'; 5172 case "rdldhar": return '\u2969'; 5173 case "lharul": return '\u296A'; 5174 case "llhard": return '\u296B'; 5175 case "rharul": return '\u296C'; 5176 case "lrhard": return '\u296D'; 5177 case "udhar": case "UpEquilibrium": return '\u296E'; 5178 case "duhar": case "ReverseUpEquilibrium": return '\u296F'; 5179 case "RoundImplies": return '\u2970'; 5180 case "erarr": return '\u2971'; 5181 case "simrarr": return '\u2972'; 5182 case "larrsim": return '\u2973'; 5183 case "rarrsim": return '\u2974'; 5184 case "rarrap": return '\u2975'; 5185 case "ltlarr": return '\u2976'; 5186 case "gtrarr": return '\u2978'; 5187 case "subrarr": return '\u2979'; 5188 case "suplarr": return '\u297B'; 5189 case "lfisht": return '\u297C'; 5190 case "rfisht": return '\u297D'; 5191 case "ufisht": return '\u297E'; 5192 case "dfisht": return '\u297F'; 5193 case "lopar": return '\u2985'; 5194 case "ropar": return '\u2986'; 5195 case "lbrke": return '\u298B'; 5196 case "rbrke": return '\u298C'; 5197 case "lbrkslu": return '\u298D'; 5198 case "rbrksld": return '\u298E'; 5199 case "lbrksld": return '\u298F'; 5200 case "rbrkslu": return '\u2990'; 5201 case "langd": return '\u2991'; 5202 case "rangd": return '\u2992'; 5203 case "lparlt": return '\u2993'; 5204 case "rpargt": return '\u2994'; 5205 case "gtlPar": return '\u2995'; 5206 case "ltrPar": return '\u2996'; 5207 case "vzigzag": 
return '\u299A'; 5208 case "vangrt": return '\u299C'; 5209 case "angrtvbd": return '\u299D'; 5210 case "ange": return '\u29A4'; 5211 case "range": return '\u29A5'; 5212 case "dwangle": return '\u29A6'; 5213 case "uwangle": return '\u29A7'; 5214 case "angmsdaa": return '\u29A8'; 5215 case "angmsdab": return '\u29A9'; 5216 case "angmsdac": return '\u29AA'; 5217 case "angmsdad": return '\u29AB'; 5218 case "angmsdae": return '\u29AC'; 5219 case "angmsdaf": return '\u29AD'; 5220 case "angmsdag": return '\u29AE'; 5221 case "angmsdah": return '\u29AF'; 5222 case "bemptyv": return '\u29B0'; 5223 case "demptyv": return '\u29B1'; 5224 case "cemptyv": return '\u29B2'; 5225 case "raemptyv": return '\u29B3'; 5226 case "laemptyv": return '\u29B4'; 5227 case "ohbar": return '\u29B5'; 5228 case "omid": return '\u29B6'; 5229 case "opar": return '\u29B7'; 5230 case "operp": return '\u29B9'; 5231 case "olcross": return '\u29BB'; 5232 case "odsold": return '\u29BC'; 5233 case "olcir": return '\u29BE'; 5234 case "ofcir": return '\u29BF'; 5235 case "olt": return '\u29C0'; 5236 case "ogt": return '\u29C1'; 5237 case "cirscir": return '\u29C2'; 5238 case "cirE": return '\u29C3'; 5239 case "solb": return '\u29C4'; 5240 case "bsolb": return '\u29C5'; 5241 case "boxbox": return '\u29C9'; 5242 case "trisb": return '\u29CD'; 5243 case "rtriltri": return '\u29CE'; 5244 case "LeftTriangleBar": return '\u29CF'; 5245 case "RightTriangleBar": return '\u29D0'; 5246 case "iinfin": return '\u29DC'; 5247 case "infintie": return '\u29DD'; 5248 case "nvinfin": return '\u29DE'; 5249 case "eparsl": return '\u29E3'; 5250 case "smeparsl": return '\u29E4'; 5251 case "eqvparsl": return '\u29E5'; 5252 case "lozf": case "blacklozenge": return '\u29EB'; 5253 case "RuleDelayed": return '\u29F4'; 5254 case "dsol": return '\u29F6'; 5255 case "xodot": case "bigodot": return '\u2A00'; 5256 case "xoplus": case "bigoplus": return '\u2A01'; 5257 case "xotime": case "bigotimes": return '\u2A02'; 5258 case "xuplus": case 
"biguplus": return '\u2A04'; 5259 case "xsqcup": case "bigsqcup": return '\u2A06'; 5260 case "qint": case "iiiint": return '\u2A0C'; 5261 case "fpartint": return '\u2A0D'; 5262 case "cirfnint": return '\u2A10'; 5263 case "awint": return '\u2A11'; 5264 case "rppolint": return '\u2A12'; 5265 case "scpolint": return '\u2A13'; 5266 case "npolint": return '\u2A14'; 5267 case "pointint": return '\u2A15'; 5268 case "quatint": return '\u2A16'; 5269 case "intlarhk": return '\u2A17'; 5270 case "pluscir": return '\u2A22'; 5271 case "plusacir": return '\u2A23'; 5272 case "simplus": return '\u2A24'; 5273 case "plusdu": return '\u2A25'; 5274 case "plussim": return '\u2A26'; 5275 case "plustwo": return '\u2A27'; 5276 case "mcomma": return '\u2A29'; 5277 case "minusdu": return '\u2A2A'; 5278 case "loplus": return '\u2A2D'; 5279 case "roplus": return '\u2A2E'; 5280 case "Cross": return '\u2A2F'; 5281 case "timesd": return '\u2A30'; 5282 case "timesbar": return '\u2A31'; 5283 case "smashp": return '\u2A33'; 5284 case "lotimes": return '\u2A34'; 5285 case "rotimes": return '\u2A35'; 5286 case "otimesas": return '\u2A36'; 5287 case "Otimes": return '\u2A37'; 5288 case "odiv": return '\u2A38'; 5289 case "triplus": return '\u2A39'; 5290 case "triminus": return '\u2A3A'; 5291 case "tritime": return '\u2A3B'; 5292 case "iprod": case "intprod": return '\u2A3C'; 5293 case "amalg": return '\u2A3F'; 5294 case "capdot": return '\u2A40'; 5295 case "ncup": return '\u2A42'; 5296 case "ncap": return '\u2A43'; 5297 case "capand": return '\u2A44'; 5298 case "cupor": return '\u2A45'; 5299 case "cupcap": return '\u2A46'; 5300 case "capcup": return '\u2A47'; 5301 case "cupbrcap": return '\u2A48'; 5302 case "capbrcup": return '\u2A49'; 5303 case "cupcup": return '\u2A4A'; 5304 case "capcap": return '\u2A4B'; 5305 case "ccups": return '\u2A4C'; 5306 case "ccaps": return '\u2A4D'; 5307 case "ccupssm": return '\u2A50'; 5308 case "And": return '\u2A53'; 5309 case "Or": return '\u2A54'; 5310 case "andand": 
return '\u2A55'; 5311 case "oror": return '\u2A56'; 5312 case "orslope": return '\u2A57'; 5313 case "andslope": return '\u2A58'; 5314 case "andv": return '\u2A5A'; 5315 case "orv": return '\u2A5B'; 5316 case "andd": return '\u2A5C'; 5317 case "ord": return '\u2A5D'; 5318 case "wedbar": return '\u2A5F'; 5319 case "sdote": return '\u2A66'; 5320 case "simdot": return '\u2A6A'; 5321 case "congdot": return '\u2A6D'; 5322 case "easter": return '\u2A6E'; 5323 case "apacir": return '\u2A6F'; 5324 case "apE": return '\u2A70'; 5325 case "eplus": return '\u2A71'; 5326 case "pluse": return '\u2A72'; 5327 case "Esim": return '\u2A73'; 5328 case "Colone": return '\u2A74'; 5329 case "Equal": return '\u2A75'; 5330 case "eDDot": case "ddotseq": return '\u2A77'; 5331 case "equivDD": return '\u2A78'; 5332 case "ltcir": return '\u2A79'; 5333 case "gtcir": return '\u2A7A'; 5334 case "ltquest": return '\u2A7B'; 5335 case "gtquest": return '\u2A7C'; 5336 case "les": case "LessSlantEqual": case "leqslant": return '\u2A7D'; 5337 case "ges": case "GreaterSlantEqual": case "geqslant": return '\u2A7E'; 5338 case "lesdot": return '\u2A7F'; 5339 case "gesdot": return '\u2A80'; 5340 case "lesdoto": return '\u2A81'; 5341 case "gesdoto": return '\u2A82'; 5342 case "lesdotor": return '\u2A83'; 5343 case "gesdotol": return '\u2A84'; 5344 case "lap": case "lessapprox": return '\u2A85'; 5345 case "gap": case "gtrapprox": return '\u2A86'; 5346 case "lne": case "lneq": return '\u2A87'; 5347 case "gne": case "gneq": return '\u2A88'; 5348 case "lnap": case "lnapprox": return '\u2A89'; 5349 case "gnap": case "gnapprox": return '\u2A8A'; 5350 case "lEg": case "lesseqqgtr": return '\u2A8B'; 5351 case "gEl": case "gtreqqless": return '\u2A8C'; 5352 case "lsime": return '\u2A8D'; 5353 case "gsime": return '\u2A8E'; 5354 case "lsimg": return '\u2A8F'; 5355 case "gsiml": return '\u2A90'; 5356 case "lgE": return '\u2A91'; 5357 case "glE": return '\u2A92'; 5358 case "lesges": return '\u2A93'; 5359 case "gesles": 
return '\u2A94'; 5360 case "els": case "eqslantless": return '\u2A95'; 5361 case "egs": case "eqslantgtr": return '\u2A96'; 5362 case "elsdot": return '\u2A97'; 5363 case "egsdot": return '\u2A98'; 5364 case "el": return '\u2A99'; 5365 case "eg": return '\u2A9A'; 5366 case "siml": return '\u2A9D'; 5367 case "simg": return '\u2A9E'; 5368 case "simlE": return '\u2A9F'; 5369 case "simgE": return '\u2AA0'; 5370 case "LessLess": return '\u2AA1'; 5371 case "GreaterGreater": return '\u2AA2'; 5372 case "glj": return '\u2AA4'; 5373 case "gla": return '\u2AA5'; 5374 case "ltcc": return '\u2AA6'; 5375 case "gtcc": return '\u2AA7'; 5376 case "lescc": return '\u2AA8'; 5377 case "gescc": return '\u2AA9'; 5378 case "smt": return '\u2AAA'; 5379 case "lat": return '\u2AAB'; 5380 case "smte": return '\u2AAC'; 5381 case "late": return '\u2AAD'; 5382 case "bumpE": return '\u2AAE'; 5383 case "pre": case "preceq": case "PrecedesEqual": return '\u2AAF'; 5384 case "sce": case "succeq": case "SucceedsEqual": return '\u2AB0'; 5385 case "prE": return '\u2AB3'; 5386 case "scE": return '\u2AB4'; 5387 case "prnE": case "precneqq": return '\u2AB5'; 5388 case "scnE": case "succneqq": return '\u2AB6'; 5389 case "prap": case "precapprox": return '\u2AB7'; 5390 case "scap": case "succapprox": return '\u2AB8'; 5391 case "prnap": case "precnapprox": return '\u2AB9'; 5392 case "scnap": case "succnapprox": return '\u2ABA'; 5393 case "Pr": return '\u2ABB'; 5394 case "Sc": return '\u2ABC'; 5395 case "subdot": return '\u2ABD'; 5396 case "supdot": return '\u2ABE'; 5397 case "subplus": return '\u2ABF'; 5398 case "supplus": return '\u2AC0'; 5399 case "submult": return '\u2AC1'; 5400 case "supmult": return '\u2AC2'; 5401 case "subedot": return '\u2AC3'; 5402 case "supedot": return '\u2AC4'; 5403 case "subE": case "subseteqq": return '\u2AC5'; 5404 case "supE": case "supseteqq": return '\u2AC6'; 5405 case "subsim": return '\u2AC7'; 5406 case "supsim": return '\u2AC8'; 5407 case "subnE": case "subsetneqq": 
return '\u2ACB'; 5408 case "supnE": case "supsetneqq": return '\u2ACC'; 5409 case "csub": return '\u2ACF'; 5410 case "csup": return '\u2AD0'; 5411 case "csube": return '\u2AD1'; 5412 case "csupe": return '\u2AD2'; 5413 case "subsup": return '\u2AD3'; 5414 case "supsub": return '\u2AD4'; 5415 case "subsub": return '\u2AD5'; 5416 case "supsup": return '\u2AD6'; 5417 case "suphsub": return '\u2AD7'; 5418 case "supdsub": return '\u2AD8'; 5419 case "forkv": return '\u2AD9'; 5420 case "topfork": return '\u2ADA'; 5421 case "mlcp": return '\u2ADB'; 5422 case "Dashv": case "DoubleLeftTee": return '\u2AE4'; 5423 case "Vdashl": return '\u2AE6'; 5424 case "Barv": return '\u2AE7'; 5425 case "vBar": return '\u2AE8'; 5426 case "vBarv": return '\u2AE9'; 5427 case "Vbar": return '\u2AEB'; 5428 case "Not": return '\u2AEC'; 5429 case "bNot": return '\u2AED'; 5430 case "rnmid": return '\u2AEE'; 5431 case "cirmid": return '\u2AEF'; 5432 case "midcir": return '\u2AF0'; 5433 case "topcir": return '\u2AF1'; 5434 case "nhpar": return '\u2AF2'; 5435 case "parsim": return '\u2AF3'; 5436 case "parsl": return '\u2AFD'; 5437 case "fflig": return '\uFB00'; 5438 case "filig": return '\uFB01'; 5439 case "fllig": return '\uFB02'; 5440 case "ffilig": return '\uFB03'; 5441 case "ffllig": return '\uFB04'; 5442 case "Ascr": return '\U0001D49C'; 5443 case "Cscr": return '\U0001D49E'; 5444 case "Dscr": return '\U0001D49F'; 5445 case "Gscr": return '\U0001D4A2'; 5446 case "Jscr": return '\U0001D4A5'; 5447 case "Kscr": return '\U0001D4A6'; 5448 case "Nscr": return '\U0001D4A9'; 5449 case "Oscr": return '\U0001D4AA'; 5450 case "Pscr": return '\U0001D4AB'; 5451 case "Qscr": return '\U0001D4AC'; 5452 case "Sscr": return '\U0001D4AE'; 5453 case "Tscr": return '\U0001D4AF'; 5454 case "Uscr": return '\U0001D4B0'; 5455 case "Vscr": return '\U0001D4B1'; 5456 case "Wscr": return '\U0001D4B2'; 5457 case "Xscr": return '\U0001D4B3'; 5458 case "Yscr": return '\U0001D4B4'; 5459 case "Zscr": return '\U0001D4B5'; 5460 
case "ascr": return '\U0001D4B6'; 5461 case "bscr": return '\U0001D4B7'; 5462 case "cscr": return '\U0001D4B8'; 5463 case "dscr": return '\U0001D4B9'; 5464 case "fscr": return '\U0001D4BB'; 5465 case "hscr": return '\U0001D4BD'; 5466 case "iscr": return '\U0001D4BE'; 5467 case "jscr": return '\U0001D4BF'; 5468 case "kscr": return '\U0001D4C0'; 5469 case "lscr": return '\U0001D4C1'; 5470 case "mscr": return '\U0001D4C2'; 5471 case "nscr": return '\U0001D4C3'; 5472 case "pscr": return '\U0001D4C5'; 5473 case "qscr": return '\U0001D4C6'; 5474 case "rscr": return '\U0001D4C7'; 5475 case "sscr": return '\U0001D4C8'; 5476 case "tscr": return '\U0001D4C9'; 5477 case "uscr": return '\U0001D4CA'; 5478 case "vscr": return '\U0001D4CB'; 5479 case "wscr": return '\U0001D4CC'; 5480 case "xscr": return '\U0001D4CD'; 5481 case "yscr": return '\U0001D4CE'; 5482 case "zscr": return '\U0001D4CF'; 5483 case "Afr": return '\U0001D504'; 5484 case "Bfr": return '\U0001D505'; 5485 case "Dfr": return '\U0001D507'; 5486 case "Efr": return '\U0001D508'; 5487 case "Ffr": return '\U0001D509'; 5488 case "Gfr": return '\U0001D50A'; 5489 case "Jfr": return '\U0001D50D'; 5490 case "Kfr": return '\U0001D50E'; 5491 case "Lfr": return '\U0001D50F'; 5492 case "Mfr": return '\U0001D510'; 5493 case "Nfr": return '\U0001D511'; 5494 case "Ofr": return '\U0001D512'; 5495 case "Pfr": return '\U0001D513'; 5496 case "Qfr": return '\U0001D514'; 5497 case "Sfr": return '\U0001D516'; 5498 case "Tfr": return '\U0001D517'; 5499 case "Ufr": return '\U0001D518'; 5500 case "Vfr": return '\U0001D519'; 5501 case "Wfr": return '\U0001D51A'; 5502 case "Xfr": return '\U0001D51B'; 5503 case "Yfr": return '\U0001D51C'; 5504 case "afr": return '\U0001D51E'; 5505 case "bfr": return '\U0001D51F'; 5506 case "cfr": return '\U0001D520'; 5507 case "dfr": return '\U0001D521'; 5508 case "efr": return '\U0001D522'; 5509 case "ffr": return '\U0001D523'; 5510 case "gfr": return '\U0001D524'; 5511 case "hfr": return '\U0001D525'; 5512 
case "ifr": return '\U0001D526'; 5513 case "jfr": return '\U0001D527'; 5514 case "kfr": return '\U0001D528'; 5515 case "lfr": return '\U0001D529'; 5516 case "mfr": return '\U0001D52A'; 5517 case "nfr": return '\U0001D52B'; 5518 case "ofr": return '\U0001D52C'; 5519 case "pfr": return '\U0001D52D'; 5520 case "qfr": return '\U0001D52E'; 5521 case "rfr": return '\U0001D52F'; 5522 case "sfr": return '\U0001D530'; 5523 case "tfr": return '\U0001D531'; 5524 case "ufr": return '\U0001D532'; 5525 case "vfr": return '\U0001D533'; 5526 case "wfr": return '\U0001D534'; 5527 case "xfr": return '\U0001D535'; 5528 case "yfr": return '\U0001D536'; 5529 case "zfr": return '\U0001D537'; 5530 case "Aopf": return '\U0001D538'; 5531 case "Bopf": return '\U0001D539'; 5532 case "Dopf": return '\U0001D53B'; 5533 case "Eopf": return '\U0001D53C'; 5534 case "Fopf": return '\U0001D53D'; 5535 case "Gopf": return '\U0001D53E'; 5536 case "Iopf": return '\U0001D540'; 5537 case "Jopf": return '\U0001D541'; 5538 case "Kopf": return '\U0001D542'; 5539 case "Lopf": return '\U0001D543'; 5540 case "Mopf": return '\U0001D544'; 5541 case "Oopf": return '\U0001D546'; 5542 case "Sopf": return '\U0001D54A'; 5543 case "Topf": return '\U0001D54B'; 5544 case "Uopf": return '\U0001D54C'; 5545 case "Vopf": return '\U0001D54D'; 5546 case "Wopf": return '\U0001D54E'; 5547 case "Xopf": return '\U0001D54F'; 5548 case "Yopf": return '\U0001D550'; 5549 case "aopf": return '\U0001D552'; 5550 case "bopf": return '\U0001D553'; 5551 case "copf": return '\U0001D554'; 5552 case "dopf": return '\U0001D555'; 5553 case "eopf": return '\U0001D556'; 5554 case "fopf": return '\U0001D557'; 5555 case "gopf": return '\U0001D558'; 5556 case "hopf": return '\U0001D559'; 5557 case "iopf": return '\U0001D55A'; 5558 case "jopf": return '\U0001D55B'; 5559 case "kopf": return '\U0001D55C'; 5560 case "lopf": return '\U0001D55D'; 5561 case "mopf": return '\U0001D55E'; 5562 case "nopf": return '\U0001D55F'; 5563 case "oopf": return 
'\U0001D560';
		case "popf": return '\U0001D561';
		case "qopf": return '\U0001D562';
		case "ropf": return '\U0001D563';
		case "sopf": return '\U0001D564';
		case "topf": return '\U0001D565';
		case "uopf": return '\U0001D566';
		case "vopf": return '\U0001D567';
		case "wopf": return '\U0001D568';
		case "xopf": return '\U0001D569';
		case "yopf": return '\U0001D56A';
		case "zopf": return '\U0001D56B';

		// and handling numeric entities
		default:
			if(entity[1] == '#') {
				if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) {
					auto hex = entity[3..$-1];

					auto p = intFromHex(to!string(hex).toLower());
					return cast(dchar) p;
				} else {
					auto decimal = entity[2..$-1];

					// dealing with broken html entities
					while(decimal.length && (decimal[0] < '0' || decimal[0] > '9'))
						decimal = decimal[1 .. $];

					if(decimal.length == 0)
						return ' '; // this is really broken html
					// done with dealing with broken stuff

					auto p = std.conv.to!int(decimal);
					return cast(dchar) p;
				}
			} else
				return '\ufffd'; // replacement character diamond thing
	}

	assert(0);
}

import std.utf;
import std.stdio;

/// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string.
/// By default, it uses loose mode - it will try to return a useful string from garbage input too.
/// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input.
///
/// Params:
///     data = the text to decode. If it contains no `&` it is returned as-is (same slice).
///     strict = when true, an entity that is interrupted by another `&`, by a space, by
///              running too long, or by the end of the input throws instead of being
///              passed through as literal text.
///
/// Returns: the decoded text. In loose mode an entity that decodes to something that is
/// not a valid Unicode code point (for example `&#xD800;`) becomes U+FFFD instead of
/// making the utf-8 encoder throw.
///
/// Throws: `Exception` in strict mode on an unterminated entity.
/// Group: core_functionality
string htmlEntitiesDecode(string data, bool strict = false) {
	// this check makes a *big* difference; about a 50% improvement of parse speed on my test.
	if(data.indexOf("&") == -1) // all html entities begin with &
		return data; // if there are no entities in here, we can return the original slice and save some time

	// Anything that is not a run of plain letters/digits is abandoned after this many
	// characters, exactly as before; every numeric reference fits in here.
	enum shortEntityChars = 9;
	// A run made up only of ASCII letters and digits may keep going up to this length,
	// so the long names in the entity table (e.g. "DoubleLongLeftRightArrow") can
	// actually be reached. The longest name in the HTML named character reference list
	// is 31 characters, leaving one slot for the terminating ';'.
	enum maxEntityChars = 32;

	char[] a; // this seems to do a *better* job than appender!

	char[4] buffer;

	bool tryingEntity = false;
	bool plainName = true; // every char after the '&' so far was an ASCII letter or digit
	dchar[maxEntityChars + 1] entityBeingTried; // the '&' plus at most maxEntityChars more
	int entityBeingTriedLength = 0;
	int entityAttemptIndex = 0;

	// Decodes whatever is currently collected in entityBeingTried and appends it to the output.
	void putDecodedEntity() {
		dchar decoded = parseEntity(entityBeingTried[0 .. entityBeingTriedLength]);
		// std.utf.encode throws on surrogates and on values past U+10FFFF. Loose mode
		// promises a usable string from garbage, so substitute the replacement character.
		if(!strict && !std.utf.isValidDchar(decoded))
			decoded = '\ufffd';
		a ~= buffer[0 .. std.utf.encode(buffer, decoded)];
	}

	foreach(dchar ch; data) {
		if(tryingEntity) {
			entityAttemptIndex++;
			entityBeingTried[entityBeingTriedLength++] = ch;

			// I saw some crappy html in the wild that looked like &0&#1111; this tries to handle that.
			if(ch == '&') {
				if(strict)
					throw new Exception("unterminated entity; & inside another at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

				// if not strict, let's try to parse both.

				if(entityBeingTried[0 .. entityBeingTriedLength] == "&&")
					a ~= "&"; // double amp means keep the first one, still try to parse the next one
				else
					putDecodedEntity();

				// tryingEntity is still true; slot 0 already holds the '&' that starts the next attempt
				entityBeingTriedLength = 1;
				entityAttemptIndex = 0; // restarting on this '&'
				plainName = true;
			} else if(ch == ';') {
				tryingEntity = false;
				putDecodedEntity();
			} else if(ch == ' ') {
				// e.g. you & i
				if(strict)
					throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
				else {
					tryingEntity = false;
					a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]);
				}
			} else {
				immutable isNameChar =
					(ch >= 'a' && ch <= 'z') ||
					(ch >= 'A' && ch <= 'Z') ||
					(ch >= '0' && ch <= '9');
				if(!isNameChar)
					plainName = false;

				immutable limit = plainName ? maxEntityChars : shortEntityChars;
				if(entityAttemptIndex >= limit) {
					if(strict)
						throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
					else {
						// too long to be an entity: emit what was collected as literal text
						tryingEntity = false;
						a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]);
					}
				}
			}
		} else {
			if(ch == '&') {
				tryingEntity = true;
				plainName = true;
				entityBeingTriedLength = 0;
				entityBeingTried[entityBeingTriedLength++] = ch;
				entityAttemptIndex = 0;
			} else {
				a ~= buffer[0 .. std.utf.encode(buffer, ch)];
			}
		}
	}

	if(tryingEntity) {
		if(strict)
			throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

		// otherwise, let's try to recover, at least so we don't drop any data
		a ~= to!string(entityBeingTried[0 .. entityBeingTriedLength]);
		// FIXME: what if we have "cool &amp"? should we try to parse it?
	}

	return cast(string) a; // assumeUnique is actually kinda slow, lol
}

/// Base class for nodes that are not regular tags; children cannot be appended to them.
/// Group: implementations
abstract class SpecialElement : Element {
	this(Document _parentDocument) {
		super(_parentDocument);
	}

	/// Always fails: special nodes do not accept children.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a special node");
	}

	/// Special nodes all report node type 100.
	@property override int nodeType() const {
		return 100;
	}
}

/// A node whose `source` text is written to the output exactly as given.
/// Group: implementations
class RawSource : SpecialElement {
	///.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#raw";
	}

	///.
	override string nodeValue() const {
		return this.toString();
	}

	/// Puts `source` into `where` unmodified and returns it.
	override string writeToAppender(Appender!string where = appender!string()) const {
		where.put(source);
		return source;
	}

	override string toPrettyString(bool, int, string) const {
		return source;
	}


	override RawSource cloneNode(bool deep) {
		return new RawSource(parentDocument, source);
	}

	///.
string source;
}

/// Common base for server-side code nodes; the tag name is "#" followed by the given type.
/// Group: implementations
abstract class ServerSideCode : SpecialElement {
	this(Document _parentDocument, string type) {
		super(_parentDocument);
		tagName = "#" ~ type;
	}

	/// The node value is the stored source text.
	override string nodeValue() const {
		return source;
	}

	/// Appends `<`, the source and `>` to `where`; returns just the part this call wrote.
	override string writeToAppender(Appender!string where = appender!string()) const {
		auto writtenFrom = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[writtenFrom .. $];
	}

	// same text as writeToAppender; all three formatting arguments are ignored
	override string toPrettyString(bool, int, string) const {
		string text;
		text ~= "<";
		text ~= source;
		text ~= ">";
		return text;
	}

	/// The text placed between `<` and `>` on output.
	string source;
}

/// Server-side code node tagged "#php".
/// Group: implementations
class PhpCode : ServerSideCode {
	/// Stores `s` as the source.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "php");
		source = s;
	}

	// the copy shares this node's parentDocument and source; `deep` is unused
	override PhpCode cloneNode(bool deep) {
		auto copy = new PhpCode(parentDocument, source);
		return copy;
	}
}

/// Server-side code node tagged "#asp".
/// Group: implementations
class AspCode : ServerSideCode {
	/// Stores `s` as the source.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "asp");
		source = s;
	}

	// the copy shares this node's parentDocument and source; `deep` is unused
	override AspCode cloneNode(bool deep) {
		auto copy = new AspCode(parentDocument, source);
		return copy;
	}
}

/// A node written out as `<!` + source + `>`; its tag name is "#bpi".
/// Group: implementations
class BangInstruction : SpecialElement {
	/// Stores `s` as the source.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#bpi";
	}

	/// The node value is the stored source text.
	override string nodeValue() const {
		return source;
	}

	// the copy shares this node's parentDocument and source; `deep` is unused
	override BangInstruction cloneNode(bool deep) {
		auto copy = new BangInstruction(parentDocument, source);
		return copy;
	}

	/// Appends `<!`, the source and `>` to `where`; returns just the part this call wrote.
	override string writeToAppender(Appender!string where = appender!string()) const {
		auto writtenFrom = where.data.length;
		where.put("<!");
		where.put(source);
		where.put(">");
		return where.data[writtenFrom .. $];
	}

	// same text as writeToAppender; all three formatting arguments are ignored
	override string toPrettyString(bool, int, string) const {
		return "<!" ~ source ~ ">";
	}

	/// The text placed between `<!` and `>` on output.
	string source;
}

/// A node written out as `<` + source + `>`; its tag name is "#qpi".
/// Group: implementations
class QuestionInstruction : SpecialElement {
	/// Stores `s` as the source.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#qpi";
	}

	// the copy shares this node's parentDocument and source; `deep` is unused
	override QuestionInstruction cloneNode(bool deep) {
		auto copy = new QuestionInstruction(parentDocument, source);
		return copy;
	}

	/// The node value is the stored source text.
	override string nodeValue() const {
		return source;
	}

	/// Appends `<`, the source and `>` to `where`; returns just the part this call wrote.
	override string writeToAppender(Appender!string where = appender!string()) const {
		auto writtenFrom = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[writtenFrom .. $];
	}

	// same text as writeToAppender; all three formatting arguments are ignored
	override string toPrettyString(bool, int, string) const {
		return "<" ~ source ~ ">";
	}


	/// The text placed between `<` and `>` on output.
	string source;
}

/// A comment node; its tag name is "#comment".
/// Group: implementations
class HtmlComment : SpecialElement {
	/// Stores `s` as the comment text.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#comment";
	}

	// the copy shares this node's parentDocument and source; `deep` is unused
	override HtmlComment cloneNode(bool deep) {
		auto copy = new HtmlComment(parentDocument, source);
		return copy;
	}

	/// The node value is the stored comment text.
	override string nodeValue() const {
		return source;
	}

	///.
override string writeToAppender(Appender!string where = appender!string()) const {
		auto start = where.data.length;
		where.put("<!--");
		where.put(source);
		where.put("-->");
		return where.data[start .. $];
	}

	override string toPrettyString(bool, int, string) const {
		string s;
		s ~= "<!--";
		s ~= source;
		s ~= "-->";
		return s;
	}


	///.
	string source;
}




/// A node holding plain (already entity-decoded) text in `contents`; its tag name is "#text".
/// Group: implementations
class TextNode : Element {
  public:
	/// Creates a text node with `e` as its contents.
	this(Document _parentDocument, string e) {
		super(_parentDocument);
		contents = e;
		tagName = "#text";
	}

	/// Creates a text node with no parent document.
	this(string e) {
		this(null, e);
	}

	string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes

	/// Builds a text node from text that still contains html entities. The entities are
	/// decoded strictly unless the document is in loose mode or there is no document.
	static TextNode fromUndecodedString(Document _parentDocument, string html) {
		auto e = new TextNode(_parentDocument, "");
		e.contents = htmlEntitiesDecode(html, _parentDocument is null ? false : !_parentDocument.loose);
		return e;
	}

	/// Returns a new text node with the same parent document and contents; `deep` is unused.
	override @property TextNode cloneNode(bool deep) {
		auto n = new TextNode(parentDocument, contents);
		return n;
	}

	/// The node value is the raw `contents`, not the encoded output.
	override string nodeValue() const {
		return this.contents; //toString();
	}

	///.
	@property override int nodeType() const {
		return NodeType.Text;
	}

	/// Writes the entity-encoded contents to `where` and returns the encoded text.
	/// Empty contents yield "" and nothing is written.
	override string writeToAppender(Appender!string where = appender!string()) const {
		string s;
		if(contents.length)
			s = htmlEntitiesEncode(contents, where);
		else
			s = "";

		assert(s !is null);
		return s;
	}

	/// Returns the entity-encoded text laid out for pretty printing.
	///
	/// Params:
	///     insertComments = wrap the inserted indentation in `<!--` `-->`
	///     indentationLevel = how many times `indentWith` is repeated in front of each line
	///     indentWith = the string used for one level of indentation; it is now honoured
	///                  on every line, where continuation lines used to get a literal tab
	override string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
		string s;

		string contents = this.contents;
		// we will first collapse the whitespace per html
		// sort of. note this can break stuff yo!!!!
		if(this.parentNode is null || this.parentNode.tagName != "pre") {
			string n = "";
			// starting out "in whitespace" drops leading whitespace when we are indented
			bool lastWasWhitespace = indentationLevel > 0;
			foreach(char c; contents) {
				if(c.isSimpleWhite) {
					if(!lastWasWhitespace)
						n ~= ' ';
					lastWasWhitespace = true;
				} else {
					n ~= c;
					lastWasWhitespace = false;
				}
			}

			contents = n;
		}

		if(this.parentNode !is null && this.parentNode.tagName != "p") {
			contents = contents.strip;
		}

		auto e = htmlEntitiesEncode(contents);
		import std.algorithm.iteration : splitter;
		bool first = true;
		foreach(line; splitter(e, "\n")) {
			if(first) {
				s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
				first = false;
			} else {
				s ~= "\n";
				if(insertComments)
					s ~= "<!--";
				// use the caller's indentation string here too, so every line of a
				// multi-line text node lines up with the first one
				foreach(i; 0 .. indentationLevel)
					s ~= indentWith;
				if(insertComments)
					s ~= "-->";
			}
			s ~= line.stripRight;
		}
		return s;
	}

	/// Always fails: text nodes do not accept children.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a text node");
	}

	/// The decoded text of this node.
	string contents;
	// alias contents content; // I just mistype this a lot,
}

/**
	There are subclasses of Element offering improved helper
	functions for the element in HTML.
*/

///.
/// Group: implementations
class Link : Element {

	///.
6046 this(Document _parentDocument) { 6047 super(_parentDocument); 6048 this.tagName = "a"; 6049 } 6050 6051 6052 ///. 6053 this(string href, string text) { 6054 super("a"); 6055 setAttribute("href", href); 6056 innerText = text; 6057 } 6058 /+ 6059 /// Returns everything in the href EXCEPT the query string 6060 @property string targetSansQuery() { 6061 6062 } 6063 6064 ///. 6065 @property string domainName() { 6066 6067 } 6068 6069 ///. 6070 @property string path 6071 +/ 6072 /// This gets a variable from the URL's query string. 6073 string getValue(string name) { 6074 auto vars = variablesHash(); 6075 if(name in vars) 6076 return vars[name]; 6077 return null; 6078 } 6079 6080 private string[string] variablesHash() { 6081 string href = getAttribute("href"); 6082 if(href is null) 6083 return null; 6084 6085 auto ques = href.indexOf("?"); 6086 string str = ""; 6087 if(ques != -1) { 6088 str = href[ques+1..$]; 6089 6090 auto fragment = str.indexOf("#"); 6091 if(fragment != -1) 6092 str = str[0..fragment]; 6093 } 6094 6095 string[] variables = str.split("&"); 6096 6097 string[string] hash; 6098 6099 foreach(var; variables) { 6100 auto index = var.indexOf("="); 6101 if(index == -1) 6102 hash[var] = ""; 6103 else { 6104 hash[decodeComponent(var[0..index])] = decodeComponent(var[index + 1 .. $]); 6105 } 6106 } 6107 6108 return hash; 6109 } 6110 6111 ///. 
6112 /*private*/ void updateQueryString(string[string] vars) { 6113 string href = getAttribute("href"); 6114 6115 auto question = href.indexOf("?"); 6116 if(question != -1) 6117 href = href[0..question]; 6118 6119 string frag = ""; 6120 auto fragment = href.indexOf("#"); 6121 if(fragment != -1) { 6122 frag = href[fragment..$]; 6123 href = href[0..fragment]; 6124 } 6125 6126 string query = "?"; 6127 bool first = true; 6128 foreach(name, value; vars) { 6129 if(!first) 6130 query ~= "&"; 6131 else 6132 first = false; 6133 6134 query ~= encodeComponent(name); 6135 if(value.length) 6136 query ~= "=" ~ encodeComponent(value); 6137 } 6138 6139 if(query != "?") 6140 href ~= query; 6141 6142 href ~= frag; 6143 6144 setAttribute("href", href); 6145 } 6146 6147 /// Sets or adds the variable with the given name to the given value 6148 /// It automatically URI encodes the values and takes care of the ? and &. 6149 override void setValue(string name, string variable) { 6150 auto vars = variablesHash(); 6151 vars[name] = variable; 6152 6153 updateQueryString(vars); 6154 } 6155 6156 /// Removes the given variable from the query string 6157 void removeValue(string name) { 6158 auto vars = variablesHash(); 6159 vars.remove(name); 6160 6161 updateQueryString(vars); 6162 } 6163 6164 /* 6165 ///. 6166 override string toString() { 6167 6168 } 6169 6170 ///. 6171 override string getAttribute(string name) { 6172 if(name == "href") { 6173 6174 } else 6175 return super.getAttribute(name); 6176 } 6177 */ 6178 } 6179 6180 ///. 6181 /// Group: implementations 6182 class Form : Element { 6183 6184 ///. 
6185 this(Document _parentDocument) { 6186 super(_parentDocument); 6187 tagName = "form"; 6188 } 6189 6190 override Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 6191 auto t = this.querySelector("fieldset div"); 6192 if(t is null) 6193 return super.addField(label, name, type, fieldOptions); 6194 else 6195 return t.addField(label, name, type, fieldOptions); 6196 } 6197 6198 override Element addField(string label, string name, FormFieldOptions fieldOptions) { 6199 auto type = "text"; 6200 auto t = this.querySelector("fieldset div"); 6201 if(t is null) 6202 return super.addField(label, name, type, fieldOptions); 6203 else 6204 return t.addField(label, name, type, fieldOptions); 6205 } 6206 6207 override Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) { 6208 auto t = this.querySelector("fieldset div"); 6209 if(t is null) 6210 return super.addField(label, name, options, fieldOptions); 6211 else 6212 return t.addField(label, name, options, fieldOptions); 6213 } 6214 6215 override void setValue(string field, string value) { 6216 setValue(field, value, true); 6217 } 6218 6219 // FIXME: doesn't handle arrays; multiple fields can have the same name 6220 6221 /// Set's the form field's value. For input boxes, this sets the value attribute. For 6222 /// textareas, it sets the innerText. For radio boxes and select boxes, it removes 6223 /// the checked/selected attribute from all, and adds it to the one matching the value. 6224 /// For checkboxes, if the value is non-null and not empty, it checks the box. 6225 6226 /// If you set a value that doesn't exist, it throws an exception if makeNew is false. 6227 /// Otherwise, it makes a new input with type=hidden to keep the value. 
6228 void setValue(string field, string value, bool makeNew) { 6229 auto eles = getField(field); 6230 if(eles.length == 0) { 6231 if(makeNew) { 6232 addInput(field, value); 6233 return; 6234 } else 6235 throw new Exception("form field does not exist"); 6236 } 6237 6238 if(eles.length == 1) { 6239 auto e = eles[0]; 6240 switch(e.tagName) { 6241 default: assert(0); 6242 case "textarea": 6243 e.innerText = value; 6244 break; 6245 case "input": 6246 string type = e.getAttribute("type"); 6247 if(type is null) { 6248 e.value = value; 6249 return; 6250 } 6251 switch(type) { 6252 case "checkbox": 6253 case "radio": 6254 if(value.length && value != "false") 6255 e.setAttribute("checked", "checked"); 6256 else 6257 e.removeAttribute("checked"); 6258 break; 6259 default: 6260 e.value = value; 6261 return; 6262 } 6263 break; 6264 case "select": 6265 bool found = false; 6266 foreach(child; e.tree) { 6267 if(child.tagName != "option") 6268 continue; 6269 string val = child.getAttribute("value"); 6270 if(val is null) 6271 val = child.innerText; 6272 if(val == value) { 6273 child.setAttribute("selected", "selected"); 6274 found = true; 6275 } else 6276 child.removeAttribute("selected"); 6277 } 6278 6279 if(!found) { 6280 e.addChild("option", value) 6281 .setAttribute("selected", "selected"); 6282 } 6283 break; 6284 } 6285 } else { 6286 // assume radio boxes 6287 foreach(e; eles) { 6288 string val = e.getAttribute("value"); 6289 //if(val is null) 6290 // throw new Exception("don't know what to do with radio boxes with null value"); 6291 if(val == value) 6292 e.setAttribute("checked", "checked"); 6293 else 6294 e.removeAttribute("checked"); 6295 } 6296 } 6297 } 6298 6299 /// This takes an array of strings and adds hidden <input> elements for each one of them. Unlike setValue, 6300 /// it makes no attempt to find and modify existing elements in the form to the new values. 
6301 void addValueArray(string key, string[] arrayOfValues) { 6302 foreach(arr; arrayOfValues) 6303 addChild("input", key, arr); 6304 } 6305 6306 /// Gets the value of the field; what would be given if it submitted right now. (so 6307 /// it handles select boxes and radio buttons too). For checkboxes, if a value isn't 6308 /// given, but it is checked, it returns "checked", since null and "" are indistinguishable 6309 string getValue(string field) { 6310 auto eles = getField(field); 6311 if(eles.length == 0) 6312 return ""; 6313 if(eles.length == 1) { 6314 auto e = eles[0]; 6315 switch(e.tagName) { 6316 default: assert(0); 6317 case "input": 6318 if(e.type == "checkbox") { 6319 if(e.checked) 6320 return e.value.length ? e.value : "checked"; 6321 return ""; 6322 } else 6323 return e.value; 6324 case "textarea": 6325 return e.innerText; 6326 case "select": 6327 foreach(child; e.tree) { 6328 if(child.tagName != "option") 6329 continue; 6330 if(child.selected) 6331 return child.value; 6332 } 6333 break; 6334 } 6335 } else { 6336 // assuming radio 6337 foreach(e; eles) { 6338 if(e.checked) 6339 return e.value; 6340 } 6341 } 6342 6343 return ""; 6344 } 6345 6346 // FIXME: doesn't handle multiple elements with the same name (except radio buttons) 6347 ///. 
6348 string getPostableData() { 6349 bool[string] namesDone; 6350 6351 string ret; 6352 bool outputted = false; 6353 6354 foreach(e; getElementsBySelector("[name]")) { 6355 if(e.name in namesDone) 6356 continue; 6357 6358 if(outputted) 6359 ret ~= "&"; 6360 else 6361 outputted = true; 6362 6363 ret ~= std.uri.encodeComponent(e.name) ~ "=" ~ std.uri.encodeComponent(getValue(e.name)); 6364 6365 namesDone[e.name] = true; 6366 } 6367 6368 return ret; 6369 } 6370 6371 /// Gets the actual elements with the given name 6372 Element[] getField(string name) { 6373 Element[] ret; 6374 foreach(e; tree) { 6375 if(e.name == name) 6376 ret ~= e; 6377 } 6378 return ret; 6379 } 6380 6381 /// Grabs the <label> with the given for tag, if there is one. 6382 Element getLabel(string forId) { 6383 foreach(e; tree) 6384 if(e.tagName == "label" && e.getAttribute("for") == forId) 6385 return e; 6386 return null; 6387 } 6388 6389 /// Adds a new INPUT field to the end of the form with the given attributes. 6390 Element addInput(string name, string value, string type = "hidden") { 6391 auto e = new Element(parentDocument, "input", null, true); 6392 e.name = name; 6393 e.value = value; 6394 e.type = type; 6395 6396 appendChild(e); 6397 6398 return e; 6399 } 6400 6401 /// Removes the given field from the form. It finds the element and knocks it right out. 6402 void removeField(string name) { 6403 foreach(e; getField(name)) 6404 e.parentNode.removeChild(e); 6405 } 6406 6407 /+ 6408 /// Returns all form members. 6409 @property Element[] elements() { 6410 6411 } 6412 6413 ///. 
6414 string opDispatch(string name)(string v = null) 6415 // filter things that should actually be attributes on the form 6416 if( name != "method" && name != "action" && name != "enctype" 6417 && name != "style" && name != "name" && name != "id" && name != "class") 6418 { 6419 6420 } 6421 +/ 6422 /+ 6423 void submit() { 6424 // take its elements and submit them through http 6425 } 6426 +/ 6427 } 6428 6429 import std.conv; 6430 6431 ///. 6432 /// Group: implementations 6433 class Table : Element { 6434 6435 ///. 6436 this(Document _parentDocument) { 6437 super(_parentDocument); 6438 tagName = "table"; 6439 } 6440 6441 /// Creates an element with the given type and content. 6442 Element th(T)(T t) { 6443 Element e; 6444 if(parentDocument !is null) 6445 e = parentDocument.createElement("th"); 6446 else 6447 e = Element.make("th"); 6448 static if(is(T == Html)) 6449 e.innerHTML = t; 6450 else 6451 e.innerText = to!string(t); 6452 return e; 6453 } 6454 6455 /// ditto 6456 Element td(T)(T t) { 6457 Element e; 6458 if(parentDocument !is null) 6459 e = parentDocument.createElement("td"); 6460 else 6461 e = Element.make("td"); 6462 static if(is(T == Html)) 6463 e.innerHTML = t; 6464 else 6465 e.innerText = to!string(t); 6466 return e; 6467 } 6468 6469 /// . 6470 Element appendHeaderRow(T...)(T t) { 6471 return appendRowInternal("th", "thead", t); 6472 } 6473 6474 /// . 6475 Element appendFooterRow(T...)(T t) { 6476 return appendRowInternal("td", "tfoot", t); 6477 } 6478 6479 /// . 6480 Element appendRow(T...)(T t) { 6481 return appendRowInternal("td", "tbody", t); 6482 } 6483 6484 void addColumnClasses(string[] classes...) 
{ 6485 auto grid = getGrid(); 6486 foreach(row; grid) 6487 foreach(i, cl; classes) { 6488 if(cl.length) 6489 if(i < row.length) 6490 row[i].addClass(cl); 6491 } 6492 } 6493 6494 private Element appendRowInternal(T...)(string innerType, string findType, T t) { 6495 Element row = Element.make("tr"); 6496 6497 foreach(e; t) { 6498 static if(is(typeof(e) : Element)) { 6499 if(e.tagName == "td" || e.tagName == "th") 6500 row.appendChild(e); 6501 else { 6502 Element a = Element.make(innerType); 6503 6504 a.appendChild(e); 6505 6506 row.appendChild(a); 6507 } 6508 } else static if(is(typeof(e) == Html)) { 6509 Element a = Element.make(innerType); 6510 a.innerHTML = e.source; 6511 row.appendChild(a); 6512 } else static if(is(typeof(e) == Element[])) { 6513 Element a = Element.make(innerType); 6514 foreach(ele; e) 6515 a.appendChild(ele); 6516 row.appendChild(a); 6517 } else static if(is(typeof(e) == string[])) { 6518 foreach(ele; e) { 6519 Element a = Element.make(innerType); 6520 a.innerText = to!string(ele); 6521 row.appendChild(a); 6522 } 6523 } else { 6524 Element a = Element.make(innerType); 6525 a.innerText = to!string(e); 6526 row.appendChild(a); 6527 } 6528 } 6529 6530 foreach(e; children) { 6531 if(e.tagName == findType) { 6532 e.appendChild(row); 6533 return row; 6534 } 6535 } 6536 6537 // the type was not found if we are here... let's add it so it is well-formed 6538 auto lol = this.addChild(findType); 6539 lol.appendChild(row); 6540 6541 return row; 6542 } 6543 6544 ///. 6545 Element captionElement() { 6546 Element cap; 6547 foreach(c; children) { 6548 if(c.tagName == "caption") { 6549 cap = c; 6550 break; 6551 } 6552 } 6553 6554 if(cap is null) { 6555 cap = Element.make("caption"); 6556 appendChild(cap); 6557 } 6558 6559 return cap; 6560 } 6561 6562 ///. 6563 @property string caption() { 6564 return captionElement().innerText; 6565 } 6566 6567 ///. 
6568 @property void caption(string text) { 6569 captionElement().innerText = text; 6570 } 6571 6572 /// Gets the logical layout of the table as a rectangular grid of 6573 /// cells. It considers rowspan and colspan. A cell with a large 6574 /// span is represented in the grid by being referenced several times. 6575 /// The tablePortition parameter can get just a <thead>, <tbody>, or 6576 /// <tfoot> portion if you pass one. 6577 /// 6578 /// Note: the rectangular grid might include null cells. 6579 /// 6580 /// This is kinda expensive so you should call once when you want the grid, 6581 /// then do lookups on the returned array. 6582 TableCell[][] getGrid(Element tablePortition = null) 6583 in { 6584 if(tablePortition is null) 6585 assert(tablePortition is null); 6586 else { 6587 assert(tablePortition !is null); 6588 assert(tablePortition.parentNode is this); 6589 assert( 6590 tablePortition.tagName == "tbody" 6591 || 6592 tablePortition.tagName == "tfoot" 6593 || 6594 tablePortition.tagName == "thead" 6595 ); 6596 } 6597 } 6598 body { 6599 if(tablePortition is null) 6600 tablePortition = this; 6601 6602 TableCell[][] ret; 6603 6604 // FIXME: will also return rows of sub tables! 6605 auto rows = tablePortition.getElementsByTagName("tr"); 6606 ret.length = rows.length; 6607 6608 int maxLength = 0; 6609 6610 int insertCell(int row, int position, TableCell cell) { 6611 if(row >= ret.length) 6612 return position; // not supposed to happen - a rowspan is prolly too big. 6613 6614 if(position == -1) { 6615 position++; 6616 foreach(item; ret[row]) { 6617 if(item is null) 6618 break; 6619 position++; 6620 } 6621 } 6622 6623 if(position < ret[row].length) 6624 ret[row][position] = cell; 6625 else 6626 foreach(i; ret[row].length .. 
position + 1) { 6627 if(i == position) 6628 ret[row] ~= cell; 6629 else 6630 ret[row] ~= null; 6631 } 6632 return position; 6633 } 6634 6635 foreach(i, rowElement; rows) { 6636 auto row = cast(TableRow) rowElement; 6637 assert(row !is null); 6638 assert(i < ret.length); 6639 6640 int position = 0; 6641 foreach(cellElement; rowElement.childNodes) { 6642 auto cell = cast(TableCell) cellElement; 6643 if(cell is null) 6644 continue; 6645 6646 // FIXME: colspan == 0 or rowspan == 0 6647 // is supposed to mean fill in the rest of 6648 // the table, not skip it 6649 foreach(int j; 0 .. cell.colspan) { 6650 foreach(int k; 0 .. cell.rowspan) 6651 // if the first row, always append. 6652 insertCell(k + cast(int) i, k == 0 ? -1 : position, cell); 6653 position++; 6654 } 6655 } 6656 6657 if(ret[i].length > maxLength) 6658 maxLength = cast(int) ret[i].length; 6659 } 6660 6661 // want to ensure it's rectangular 6662 foreach(ref r; ret) { 6663 foreach(i; r.length .. maxLength) 6664 r ~= null; 6665 } 6666 6667 return ret; 6668 } 6669 } 6670 6671 /// Represents a table row element - a <tr> 6672 /// Group: implementations 6673 class TableRow : Element { 6674 ///. 6675 this(Document _parentDocument) { 6676 super(_parentDocument); 6677 tagName = "tr"; 6678 } 6679 6680 // FIXME: the standard says there should be a lot more in here, 6681 // but meh, I never use it and it's a pain to implement. 6682 } 6683 6684 /// Represents anything that can be a table cell - <td> or <th> html. 6685 /// Group: implementations 6686 class TableCell : Element { 6687 ///. 
6688 this(Document _parentDocument, string _tagName) { 6689 super(_parentDocument, _tagName); 6690 } 6691 6692 @property int rowspan() const { 6693 int ret = 1; 6694 auto it = getAttribute("rowspan"); 6695 if(it.length) 6696 ret = to!int(it); 6697 return ret; 6698 } 6699 6700 @property int colspan() const { 6701 int ret = 1; 6702 auto it = getAttribute("colspan"); 6703 if(it.length) 6704 ret = to!int(it); 6705 return ret; 6706 } 6707 6708 @property int rowspan(int i) { 6709 setAttribute("rowspan", to!string(i)); 6710 return i; 6711 } 6712 6713 @property int colspan(int i) { 6714 setAttribute("colspan", to!string(i)); 6715 return i; 6716 } 6717 6718 } 6719 6720 6721 ///. 6722 /// Group: implementations 6723 class MarkupException : Exception { 6724 6725 ///. 6726 this(string message, string file = __FILE__, size_t line = __LINE__) { 6727 super(message, file, line); 6728 } 6729 } 6730 6731 /// This is used when you are using one of the require variants of navigation, and no matching element can be found in the tree. 6732 /// Group: implementations 6733 class ElementNotFoundException : Exception { 6734 6735 /// type == kind of element you were looking for and search == a selector describing the search. 6736 this(string type, string search, Element searchContext, string file = __FILE__, size_t line = __LINE__) { 6737 this.searchContext = searchContext; 6738 super("Element of type '"~type~"' matching {"~search~"} not found.", file, line); 6739 } 6740 6741 Element searchContext; 6742 } 6743 6744 /// The html struct is used to differentiate between regular text nodes and html in certain functions 6745 /// 6746 /// Easiest way to construct it is like this: `auto html = Html("<p>hello</p>");` 6747 /// Group: core_functionality 6748 struct Html { 6749 /// This string holds the actual html. Use it to retrieve the contents. 
6750 string source; 6751 } 6752 6753 // for the observers 6754 enum DomMutationOperations { 6755 setAttribute, 6756 removeAttribute, 6757 appendChild, // tagname, attributes[], innerHTML 6758 insertBefore, 6759 truncateChildren, 6760 removeChild, 6761 appendHtml, 6762 replaceHtml, 6763 appendText, 6764 replaceText, 6765 replaceTextOnly 6766 } 6767 6768 // and for observers too 6769 struct DomMutationEvent { 6770 DomMutationOperations operation; 6771 Element target; 6772 Element related; // what this means differs with the operation 6773 Element related2; 6774 string relatedString; 6775 string relatedString2; 6776 } 6777 6778 6779 private immutable static string[] htmlSelfClosedElements = [ 6780 // html 4 6781 "img", "hr", "input", "br", "col", "link", "meta", 6782 // html 5 6783 "source" ]; 6784 6785 private immutable static string[] inlineElements = [ 6786 "span", "strong", "em", "b", "i", "a" 6787 ]; 6788 6789 6790 static import std.conv; 6791 6792 ///. 6793 int intFromHex(string hex) { 6794 int place = 1; 6795 int value = 0; 6796 for(sizediff_t a = hex.length - 1; a >= 0; a--) { 6797 int v; 6798 char q = hex[a]; 6799 if( q >= '0' && q <= '9') 6800 v = q - '0'; 6801 else if (q >= 'a' && q <= 'f') 6802 v = q - 'a' + 10; 6803 else throw new Exception("Illegal hex character: " ~ q); 6804 6805 value += v * place; 6806 6807 place *= 16; 6808 } 6809 6810 return value; 6811 } 6812 6813 6814 // CSS selector handling 6815 6816 // EXTENSIONS 6817 // dd - dt means get the dt directly before that dd (opposite of +) NOT IMPLEMENTED 6818 // dd -- dt means rewind siblings until you hit a dt, go as far as you need to NOT IMPLEMENTED 6819 // dt < dl means get the parent of that dt iff it is a dl (usable for "get a dt that are direct children of dl") 6820 // dt << dl means go as far up as needed to find a dl (you have an element and want its containers) NOT IMPLEMENTED 6821 // :first means to stop at the first hit, don't do more (so p + p == p ~ p:first 6822 6823 6824 6825 // CSS4 
// draft currently says you can change the subject (the element actually returned) by putting a ! at the end of it.
// That might be useful to implement, though I do have parent selectors too.

/// The punctuation tokens the selector lexer recognizes.
static immutable string[] selectorTokens = [
	// It is important that the 2 character possibilities go first here for accurate lexing
	"~=", "*=", "|=", "^=", "$=", "!=", // "::" should be there too for full standard
	"::", ">>",
	"<<", // my any-parent extension (reciprocal of whitespace)
	// " - ", // previous-sibling extension (whitespace required to disambiguate tag-names)
	".", ">", "+", "*", ":", "[", "]", "=", "\"", "#", ",", " ", "~", "<", "(", ")"
]; // other is white space or a name.

/// Returns the index into `selectorTokens` of the token that starts at
/// `str[position]`, or -1 if none does.
sizediff_t idToken(string str, sizediff_t position) {
	sizediff_t tid = -1;
	char c = str[position];
	foreach(a, token; selectorTokens)

		if(c == token[0]) {
			if(token.length > 1) {
				if(position + 1 >= str.length || str[position+1] != token[1])
					continue; // not this token
			}
			tid = a;
			break;
		}
	return tid;
}

/// Splits a CSS selector string into tokens: entries of `selectorTokens`,
/// identifiers (with backslash escapes resolved), and the unescaped
/// contents of quoted strings. Control characters and `/* */` comments
/// are dropped; a plain space is kept as the " " token.
///
/// Throws: Exception if a quoted string is never closed.
// look, ma, no phobos!
// new lexer by ketmar
string[] lexSelector (string selstr) {

	static sizediff_t idToken (string str, size_t stpos) {
		char c = str[stpos];
		foreach (sizediff_t tidx, immutable token; selectorTokens) {
			if (c == token[0]) {
				if (token.length > 1) {
					assert(token.length == 2, token); // we don't have 3-char tokens yet
					if (str.length-stpos < 2 || str[stpos+1] != token[1]) continue;
				}
				return tidx;
			}
		}
		return -1;
	}

	// skip spaces and comments
	static string removeLeadingBlanks (string str) {
		size_t curpos = 0;
		while (curpos < str.length) {
			immutable char ch = str[curpos];
			// this can overflow on 4GB strings on 32-bit; 'cmon, don't be silly, nobody cares!
			if (ch == '/' && str.length-curpos > 1 && str[curpos+1] == '*') {
				// comment
				curpos += 2;
				while (curpos < str.length) {
					if (str[curpos] == '*' && str.length-curpos > 1 && str[curpos+1] == '/') {
						curpos += 2;
						break;
					}
					++curpos;
				}
			} else if (ch < 32) { // The < instead of <= is INTENTIONAL. See note from adr below.
				++curpos;

				// FROM ADR: This does NOT catch ' '! Spaces have semantic meaning in CSS! While
				// "foo bar" is clear, and can only have one meaning, consider ".foo .bar".
				// That is not the same as ".foo.bar". If the space is stripped, important
				// information is lost, despite the tokens being separatable anyway.
				//
				// The parser really needs to be aware of the presence of a space.
			} else {
				break;
			}
		}
		return str[curpos..$];
	}

	static bool isBlankAt() (string str, size_t pos) {
		// we should consider unicode spaces too, but... unicode sux anyway.
		return
			(pos < str.length && // in string
			 (str[pos] <= 32 || // space
			  (str.length-pos > 1 && str[pos] == '/' && str[pos+1] == '*'))); // comment
	}

	string[] tokens;
	// lexx it!
	while ((selstr = removeLeadingBlanks(selstr)).length > 0) {
		if(selstr[0] == '\"' || selstr[0] == '\'') {
			immutable end = selstr[0];
			size_t pos = 1;
			bool escaping;
			// Scan to the matching, unescaped closing quote. A character
			// right after a backslash never terminates the string. (The
			// loop used to stop as soon as it saw a backslash.)
			while(pos < selstr.length && (escaping || selstr[pos] != end)) {
				if(escaping)
					escaping = false;
				else if(selstr[pos] == '\\')
					escaping = true;
				pos++;
			}

			if(pos >= selstr.length)
				throw new Exception("unterminated string in selector: " ~ selstr);

			// FIXME: do better unescaping
			tokens ~= selstr[1 .. pos].replace(`\"`, `"`).replace(`\'`, `'`).replace(`\\`, `\`);
			// skip the closing quote; it may legitimately be the last character
			selstr = selstr[pos + 1 .. $];
			continue;
		}


		// no tokens starts with escape
		immutable tid = idToken(selstr, 0);
		if (tid >= 0) {
			// special token
			tokens ~= selectorTokens[tid]; // it's funnier this way
			selstr = selstr[selectorTokens[tid].length..$];
			continue;
		}
		// from start to space or special token
		size_t escapePos = size_t.max;
		size_t curpos = 0; // i can has chizburger^w escape at the start
		while (curpos < selstr.length) {
			if (selstr[curpos] == '\\') {
				// this is escape, just skip it and next char
				if (escapePos == size_t.max) escapePos = curpos;
				curpos = (selstr.length-curpos >= 2 ? curpos+2 : selstr.length);
			} else {
				if (isBlankAt(selstr, curpos) || idToken(selstr, curpos) >= 0) break;
				++curpos;
			}
		}
		// identifier
		if (escapePos != size_t.max) {
			// i hate it when it happens
			string id = selstr[0..escapePos];
			while (escapePos < curpos) {
				if (curpos-escapePos < 2) break;
				id ~= selstr[escapePos+1]; // escaped char
				escapePos += 2;
				immutable stp = escapePos;
				while (escapePos < curpos && selstr[escapePos] != '\\') ++escapePos;
				if (escapePos > stp) id ~= selstr[stp..escapePos];
			}
			if (id.length > 0) tokens ~= id;
		} else {
			tokens ~= selstr[0..curpos];
		}
		selstr = selstr[curpos..$];
	}
	return tokens;
}
// NOTE(review): these expectations appear to predate the decision to keep
// ' ' as a token (removeLeadingBlanks only skips characters below 32), so
// inputs containing spaces likely no longer produce these results. The
// block is only compiled with version=unittest_domd_lexer. Confirm before enabling.
version(unittest_domd_lexer) unittest {
	assert(lexSelector(r" test\=me /*d*/") == [r"test=me"]);
	assert(lexSelector(r"div/**/. id") == ["div", ".", "id"]);
	assert(lexSelector(r" < <") == ["<", "<"]);
	assert(lexSelector(r" <<") == ["<<"]);
	assert(lexSelector(r" <</") == ["<<", "/"]);
	assert(lexSelector(r" <</*") == ["<<"]);
	assert(lexSelector(r" <\</*") == ["<", "<"]);
	assert(lexSelector(r"heh\") == ["heh"]);
	assert(lexSelector(r"alice \") == ["alice"]);
	assert(lexSelector(r"alice,is#best") == ["alice", ",", "is", "#", "best"]);
}

/// One compound selector (tag filter, attribute tests, pseudo-classes)
/// together with the combinator that links it to the previous part.
struct SelectorPart {
	string tagNameFilter; ///.
	string[] attributesPresent; /// [attr]
	string[2][] attributesEqual; /// [attr=value]
	string[2][] attributesStartsWith; /// [attr^=value]
	string[2][] attributesEndsWith; /// [attr$=value]
	// split it on space, then match to these
	string[2][] attributesIncludesSeparatedBySpaces; /// [attr~=value]
	// split it on dash, then match to these
	string[2][] attributesIncludesSeparatedByDashes; /// [attr|=value]
	string[2][] attributesInclude; /// [attr*=value]
	string[2][] attributesNotEqual; /// [attr!=value] -- extension by me

	string[] hasSelectors; /// :has(this)
	string[] notSelectors; /// :not(this)

	string[] isSelectors; /// :is(this)
	string[] whereSelectors; /// :where(this)

	ParsedNth[] nthOfType; /// .
	ParsedNth[] nthLastOfType; /// .
	ParsedNth[] nthChild; /// .

	bool firstChild; ///.
	bool lastChild; ///.

	bool firstOfType; /// .
	bool lastOfType; /// .

	bool emptyElement; ///.
	bool whitespaceOnly; ///
	bool oddChild; ///.
	bool evenChild; ///.

	bool scopeElement; /// the css :scope thing; matches just the `this` element. NOT IMPLEMENTED

	bool rootElement; ///.
7030 7031 int separation = -1; /// -1 == only itself; the null selector, 0 == tree, 1 == childNodes, 2 == childAfter, 3 == youngerSibling, 4 == parentOf 7032 7033 bool isCleanSlateExceptSeparation() { 7034 auto cp = this; 7035 cp.separation = -1; 7036 return cp is SelectorPart.init; 7037 } 7038 7039 ///. 7040 string toString() { 7041 string ret; 7042 switch(separation) { 7043 default: assert(0); 7044 case -1: break; 7045 case 0: ret ~= " "; break; 7046 case 1: ret ~= " > "; break; 7047 case 2: ret ~= " + "; break; 7048 case 3: ret ~= " ~ "; break; 7049 case 4: ret ~= " < "; break; 7050 } 7051 ret ~= tagNameFilter; 7052 foreach(a; attributesPresent) ret ~= "[" ~ a ~ "]"; 7053 foreach(a; attributesEqual) ret ~= "[" ~ a[0] ~ "=\"" ~ a[1] ~ "\"]"; 7054 foreach(a; attributesEndsWith) ret ~= "[" ~ a[0] ~ "$=\"" ~ a[1] ~ "\"]"; 7055 foreach(a; attributesStartsWith) ret ~= "[" ~ a[0] ~ "^=\"" ~ a[1] ~ "\"]"; 7056 foreach(a; attributesNotEqual) ret ~= "[" ~ a[0] ~ "!=\"" ~ a[1] ~ "\"]"; 7057 foreach(a; attributesInclude) ret ~= "[" ~ a[0] ~ "*=\"" ~ a[1] ~ "\"]"; 7058 foreach(a; attributesIncludesSeparatedByDashes) ret ~= "[" ~ a[0] ~ "|=\"" ~ a[1] ~ "\"]"; 7059 foreach(a; attributesIncludesSeparatedBySpaces) ret ~= "[" ~ a[0] ~ "~=\"" ~ a[1] ~ "\"]"; 7060 7061 foreach(a; notSelectors) ret ~= ":not(" ~ a ~ ")"; 7062 foreach(a; hasSelectors) ret ~= ":has(" ~ a ~ ")"; 7063 7064 foreach(a; isSelectors) ret ~= ":is(" ~ a ~ ")"; 7065 foreach(a; whereSelectors) ret ~= ":where(" ~ a ~ ")"; 7066 7067 foreach(a; nthChild) ret ~= ":nth-child(" ~ a.toString ~ ")"; 7068 foreach(a; nthOfType) ret ~= ":nth-of-type(" ~ a.toString ~ ")"; 7069 foreach(a; nthLastOfType) ret ~= ":nth-last-of-type(" ~ a.toString ~ ")"; 7070 7071 if(firstChild) ret ~= ":first-child"; 7072 if(lastChild) ret ~= ":last-child"; 7073 if(firstOfType) ret ~= ":first-of-type"; 7074 if(lastOfType) ret ~= ":last-of-type"; 7075 if(emptyElement) ret ~= ":empty"; 7076 if(whitespaceOnly) ret ~= ":whitespace-only"; 7077 
if(oddChild) ret ~= ":odd-child";
		if(evenChild) ret ~= ":even-child";
		if(rootElement) ret ~= ":root";
		if(scopeElement) ret ~= ":scope";

		return ret;
	}

	// USEFUL
	/++
		Tests a single element against every filter stored in this selector part
		(tag name, structural pseudo-classes, attribute tests, nested selectors,
		and the nth-* formulas).

		Returns: `true` only when `e` is non-null, has `nodeType == 1`, and passes
		all of the filters; `false` as soon as any one of them fails.
	+/
	bool matchElement(Element e) {
		// FIXME: this can be called a lot of times, and really add up in times according to the profiler.
		// Each individual call is reasonably fast already, but it adds up.
		if(e is null) return false;
		if(e.nodeType != 1) return false;

		if(tagNameFilter != "" && tagNameFilter != "*")
			if(e.tagName != tagNameFilter)
				return false;
		if(firstChild) {
			if(e.parentNode is null)
				return false;
			if(e.parentNode.childElements[0] !is e)
				return false;
		}
		if(lastChild) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			if(ce[$-1] !is e)
				return false;
		}
		if(firstOfType) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			foreach(c; ce) {
				if(c.tagName == e.tagName) {
					if(c !is e)
						return false;
					// e is the first of its type. Do NOT return true here: the
					// remaining filters (attributes, :last-of-type for
					// :only-of-type, etc.) still have to pass.
					break;
				}
			}
		}
		if(lastOfType) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			foreach_reverse(c; ce) {
				if(c.tagName == e.tagName) {
					if(c !is e)
						return false;
					// same reasoning as for firstOfType: keep checking the rest
					break;
				}
			}
		}
		/+
		if(scopeElement) {
			if(e !is this_)
				return false;
		}
		+/
		if(emptyElement) {
			if(e.children.length)
				return false;
		}
		if(whitespaceOnly) {
			if(e.innerText.strip.length)
				return false;
		}
		if(rootElement) {
			if(e.parentNode !is null)
				return false;
		}
		if(oddChild || evenChild) {
			if(e.parentNode is null)
				return false;
			// note: `i` is the zero-based position among the parent's child elements
			foreach(i, child; e.parentNode.childElements) {
				if(child is e) {
					if(oddChild && !(i&1))
						return false;
					if(evenChild && (i&1))
						return false;
					break;
				}
			}
		}

		// true if `value` is one of the pieces of `attr` when split on `separator`
		bool matchWithSeparator(string attr, string value, string separator) {
			foreach(s; attr.split(separator))
				if(s == value)
					return true;
			return false;
		}

		foreach(a; attributesPresent)
			if(a !in e.attributes)
				return false;
		foreach(a; attributesEqual)
			if(a[0] !in e.attributes || e.attributes[a[0]] != a[1])
				return false;
		foreach(a; attributesNotEqual)
			// FIXME: maybe it should say null counts... this just bit me.
			// I did [attr][attr!=value] to work around.
			//
			// if it's null, it's not equal, right?
			//if(a[0] !in e.attributes || e.attributes[a[0]] == a[1])
			if(e.getAttribute(a[0]) == a[1])
				return false;
		foreach(a; attributesInclude)
			if(a[0] !in e.attributes || (e.attributes[a[0]].indexOf(a[1]) == -1))
				return false;
		foreach(a; attributesStartsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].startsWith(a[1]))
				return false;
		foreach(a; attributesEndsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].endsWith(a[1]))
				return false;
		foreach(a; attributesIncludesSeparatedBySpaces)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], " "))
				return false;
		foreach(a; attributesIncludesSeparatedByDashes)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], "-"))
				return false;
		foreach(a; hasSelectors) {
			if(e.querySelector(a) is null)
				return false;
		}
		foreach(a; notSelectors) {
			auto sel = Selector(a);
			if(sel.matchesElement(e))
				return false;
		}
		foreach(a; isSelectors) {
			auto sel = Selector(a);
			if(!sel.matchesElement(e))
				return false;
		}
		foreach(a; whereSelectors) {
			auto sel = Selector(a);
			if(!sel.matchesElement(e))
				return false;
		}

		foreach(a; nthChild) {
			if(e.parentNode is null)
				return false;

			auto among = e.parentNode.childElements;

			if(!a.solvesFor(among, e))
				return false;
		}
		foreach(a; nthOfType) {
			if(e.parentNode is null)
				return false;

			auto among = e.parentNode.childElements(e.tagName);

			if(!a.solvesFor(among, e))
				return false;
		}
		foreach(a; nthLastOfType) {
			if(e.parentNode is null)
				return false;

			auto among = retro(e.parentNode.childElements(e.tagName));

			if(!a.solvesFor(among, e))
				return false;
		}

		return true;
	}
}

/++
	The parsed argument of an `:nth-*()` pseudo-class: `multiplier * n + adder`,
	optionally followed by `of <selector>`.

	Accepted forms include `odd`, `even`, a plain integer (`3`), `n`, `-n`, `+n`,
	`2n`, `2n+1`, `2n - 1`, each optionally followed by ` of selector`.
+/
struct ParsedNth {
	int multiplier; // the A in An+B
	int adder; // the B in An+B

	string of; // selector text after "of", or empty when absent

	/++
		Parses the text found between the parentheses of an nth-* selector.

		Throws: Exception if something other than `+`, `-` or `of` follows the `n`.
	+/
	this(string text) {
		auto original = text;
		consumeWhitespace(text);
		if(text.startsWith("odd")) {
			multiplier = 2;
			adder = 1;

			text = text[3 .. $];
		} else if(text.startsWith("even")) {
			// even is 2n+0; it used to be parsed as 2n+1, i.e. the same as odd
			multiplier = 2;
			adder = 0;

			text = text[4 .. $];
		} else {
			int n;
			if(text.length && text[0] == 'n') {
				n = 1;
			} else if(text.length > 1 && (text[0] == '-' || text[0] == '+') && text[1] == 'n') {
				// a bare sign in front of n means a coefficient of -1 / +1;
				// parseNumber would report 0 here because there are no digits
				n = text[0] == '-' ? -1 : 1;
				text = text[1 .. $];
			} else {
				n = parseNumber(text);
			}
			consumeWhitespace(text);
			if(text.length && text[0] == 'n') {
				multiplier = n;
				text = text[1 .. $];
				consumeWhitespace(text);
				if(text.length) {
					if(text[0] == '+') {
						text = text[1 .. $];
						adder = parseNumber(text);
					} else if(text[0] == '-') {
						text = text[1 .. $];
						adder = -parseNumber(text);
					} else if(text[0] == 'o') {
						// continue, this is handled below
					} else
						throw new Exception("invalid css string at " ~ text ~ " in " ~ original);
				}
			} else {
				// no n at all: the number is a fixed position
				adder = n;
			}
		}

		consumeWhitespace(text);
		if(text.startsWith("of")) {
			text = text[2 .. $];
			consumeWhitespace(text);
			of = text[0 .. $];
		}
	}

	/// Renders the formula back as `An+B` (plus ` of selector` when present).
	string toString() {
		return format("%dn%s%d%s%s", multiplier, adder >= 0 ? "+" : "", adder, of.length ? " of " : "", of);
	}

	/++
		Decides whether `e` sits at a position in `elements` that satisfies the formula.

		Positions are counted from 1 in iteration order. When `of` is set, only
		elements matching that selector are counted.

		Returns: `false` if `e` is not found among the counted elements, otherwise
		whether `multiplier * n + adder == position` has an integer solution with `n >= 0`.
	+/
	bool solvesFor(R)(R elements, Element e) {
		int idx = 1;
		bool found = false;

		// the "of" selector does not change between iterations, so parse it once
		Selector sel;
		if(of.length)
			sel = Selector(of);

		foreach(ele; elements) {
			if(of.length) {
				if(!sel.matchesElement(ele))
					continue;
			}
			if(ele is e) {
				found = true;
				break;
			}
			idx++;
		}
		if(!found) return false;

		// multiplier* n + adder = idx
		// if there is a solution for integral n >= 0, it matches

		idx -= adder;
		if(multiplier) {
			// the remainder check finds an integral n; the quotient check rejects
			// negative n (otherwise e.g. 2n+3 would match the first child)
			return idx % multiplier == 0 && idx / multiplier >= 0;
		}
		return idx == 0;
	}

	// drops leading space characters from text
	private void consumeWhitespace(ref string text) {
		while(text.length && text[0] == ' ')
			text = text[1 .. $];
	}

	// reads an optionally signed run of decimal digits off the front of text;
	// returns 0 (after consuming any sign) when there are no digits
	private int parseNumber(ref string text) {
		consumeWhitespace(text);
		if(text.length == 0) return 0;
		bool negative = text[0] == '-';
		if(text[0] == '+')
			text = text[1 .. $];
		if(negative) text = text[1 .. $];
		int i = 0;
		while(i < text.length && (text[i] >= '0' && text[i] <= '9'))
			i++;
		if(i == 0)
			return 0;
		int cool = to!int(text[0 .. i]);
		text = text[i .. $];
		return negative ? -cool : cool;
	}
}

// USEFUL
/++
	Walks from `start` following `parts` one at a time and collects every element
	reached once all parts have been consumed.

	Each part's `separation` picks where candidates come from relative to `start`:
	-1/0 the whole `start.tree` (0 excludes `start` itself), 1 `childNodes`,
	2 the next sibling element, 3 all later sibling elements, 4 the parent node.
	Separation 5 is recognized but not implemented and yields nothing.

	The result may contain duplicates; callers in this file pass it through
	`removeDuplicates`.
+/
Element[] getElementsBySelectorParts(Element start, SelectorPart[] parts) {
	Element[] ret;
	if(!parts.length) {
		return [start]; // the null selector only matches the start point; it
				// is what terminates the recursion
	}

	auto part = parts[0];
	//writeln("checking ", part, " against ", start, " with ", part.separation);
	switch(part.separation) {
		default: assert(0);
		case -1:
		case 0: // tree
			foreach(e; start.tree) {
				if(part.separation == 0 && start is e)
					continue; // space doesn't match itself!
				if(part.matchElement(e)) {
					ret ~= getElementsBySelectorParts(e, parts[1..$]);
				}
			}
		break;
		case 1: // children
			foreach(e; start.childNodes) {
				if(part.matchElement(e)) {
					ret ~= getElementsBySelectorParts(e, parts[1..$]);
				}
			}
		break;
		case 2: // next-sibling
			// matchElement rejects null, so a missing sibling simply yields nothing
			auto e = start.nextSibling("*");
			if(part.matchElement(e))
				ret ~= getElementsBySelectorParts(e, parts[1..$]);
		break;
		case 3: // younger sibling
			auto tmp = start.parentNode;
			if(tmp !is null) {
				sizediff_t pos = -1;
				auto children = tmp.childElements;
				foreach(i, child; children) {
					if(child is start) {
						pos = i;
						break;
					}
				}
				assert(pos != -1);
				foreach(e; children[pos+1..$]) {
					if(part.matchElement(e))
						ret ~= getElementsBySelectorParts(e, parts[1..$]);
				}
			}
		break;
		case 4: // immediate parent node, an extension of mine to walk back up the tree
			auto e = start.parentNode;
			if(part.matchElement(e)) {
				ret ~= getElementsBySelectorParts(e, parts[1..$]);
			}
			/*
				Example of usefulness:

				Consider you have an HTML table. If you want to get all rows that have a th, you can do:

				table th < tr

				Get all th descendants of the table, then walk back up the tree to fetch their parent tr nodes
			*/
		break;
		case 5: // any parent note, another extension of mine to go up the tree (backward of the whitespace operator)
			/*
				Like with the < operator, this is best used to find some parent of a particular known element.

				Say you have an anchor inside a
			*/
			// not implemented: contributes no elements
		break;
	}

	return ret;
}

/++
	Represents a parsed CSS selector. You never have to use this directly, but you can if you know it is going to be reused a lot to avoid a bit of repeat parsing.

	See_Also:
		$(LIST
			* [Element.querySelector]
			* [Element.querySelectorAll]
			* [Element.matches]
			* [Element.closest]
			* [Document.querySelector]
			* [Document.querySelectorAll]
		)
+/
/// Group: core_functionality
struct Selector {
	SelectorComponent[] components;
	string original;
	/++
		Parses the selector string and constructs the usable structure.
	+/
	this(string cssSelector) {
		components = parseSelectorString(cssSelector);
		original = cssSelector;
	}

	/++
		Returns true if the given element matches this selector,
		considered relative to an arbitrary element.

		The element matches when at least one of the comma-separated
		components matches it.

		You can do a form of lazy [Element.querySelectorAll|querySelectorAll] by using this
		with [std.algorithm.iteration.filter]:

		---
		Selector sel = Selector("foo > bar");
		auto lazySelectorRange = element.tree.filter!(e => sel.matchesElement(e));
		---
	+/
	bool matchesElement(Element e, Element relativeTo = null) {
		foreach(component; components)
			if(component.matchElement(e, relativeTo))
				return true;

		return false;
	}

	/++
		Reciprocal of [Element.querySelectorAll]

		Collects the matches of every component starting from `start`
		and removes duplicates from the combined list.
	+/
	Element[] getMatchingElements(Element start) {
		Element[] ret;
		foreach(component; components)
			ret ~= getElementsBySelectorParts(start, component.parts);
		return removeDuplicates(ret);
	}

	/++
		Like [getMatchingElements], but returns a lazy range. Be careful
		about mutating the dom as you iterate through this.
	+/
	auto getMatchingElementsLazy(Element start, Element relativeTo = null) {
		import std.algorithm.iteration;
		return start.tree.filter!(a => this.matchesElement(a, relativeTo));
	}


	/// Returns the string this was built from
	string toString() {
		return original;
	}

	/++
		Returns a string from the parsed result


		(may not match the original, this is mostly for debugging right now but in the future might be useful for pretty-printing)
	+/
	string parsedToString() {
		string ret;

		foreach(idx, component; components) {
			if(idx) ret ~= ", ";
			ret ~= component.toString();
		}

		return ret;
	}
}

/// One comma-free selector: a sequence of [SelectorPart]s joined by combinators.
struct SelectorComponent {
	/// The parts in source order; each part carries the combinator that precedes it.
	SelectorPart[] parts;

	/// Concatenates the string form of every part.
	string toString() {
		string ret;
		foreach(part; parts)
			ret ~= part.toString();
		return ret;
	}

	// USEFUL
	/// Returns the de-duplicated elements reached from `start` by following all parts.
	Element[] getElements(Element start) {
		return removeDuplicates(getElementsBySelectorParts(start, parts));
	}

	// USEFUL (but not implemented)
	/// If relativeTo == null, it assumes the root of the parent document.
	///
	/// Works backwards: the last part must match `e` itself, then each earlier
	/// part must match a parent / ancestor / previous sibling as dictated by the
	/// combinator that sat between the two parts.
	bool matchElement(Element e, Element relativeTo = null) {
		if(e is null) return false;
		Element where = e;
		int lastSeparation = -1;

		auto lparts = parts;

		if(parts.length && parts[0].separation > 0) {
			// if it starts with a non-trivial separator, inject
			// a "*" matcher to act as a root. for cases like document.querySelector("> body")
			// which implies html

			// there is probably a MUCH better way to do this.
			auto dummy = SelectorPart.init;
			dummy.tagNameFilter = "*";
			dummy.separation = 0;
			lparts = dummy ~ lparts;
		}

		foreach(part; retro(lparts)) {

			 // writeln("matching ", where, " with ", part, " via ", lastSeparation);
			 // writeln(parts);

			if(lastSeparation == -1) {
				// first iteration: the right-most part is tested against e itself
				if(!part.matchElement(where))
					return false;
			} else if(lastSeparation == 0) { // generic parent
				// need to go up the whole chain
				where = where.parentNode;

				while(where !is null) {
					if(part.matchElement(where))
						break;

					if(where is relativeTo)
						return false;

					where = where.parentNode;
				}

				if(where is null)
					return false;
			} else if(lastSeparation == 1) { // the > operator
				where = where.parentNode;

				if(!part.matchElement(where))
					return false;
			} else if(lastSeparation == 2) { // the + operator
			//writeln("WHERE", where, " ", part);
				where = where.previousSibling("*");

				if(!part.matchElement(where))
					return false;
			} else if(lastSeparation == 3) { // the ~ operator
				where = where.previousSibling("*");
				while(where !is null) {
					if(part.matchElement(where))
						break;

					if(where is relativeTo)
						return false;

					where = where.previousSibling("*");
				}

				if(where is null)
					return false;
			} else if(lastSeparation == 4) { // my bad idea extension < operator, don't use this anymore
				// FIXME
			}

			lastSeparation = part.separation;

			if(where is relativeTo)
				return false; // at end of line, if we aren't done by now, the match fails
		}
		return true; // if we got here, it is a success
	}

	// the string should NOT have commas. Use parseSelectorString for that instead
	/// Lexes and parses a single comma-free selector.
	static SelectorComponent fromString(string selector) {
		return parseSelector(lexSelector(selector));
	}
}

///.
SelectorComponent[] parseSelectorString(string selector, bool caseSensitiveTags = true) {
	// Splits the token stream from lexSelector on top-level commas and parses each
	// slice with parseSelector. Commas nested inside parentheses (e.g. the
	// argument list of :is(a, b)) do not split. Empty slices are skipped.
	SelectorComponent[] ret;
	auto tokens = lexSelector(selector); // this will parse commas too
	// and now do comma-separated slices (i haz phobosophobia!)
	int parensCount = 0;
	while (tokens.length > 0) {
		size_t end = 0;
		while (end < tokens.length && (parensCount > 0 || tokens[end] != ",")) {
			if(tokens[end] == "(") parensCount++;
			// never let a stray ")" drive the depth negative, or a later "(" would
			// be seen as depth 0 and commas inside it would wrongly split
			if(tokens[end] == ")" && parensCount > 0) parensCount--;
			++end;
		}
		if (end > 0) ret ~= parseSelector(tokens[0..end], caseSensitiveTags);
		if (tokens.length-end < 2) break;
		tokens = tokens[end+1..$];
	}
	return ret;
}

/++
	Builds one [SelectorComponent] from an already-lexed, comma-free token list.

	Runs a small state machine over the tokens, filling in a [SelectorPart] and
	committing it to the result whenever a combinator (` `, `>`, `>>`, `+`, `~`, `<`)
	starts the next part.

	Params:
		tokens = tokens as produced by `lexSelector`
		caseSensitiveTags = when false, tag name tokens are lower-cased before being stored

	Throws: Exception when a functional pseudo-class is not followed by `(`, when
	an unexpected token appears where a tag name or operator is expected, or when
	an attribute comparison operator is not recognized.
+/
SelectorComponent parseSelector(string[] tokens, bool caseSensitiveTags = true) {
	SelectorComponent s;

	SelectorPart current;
	void commit() {
		// might as well skip null items
		if(!current.isCleanSlateExceptSeparation()) {
			s.parts ~= current;
			current = current.init; // start right over
		}
	}
	enum State {
		Starting,
		ReadingClass,
		ReadingId,
		ReadingAttributeSelector,
		ReadingAttributeComparison,
		ExpectingAttributeCloser,
		ReadingPseudoClass,
		ReadingAttributeValue,

		SkippingFunctionalSelector,
	}
	State state = State.Starting;
	string attributeName, attributeValue, attributeComparison;
	int parensCount;
	foreach(idx, token; tokens) {
		// Returns the concatenated tokens between the parentheses that follow the
		// current token, honoring nested parentheses. It does not advance the
		// loop; the SkippingFunctionalSelector state does that afterwards.
		string readFunctionalSelector() {
			string argument;
			// bounds check first: "a:not" with nothing after it must be a parse
			// error, not an out-of-range array access
			if(idx + 1 >= tokens.length || tokens[idx + 1] != "(")
				throw new Exception("parse error");
			int pc = 1;
			foreach(t; tokens[idx + 2 .. $]) {
				if(t == "(")
					pc++;
				if(t == ")")
					pc--;
				if(pc == 0)
					break;
				argument ~= t;
			}

			return argument;
		}

		// index of the token among the known operator tokens, or -1 for a plain word
		sizediff_t tid = -1;
		foreach(i, item; selectorTokens)
			if(token == item) {
				tid = i;
				break;
			}
		final switch(state) {
			case State.Starting: // fresh, might be reading an operator or a tagname
				if(tid == -1) {
					if(!caseSensitiveTags)
						token = token.toLower();

					if(current.isCleanSlateExceptSeparation()) {
						current.tagNameFilter = token;
						// default thing, see comment under "*" below
						if(current.separation == -1) current.separation = 0;
					} else {
						// if it was already set, we must see two thingies
						// separated by whitespace...
						commit();
						current.separation = 0; // tree
						current.tagNameFilter = token;
					}
				} else {
					// Selector operators
					switch(token) {
						case "*":
							current.tagNameFilter = "*";
							// the idea here is if we haven't actually set a separation
							// yet (e.g. the > operator), it should assume the generic
							// whitespace (descendant) mode to avoid matching self with -1
							if(current.separation == -1) current.separation = 0;
						break;
						case " ":
							// If some other separation has already been set,
							// this is irrelevant whitespace, so we should skip it.
							// this happens in the case of "foo > bar" for example.
							if(current.isCleanSlateExceptSeparation() && current.separation > 0)
								continue;
							commit();
							current.separation = 0; // tree
						break;
						case ">>":
							commit();
							current.separation = 0; // alternate syntax for tree from html5 css
						break;
						case ">":
							commit();
							current.separation = 1; // child
						break;
						case "+":
							commit();
							current.separation = 2; // sibling directly after
						break;
						case "~":
							commit();
							current.separation = 3; // any sibling after
						break;
						case "<":
							commit();
							current.separation = 4; // immediate parent of
						break;
						case "[":
							state = State.ReadingAttributeSelector;
							if(current.separation == -1) current.separation = 0;
						break;
						case ".":
							state = State.ReadingClass;
							if(current.separation == -1) current.separation = 0;
						break;
						case "#":
							state = State.ReadingId;
							if(current.separation == -1) current.separation = 0;
						break;
						case ":":
						case "::":
							state = State.ReadingPseudoClass;
							if(current.separation == -1) current.separation = 0;
						break;

						default:
							// a known operator token in a place where it makes no sense
							// (stray ")", "]", "=", ...). This is bad input, not a
							// program bug, so report it instead of asserting.
							throw new Exception("unexpected token in css selector: " ~ token);
					}
				}
			break;
			case State.ReadingClass:
				current.attributesIncludesSeparatedBySpaces ~= ["class", token];
				state = State.Starting;
			break;
			case State.ReadingId:
				current.attributesEqual ~= ["id", token];
				state = State.Starting;
			break;
			case State.ReadingPseudoClass:
				switch(token) {
					case "first-of-type":
						current.firstOfType = true;
					break;
					case "last-of-type":
						current.lastOfType = true;
					break;
					case "only-of-type":
						current.firstOfType = true;
						current.lastOfType = true;
					break;
					case "first-child":
						current.firstChild = true;
					break;
					case "last-child":
						current.lastChild = true;
					break;
					case "only-child":
						current.firstChild = true;
						current.lastChild = true;
					break;
					case "scope":
						current.scopeElement = true;
					break;
					case "empty":
						// one with no children
						current.emptyElement = true;
					break;
					case "whitespace-only":
						current.whitespaceOnly = true;
					break;
					case "link":
						current.attributesPresent ~= "href";
					break;
					case "root":
						current.rootElement = true;
					break;
					case "nth-child":
						current.nthChild ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-of-type":
						current.nthOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-last-of-type":
						current.nthLastOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "is":
						state = State.SkippingFunctionalSelector;
						current.isSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "where":
						state = State.SkippingFunctionalSelector;
						current.whereSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "not":
						state = State.SkippingFunctionalSelector;
						current.notSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "has":
						state = State.SkippingFunctionalSelector;
						current.hasSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					// back to standards though not quite right lol
					case "disabled":
						current.attributesPresent ~= "disabled";
					break;
					case "checked":
						current.attributesPresent ~= "checked";
					break;

					case "visited", "active", "hover", "target", "focus", "selected":
						// requires an attribute literally named "nothing" to be present
						current.attributesPresent ~= "nothing";
						// FIXME
					/+
					// extensions not implemented
					//case "text": // takes the text in the element and wraps it in an element, returning it
					+/
						goto case;
					case "before", "after":
						// likewise requires an attribute literally named "FIXME"
						current.attributesPresent ~= "FIXME";

					break;
					// My extensions
					case "odd-child":
						current.oddChild = true;
					break;
					case "even-child":
						current.evenChild = true;
					break;
					default:
						// unknown pseudo-classes are silently ignored
						//if(token.indexOf("lang") == -1)
						//assert(0, token);
					break;
				}
				state = State.Starting;
			break;
			case State.SkippingFunctionalSelector:
				// swallow the "( ... )" that readFunctionalSelector already consumed
				if(token == "(") {
					parensCount++;
				} else if(token == ")") {
					parensCount--;
				}

				if(parensCount == 0)
					state = State.Starting;
			break;
			case State.ReadingAttributeSelector:
				attributeName = token;
				attributeComparison = null;
				attributeValue = null;
				state = State.ReadingAttributeComparison;
			break;
			case State.ReadingAttributeComparison:
				// FIXME: these things really should be quotable in the proper lexer...
				if(token != "]") {
					if(token.indexOf("=") == -1) {
						// not a comparison; consider it
						// part of the attribute
						// NOTE(review): this appends to attributeValue, which is
						// overwritten once a comparison is seen; the comment suggests
						// attributeName may have been intended - confirm against lexer output
						attributeValue ~= token;
					} else {
						attributeComparison = token;
						state = State.ReadingAttributeValue;
					}
					break;
				}
				goto case;
			case State.ExpectingAttributeCloser:
				if(token != "]") {
					// not the closer; consider it part of comparison
					if(attributeComparison == "")
						attributeName ~= token;
					else
						attributeValue ~= token;
					break;
				}

				// Selector operators
				switch(attributeComparison) {
					default:
						// contains "=" but is not an operator we know: bad input
						throw new Exception("unknown attribute comparison in css selector: " ~ attributeComparison);
					case "":
						current.attributesPresent ~= attributeName;
					break;
					case "=":
						current.attributesEqual ~= [attributeName, attributeValue];
					break;
					case "|=":
						current.attributesIncludesSeparatedByDashes ~= [attributeName, attributeValue];
					break;
					case "~=":
						current.attributesIncludesSeparatedBySpaces ~= [attributeName, attributeValue];
					break;
					case "$=":
						current.attributesEndsWith ~= [attributeName, attributeValue];
					break;
					case "^=":
						current.attributesStartsWith ~= [attributeName, attributeValue];
					break;
					case "*=":
						current.attributesInclude ~= [attributeName, attributeValue];
					break;
					case "!=":
						current.attributesNotEqual ~= [attributeName, attributeValue];
					break;
				}

				state = State.Starting;
			break;
			case State.ReadingAttributeValue:
				attributeValue = token;
				state = State.ExpectingAttributeCloser;
			break;
		}
	}

	commit();

	return s;
}

///.
Element[] removeDuplicates(Element[] input) {
	// Returns the elements of input with repeats dropped; the first occurrence of
	// each element is kept, so the relative order of the survivors is unchanged.
	Element[] ret;

	bool[Element] already;
	foreach(e; input) {
		if(e in already) continue;
		already[e] = true;
		ret ~= e;
	}

	return ret;
}

// done with CSS selector handling


// FIXME: use the better parser from html.d
/// This is probably not useful to you unless you're writing a browser or something like that.
/// It represents a *computed* style, like what the browser gives you after applying stylesheets, inline styles, and html attributes.
/// From here, you can start to make a layout engine for the box model and have a css aware browser.
class CssStyle {
	/++
		Builds a style from a rule (selector text; empty for an inline style)
		and the declaration text that goes between its braces.

		Declarations are split on `;`. A declaration without a `:` is skipped.
		`!important` markers are stripped from the stored value. Shorthand
		properties are expanded after all declarations have been read.
	+/
	this(string rule, string content) {
		rule = rule.strip();
		content = content.strip();

		if(content.length == 0)
			return;

		originatingRule = rule;
		originatingSpecificity = getSpecificityOfRule(rule); // FIXME: if there's commas, this won't actually work!

		foreach(part; content.split(";")) {
			part = part.strip();
			if(part.length == 0)
				continue;
			auto idx = part.indexOf(":");
			if(idx == -1)
				continue;
				//throw new Exception("Bad css rule (no colon): " ~ part);

			Property p;

			p.name = part[0 .. idx].strip();
			p.value = part[idx + 1 .. $].replace("! important", "!important").replace("!important", "").strip(); // FIXME don't drop important
			p.givenExplicitly = true;
			p.specificity = originatingSpecificity;

			properties ~= p;
		}

		// iterates the slice as it was when the loop started; longhands appended
		// by the expansion are not themselves expanded again
		foreach(property; properties)
			expandShortForm(property, originatingSpecificity);
	}

	/// Computes the specificity of a rule. Currently a stub: always returns the zero specificity.
	Specificity getSpecificityOfRule(string rule) {
		Specificity s;
		if(rule.length == 0) { // inline
		//	s.important = 2;
		} else {
			// FIXME
		}

		return s;
	}

	string originatingRule; ///.
	Specificity originatingSpecificity; ///.

	/// Specificity packed so that the four byte fields can be compared at once through `score`.
	union Specificity {
		uint score; ///.
		// version(little_endian)
		///.
		struct {
			ubyte tags; ///.
			ubyte classes; ///.
			ubyte ids; ///.
			ubyte important; /// 0 = none, 1 = stylesheet author, 2 = inline style, 3 = user important
		}
	}

	///.
	struct Property {
		bool givenExplicitly; /// this is false if for example the user said "padding" and this is "padding-left"
		string name; ///.
		string value; ///.
		Specificity specificity; ///.
		// do we care about the original source rule?
	}

	///.
	Property[] properties;

	/// Property access by camelCase name: `style.marginLeft` reads, `style.marginLeft("1px")` writes.
	string opDispatch(string nameGiven)(string value = null) if(nameGiven != "popFront") {
		string name = unCamelCase(nameGiven);
		if(value is null)
			return getValue(name);
		else {
			// setValue takes a Specificity, not an integer, so build one explicitly
			Specificity inlineSpecificity;
			inlineSpecificity.score = 0x02000000; /* inline specificity */
			return setValue(name, value, inlineSpecificity);
		}
	}

	/// takes dash style name
	///
	/// Returns the value of the first property with that name, or null if there is none.
	string getValue(string name) {
		foreach(property; properties)
			if(property.name == name)
				return property.value;
		return null;
	}

	/++
		Sets a property (takes dash style name), subject to specificity.

		An existing property is only overwritten when `newSpecificity.score` is at
		least the stored one. A missing property is appended. Either way,
		shorthands are expanded into their longhand properties afterwards.

		Params:
			name = dash-style property name
			value = new value; a `!important` marker is stripped and raises `important` to 1
			newSpecificity = specificity of whoever is setting the value
			explicit = false when the value is inferred from a shorthand

		Returns: `value` (with any `!important` marker removed).
	+/
	string setValue(string name, string value, Specificity newSpecificity, bool explicit = true) {
		value = value.replace("! important", "!important");
		if(value.indexOf("!important") != -1) {
			newSpecificity.important = 1; // FIXME
			value = value.replace("!important", "").strip();
		}

		foreach(ref property; properties)
			if(property.name == name) {
				if(newSpecificity.score >= property.specificity.score) {
					// Update the stored property BEFORE expanding: the expansion can
					// append to `properties`, which may reallocate the array and leave
					// this `ref` pointing at the stale copy. It also has to see the
					// new value, not the one being replaced.
					property.givenExplicitly = explicit;
					property.value = value;
					// remember who won, so a weaker rule cannot override it later
					property.specificity = newSpecificity;
					expandShortForm(property, newSpecificity);
					return value;
				} else {
					if(name == "display")
					{}//writeln("Not setting ", name, " to ", value, " because ", newSpecificity.score, " < ", property.specificity.score);
					return value; // do nothing - the specificity is too low
				}
			}

		// it's not here...

		Property p;
		// honor the caller's flag so longhands inferred from a shorthand are not
		// reported as explicitly given (toString skips the inferred ones)
		p.givenExplicitly = explicit;
		p.name = name;
		p.value = value;
		p.specificity = newSpecificity;

		properties ~= p;
		expandShortForm(p, newSpecificity);

		return value;
	}

	// Expands a 1-4 value box shorthand (margin, padding) into -top/-right/-bottom/-left.
	private void expandQuadShort(string name, string value, Specificity specificity) {
		// split on runs of whitespace so doubled spaces do not produce empty parts
		auto parts = value.split();
		switch(parts.length) {
			case 1:
				setValue(name ~"-left", parts[0], specificity, false);
				setValue(name ~"-right", parts[0], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 2:
				setValue(name ~"-left", parts[1], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 3:
				// top | left and right | bottom
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[1], specificity, false);

			break;
			case 4:
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[3], specificity, false);
			break;
			default:
				// empty or more than four values: malformed input from a
				// stylesheet, so ignore it instead of aborting the program
			break;
		}
	}

	/// Expands `margin`, `padding`, `border` and `outline` into per-side properties. Other names are left alone.
	void expandShortForm(Property p, Specificity specificity) {
		switch(p.name) {
			case "margin":
			case "padding":
				expandQuadShort(p.name, p.value, specificity);
			break;
			case "border":
			case "outline":
				// the whole value is copied to each side unchanged
				setValue(p.name ~ "-left", p.value, specificity, false);
				setValue(p.name ~ "-right", p.value, specificity, false);
				setValue(p.name ~ "-top", p.value, specificity, false);
				setValue(p.name ~ "-bottom", p.value, specificity, false);
			break;

			case "border-top":
			case "border-bottom":
			case "border-left":
			case "border-right":
			case "outline-top":
			case "outline-bottom":
			case "outline-left":
			case "outline-right":

			default: {}
		}
	}

	/// Serializes the explicitly given properties. With a rule it emits `rule { ... }`, otherwise a bare declaration list.
	override string toString() {
		string ret;
		if(originatingRule.length)
			ret = originatingRule ~ " {";

		foreach(property; properties) {
			if(!property.givenExplicitly)
				continue; // skip the inferred shit

			if(originatingRule.length)
				ret ~= "\n\t";
			else
				ret ~= " ";

			ret ~= property.name ~ ": " ~ property.value ~ ";";
		}

		if(originatingRule.length)
			ret ~= "\n}\n";

		return ret;
	}
}

/// Wraps `url` in `url("...")`. The argument is inserted as-is, without escaping.
string cssUrl(string url) {
	return "url(\"" ~ url ~ "\")";
}

/// This probably isn't useful, unless you're writing a browser or something like that.
/// You might want to look at arsd.html for css macro, nesting, etc., or just use standard css
/// as text.
///
/// The idea, however, is to represent a kind of CSS object model, complete with specificity,
/// that you can apply to your documents to build the complete computedStyle object.
class StyleSheet {
	///.
	CssStyle[] rules;

	/++
		Parses stylesheet text into [rules].

		A character-by-character state machine: state 0 reads rule or declaration
		text, 1-3 handle `/* */` comments, 4 skips an at-rule up to its `;` or `{`,
		and 5 skips the at-rule block up to the next `}`.
	+/
	this(string source) {
		// FIXME: handle @ rules and probably could improve lexer
		// add nesting?
		int state;
		string currentRule;
		string currentValue;

		// points at whichever buffer the characters are currently collected into
		string* currentThing = &currentRule;
		foreach(c; source) {
			switch(state) {
				default: assert(0);
				case 0: // starting - we assume we're reading a rule
					switch(c) {
						case '@':
							state = 4;
						break;
						case '/':
							state = 1;
						break;
						case '{':
							currentThing = &currentValue;
						break;
						case '}':
							if(currentThing is &currentValue) {
								rules ~= new CssStyle(currentRule, currentValue);

								currentRule = "";
								currentValue = "";

								currentThing = &currentRule;
							} else {
								// idk what is going on here.
								// check sveit.com to reproduce
								currentRule = "";
								currentValue = "";
							}
						break;
						default:
							(*currentThing) ~= c;
					}
				break;
				case 1: // expecting *
					if(c == '*')
						state = 2;
					else {
						// it was a plain slash, not a comment opener
						state = 0;
						(*currentThing) ~= "/" ~ c;
					}
				break;
				case 2: // inside comment
					if(c == '*')
						state = 3;
				break;
				case 3: // expecting / to end comment
					if(c == '/')
						state = 0;
					else
						state = 2; // it's just a comment so no need to append
				break;
				case 4:
					if(c == '{')
						state = 5;
					if(c == ';')
						state = 0; // just skipping import
				break;
				case 5:
					if(c == '}')
						state = 0; // skipping font face probably
			}
		}
	}

	/// Run through the document and apply this stylesheet to it. The computedStyle member will be accurate after this call
	void apply(Document document) {
		foreach(rule; rules) {
			if(rule.originatingRule.length == 0)
				continue; // this shouldn't happen here in a stylesheet
			foreach(element; document.querySelectorAll(rule.originatingRule)) {
				// note: this should be a different object than the inline style
				// since givenExplicitly is likely destroyed here
				auto current = element.computedStyle;

				foreach(item; rule.properties)
					current.setValue(item.name, item.value, item.specificity);
			}
		}
	}
}


/// This is kinda private; just a little utility container for use by the ElementStream class.
final class Stack(T) {
	this() {
		internalLength = 0;
		arr = initialBuffer[];
	}

	/// Adds `t` on top, growing the storage when it is full (doubling below 4096 slots, then 4096 at a time).
	void push(T t) {
		if(internalLength >= arr.length) {
			auto oldarr = arr;
			if(arr.length < 4096)
				arr = new T[arr.length * 2];
			else
				arr = new T[arr.length + 4096];
			arr[0 .. oldarr.length] = oldarr[];
		}

		arr[internalLength] = t;
		internalLength++;
	}

	/// Removes and returns the top item. The stack must not be empty.
	T pop() {
		assert(internalLength);
		internalLength--;
		return arr[internalLength];
	}

	/// Returns the top item without removing it. The stack must not be empty.
	T peek() {
		assert(internalLength);
		return arr[internalLength - 1];
	}

	/// True when nothing is on the stack.
	@property bool empty() {
		return internalLength ? false : true;
	}

	///.
	private T[] arr;
	private size_t internalLength;
	private T[64] initialBuffer;
	// the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep),
	// using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push()
	// function thanks to this, and push() was actually one of the slowest individual functions in the code!
}

/// This is the lazy range that walks the tree for you. It tries to go in the lexical order of the source: node, then children from first to last, each recursively.
final class ElementStream {

	/// The element the range is currently positioned on.
	@property Element front() {
		return current.element;
	}

	/// Use Element.tree instead.
	this(Element start) {
		current.element = start;
		current.childPosition = -1;
		isEmpty = false;
		stack = new Stack!(Current);
	}

	/*
		Handle it
		handle its children

	*/

	/// Advances to the next element: the next unvisited child if there is one, otherwise back up the stack until some ancestor has one.
	void popFront() {
	    more:
	    	if(isEmpty) return;

		// FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times)

		current.childPosition++;
		if(current.childPosition >= current.element.children.length) {
			// this node is exhausted; resume its parent or finish
			if(stack.empty())
				isEmpty = true;
			else {
				current = stack.pop();
				goto more;
			}
		} else {
			// descend into the next child, remembering where we were
			stack.push(current);
			current.element = current.element.children[current.childPosition];
			current.childPosition = -1;
		}
	}

	/// You should call this when you remove an element from the tree. It then doesn't recurse into that node and adjusts the current position, keeping the range stable.
	void currentKilled() {
		if(stack.empty) // should never happen
			isEmpty = true;
		else {
			current = stack.pop();
			current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right
		}
	}

	/// True once the walk has run out of elements.
	@property bool empty() {
		return isEmpty;
	}

	private:

	// one level of the walk: a node and the index of the child last descended into
	struct Current {
		Element element;
		int childPosition;
	}

	Current current;

	Stack!(Current) stack;

	bool isEmpty;
}



// unbelievable.
// Don't use any of these in your own code. Instead, try to use phobos or roll your own, as I might kill these at any time.
/// Returns the index of the first occurrence of `needle` in `haystack`,
/// or -1 when `std.algorithm.find` comes back with nothing left.
sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) {
	static import std.algorithm;
	auto found = std.algorithm.find(haystack, needle);
	if(found.length == 0)
		return -1;
	// find returns the tail starting at the match, so the match offset is what was cut off the front
	return haystack.length - found.length;
}

/// Returns a new array holding `arr[0 .. position + 1]`, then `what`, then the
/// remainder of `arr`. Neither input array is modified.
private T[] insertAfter(T)(T[] arr, int position, T[] what) {
	assert(position < arr.length);
	// concatenation always allocates a fresh array, same as the hand-written copy it replaces
	return arr[0 .. position + 1] ~ what ~ arr[position + 1 .. $];
}

/// Linear search: true if any member of `arr` compares equal to `item`.
package bool isInArray(T)(T item, T[] arr) {
	foreach(i; arr)
		if(item == i)
			return true;
	return false;
}

/// Copies every key/value pair of `arr` into a new associative array.
private string[string] aadup(in string[string] arr) {
	string[string] ret;
	foreach(k, v; arr)
		ret[k] = v;
	return ret;
}

// dom event support, if you want to use it

/// used for DOM events
alias EventHandler = void delegate(Element handlerAttachedTo, Event event);

/// This is a DOM event, like in javascript. Note that this library never fires events - it is only here for you to use if you want it.
class Event {
	/// Creates an event called `eventName` whose target (`srcElement`) is `target`.
	this(string eventName, Element target) {
		this.eventName = eventName;
		this.srcElement = target;
	}

	/// Prevents the default event handler (if there is one) from being called
	void preventDefault() {
		defaultPrevented = true;
	}

	/// Stops the event propagation. dispatch() looks at this flag after it has
	/// run the handlers of one element, so it does not visit any further elements.
	void stopPropagation() {
		propagationStopped = true;
	}

	/// set by preventDefault(); send() and dispatch() skip the default handlers when it is true
	bool defaultPrevented;
	/// set by stopPropagation(); dispatch() stops walking the chain when it is true
	bool propagationStopped;
	/// the key used to look handlers up on each element
	string eventName;

	/// the element the event is aimed at; `target` is another name for it
	Element srcElement;
	alias srcElement target;

	// The next four are not read or written by this class; they are there for
	// whoever creates the event to fill in.
	Element relatedTarget;

	int clientX;
	int clientY;

	int button;

	/// dispatch() sets this to false for the capture pass and to true before the bubble pass
	bool isBubbling;

	/// this sends it only to the target. If you want propagation, use dispatch() instead.
	void send() {
		if(srcElement is null)
			return;

		auto e = srcElement;

		if(eventName in e.bubblingEventHandlers)
		foreach(handler; e.bubblingEventHandlers[eventName])
			handler(e, this);

		if(!defaultPrevented)
			if(eventName in e.defaultEventHandlers)
				e.defaultEventHandlers[eventName](e, this);
	}

	/// this dispatches the event using the capture -> target -> bubble process
	void dispatch() {
		if(srcElement is null)
			return;

		// first capture, then bubble

		// chain[0] is the target, the last entry is the topmost ancestor
		Element[] chain;
		Element curr = srcElement;
		while(curr) {
			auto l = curr;
			chain ~= l;
			curr = curr.parentNode;

		}

		isBubbling = false;

		// capture pass: from the topmost ancestor down to the target
		foreach(e; chain.retro()) {
			if(eventName in e.capturingEventHandlers)
			foreach(handler; e.capturingEventHandlers[eventName])
				handler(e, this);

			// the default on capture should really be to always do nothing

			//if(!defaultPrevented)
			//	if(eventName in e.defaultEventHandlers)
			//		e.defaultEventHandlers[eventName](e.element, this);

			if(propagationStopped)
				break;
		}

		// bubble pass: from the target back up, skipped entirely if capture stopped propagation
		isBubbling = true;
		if(!propagationStopped)
		foreach(e; chain) {
			if(eventName in e.bubblingEventHandlers)
			foreach(handler; e.bubblingEventHandlers[eventName])
				handler(e, this);

			if(propagationStopped)
				break;
		}

		// default handlers run for every element in the chain, and they run even
		// when propagation was stopped; only preventDefault() suppresses them
		if(!defaultPrevented)
		foreach(e; chain) {
			if(eventName in e.defaultEventHandlers)
				e.defaultEventHandlers[eventName](e, this);
		}
	}
}

/// A small bundle of validation hints for a form field, with helpers to read
/// them from an element's attributes and to write them back.
struct FormFieldOptions {
	// usable for any

	/// this is a regex pattern used to validate the field
	string pattern;
	/// must the field be filled in? Even with a regex, it can be submitted blank if this is false.
	bool isRequired;
	/// this is displayed as an example to the user
	string placeholder;

	// usable for numeric ones


	// convenience methods to quickly get some options

	/// All fields left at their defaults.
	@property static FormFieldOptions none() {
		FormFieldOptions f;
		return f;
	}

	/// Only `isRequired` set.
	static FormFieldOptions required() {
		FormFieldOptions f;
		f.isRequired = true;
		return f;
	}

	/// `pattern` set, and `isRequired` set from the second argument.
	static FormFieldOptions regex(string pattern, bool required = false) {
		FormFieldOptions f;
		f.pattern = pattern;
		f.isRequired = required;
		return f;
	}

	/// Reads the `required`, `pattern` and `placeholder` attributes of `e`.
	/// An attribute that is absent leaves the matching field at its default.
	static FormFieldOptions fromElement(Element e) {
		FormFieldOptions f;
		if(e.hasAttribute("required"))
			f.isRequired = true;
		if(e.hasAttribute("pattern"))
			f.pattern = e.pattern;
		if(e.hasAttribute("placeholder"))
			f.placeholder = e.placeholder;
		return f;
	}

	/// Writes the options that are set onto `e` and returns `e`. Options that
	/// are unset (false or empty) leave the element's attributes untouched.
	Element applyToElement(Element e) {
		if(this.isRequired)
			e.required = "required";
		if(this.pattern.length)
			e.pattern = this.pattern;
		if(this.placeholder.length)
			e.placeholder = this.placeholder;
		return e;
	}
}

// this needs to look just like a string, but can expand as needed
version(no_dom_stream)
alias string Utf8Stream;
else
class Utf8Stream {
	protected:
		// these two should be overridden in subclasses to actually do the stream magic

		/// Returns the next piece of input. The default asks the delegate given
		/// to the constructor, or returns null when there is none.
		string getMore() {
			if(getMoreHelper !is null)
				return getMoreHelper();
			return null;
		}

		/// Tells whether getMore() has anything left to give. The default asks
		/// the delegate given to the constructor, or returns false when there is none.
		bool hasMore() {
			if(hasMoreHelper !is null)
				return hasMoreHelper();
			return false;
		}
		// the rest should be ok

	public:
		/// Wraps a string that is already complete; nothing more is ever fetched.
		this(string d) {
			this.data = d;
		}

		/// Wraps a source that is pulled on demand through the two delegates.
		/// The first piece is fetched right away if the source has one.
		this(string delegate() getMoreHelper, bool delegate() hasMoreHelper) {
			this.getMoreHelper = getMoreHelper;
			this.hasMoreHelper = hasMoreHelper;

			if(hasMore())
				this.data ~= getMore();

			// (a leftover stdout.flush() used to be here; building a stream
			// should not touch the program's standard output)
		}

		/// Length of what is buffered so far; it may first pull one more piece in.
		@property final size_t length() {
			// the parser checks length primarily directly before accessing the next character
			// so this is the place we'll hook to append more if possible and needed.
			if(lastIdx + 1 >= data.length && hasMore()) {
				data ~= getMore();
			}
			return data.length;
		}

		/// Returns the byte at `idx` and records the furthest index read so far.
		final char opIndex(size_t idx) {
			if(idx > lastIdx)
				lastIdx = idx;
			return data[idx];
		}

		/// Returns `data[start .. end]` and records `end` as read if it is the furthest so far.
		final string opSlice(size_t start, size_t end) {
			if(end > lastIdx)
				lastIdx = end;
			return data[start .. end];
		}

		/// `$` inside an index or slice; same as length, so it can trigger a fetch too.
		final size_t opDollar() {
			return length();
		}

		/// `stream ~ s`. NOTE: unlike with a real string this appends to the
		/// stream in place and hands back the same object.
		final Utf8Stream opBinary(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		/// `stream ~= s`: appends to the buffered data.
		final Utf8Stream opOpAssign(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		/// `stream = s`: replaces the buffered data. lastIdx and the delegates are left as they were.
		final Utf8Stream opAssign(string rhs) {
			this.data = rhs;
			return this;
		}
	private:
		// everything received so far
		string data;

		// the furthest position handed out through opIndex / opSlice
		size_t lastIdx;

		bool delegate() hasMoreHelper;
		string delegate() getMoreHelper;


		/+
		// used to maybe clear some old stuff
		// you might have to remove elements parsed with it too since they can hold slices into the
		// old stuff, preventing gc
		void dropFront(int bytes) {
			posAdjustment += bytes;
			data = data[bytes .. $];
		}

		int posAdjustment;
		+/
}

/// Passes `obj` and `name` to arsd.database's `fillData`, with a setter that
/// stores each key/value pair it is given via `form.setValue`.
void fillForm(T)(Form form, T obj, string name) {
	import arsd.database;
	fillData((k, v) => form.setValue(k, v), obj, name);
}


/+
/+
	Syntax:

	Tag: tagname#id.class
	Tree: Tag(Children, comma, separated...)
	Children: Tree or Variable
	Variable: $varname with optional |funcname following.
8754 8755 If a variable has a tree after it, it breaks the variable down: 8756 * if array, foreach it does the tree 8757 * if struct, it breaks down the member variables 8758 8759 stolen from georgy on irc, see: https://github.com/georgy7/stringplate 8760 +/ 8761 struct Stringplate { 8762 /++ 8763 8764 +/ 8765 this(string s) { 8766 8767 } 8768 8769 /++ 8770 8771 +/ 8772 Element expand(T...)(T vars) { 8773 return null; 8774 } 8775 } 8776 /// 8777 unittest { 8778 auto stringplate = Stringplate("#bar(.foo($foo), .baz($baz))"); 8779 assert(stringplate.expand.innerHTML == `<div id="bar"><div class="foo">$foo</div><div class="baz">$baz</div></div>`); 8780 } 8781 +/ 8782 8783 bool allAreInlineHtml(const(Element)[] children) { 8784 foreach(child; children) { 8785 if(child.nodeType == NodeType.Text && child.nodeValue.strip.length) { 8786 // cool 8787 } else if(child.tagName.isInArray(inlineElements) && allAreInlineHtml(child.children)) { 8788 // cool 8789 } else { 8790 // prolly block 8791 return false; 8792 } 8793 } 8794 return true; 8795 } 8796 8797 private bool isSimpleWhite(dchar c) { 8798 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; 8799 } 8800 8801 unittest { 8802 // Test for issue #120 8803 string s = `<html> 8804 <body> 8805 <P>AN 8806 <P>bubbles</P> 8807 <P>giggles</P> 8808 </body> 8809 </html>`; 8810 auto doc = new Document(); 8811 doc.parseUtf8(s, false, false); 8812 auto s2 = doc.toString(); 8813 assert( 8814 s2.indexOf("bubbles") < s2.indexOf("giggles"), 8815 "paragraph order incorrect:\n" ~ s2); 8816 } 8817 8818 unittest { 8819 // test for suncarpet email dec 24 2019 8820 // arbitrary id asduiwh 8821 auto document = new Document("<html> 8822 <head> 8823 <meta charset=\"utf-8\"></meta> 8824 <title>Element.querySelector Test</title> 8825 </head> 8826 <body> 8827 <div id=\"foo\"> 8828 <div>Foo</div> 8829 <div>Bar</div> 8830 </div> 8831 </body> 8832 </html>"); 8833 8834 auto doc = document; 8835 8836 assert(doc.querySelectorAll("div div").length == 2); 
8837 assert(doc.querySelector("div").querySelectorAll("div").length == 2); 8838 assert(doc.querySelectorAll("> html").length == 0); 8839 assert(doc.querySelector("head").querySelectorAll("> title").length == 1); 8840 assert(doc.querySelector("head").querySelectorAll("> meta[charset]").length == 1); 8841 8842 8843 assert(doc.root.matches("html")); 8844 assert(!doc.root.matches("nothtml")); 8845 assert(doc.querySelector("#foo > div").matches("div")); 8846 assert(doc.querySelector("body > #foo").matches("#foo")); 8847 8848 assert(doc.root.querySelectorAll(":root > body").length == 0); // the root has no CHILD root! 8849 assert(doc.querySelectorAll(":root > body").length == 1); // but the DOCUMENT does 8850 assert(doc.querySelectorAll(" > body").length == 1); // should mean the same thing 8851 assert(doc.root.querySelectorAll(" > body").length == 1); // the root of HTML has this 8852 assert(doc.root.querySelectorAll(" > html").length == 0); // but not this 8853 8854 // also confirming the querySelector works via the mdn definition 8855 auto foo = doc.requireSelector("#foo"); 8856 assert(foo.querySelector("#foo > div") !is null); 8857 assert(foo.querySelector("body #foo > div") !is null); 8858 8859 // this is SUPPOSED to work according to the spec but never has in dom.d since it limits the scope. 8860 // the new css :scope thing is designed to bring this in. and meh idk if i even care. 
8861 //assert(foo.querySelectorAll("#foo > div").length == 2); 8862 } 8863 8864 unittest { 8865 // based on https://developer.mozilla.org/en-US/docs/Web/API/Element/closest example 8866 auto document = new Document(`<article> 8867 <div id="div-01">Here is div-01 8868 <div id="div-02">Here is div-02 8869 <div id="div-03">Here is div-03</div> 8870 </div> 8871 </div> 8872 </article>`, true, true); 8873 8874 auto el = document.getElementById("div-03"); 8875 assert(el.closest("#div-02").id == "div-02"); 8876 assert(el.closest("div div").id == "div-03"); 8877 assert(el.closest("article > div").id == "div-01"); 8878 assert(el.closest(":not(div)").tagName == "article"); 8879 8880 assert(el.closest("p") is null); 8881 assert(el.closest("p, div") is el); 8882 } 8883 8884 unittest { 8885 // https://developer.mozilla.org/en-US/docs/Web/CSS/:is 8886 auto document = new Document(`<test> 8887 <div class="foo"><p>cool</p><span>bar</span></div> 8888 <main><p>two</p></main> 8889 </test>`); 8890 8891 assert(document.querySelectorAll(":is(.foo, main) p").length == 2); 8892 assert(document.querySelector("div:where(.foo)") !is null); 8893 } 8894 8895 unittest { 8896 immutable string html = q{ 8897 <root> 8898 <div class="roundedbox"> 8899 <table> 8900 <caption class="boxheader">Recent Reviews</caption> 8901 <tr> 8902 <th>Game</th> 8903 <th>User</th> 8904 <th>Rating</th> 8905 <th>Created</th> 8906 </tr> 8907 8908 <tr> 8909 <td>June 13, 2020 15:10</td> 8910 <td><a href="/reviews/8833">[Show]</a></td> 8911 </tr> 8912 8913 <tr> 8914 <td>June 13, 2020 15:02</td> 8915 <td><a href="/reviews/8832">[Show]</a></td> 8916 </tr> 8917 8918 <tr> 8919 <td>June 13, 2020 14:41</td> 8920 <td><a href="/reviews/8831">[Show]</a></td> 8921 </tr> 8922 </table> 8923 </div> 8924 </root> 8925 }; 8926 8927 auto doc = new Document(cast(string)html); 8928 // this should select the second table row, but... 
8929 auto rd = doc.root.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`); 8930 assert(rd !is null); 8931 assert(rd.href == "/reviews/8832"); 8932 8933 rd = doc.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`); 8934 assert(rd !is null); 8935 assert(rd.href == "/reviews/8832"); 8936 } 8937 8938 unittest { 8939 try { 8940 auto doc = new XmlDocument("<testxmlns:foo=\"/\"></test>"); 8941 assert(0); 8942 } catch(Exception e) { 8943 // good; it should throw an exception, not an error. 8944 } 8945 } 8946 8947 /* 8948 Copyright: Adam D. Ruppe, 2010 - 2021 8949 License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. 8950 Authors: Adam D. Ruppe, with contributions by Nick Sabalausky, Trass3r, and ketmar among others 8951 8952 Copyright Adam D. Ruppe 2010-2021. 8953 Distributed under the Boost Software License, Version 1.0. 8954 (See accompanying file LICENSE_1_0.txt or copy at 8955 http://www.boost.org/LICENSE_1_0.txt) 8956 */ 8957 8958