1 // Written in the D programming language. 2 3 /** 4 $(RED Warning: This module is considered out-dated and not up to Phobos' 5 current standards. It will be removed from Phobos in 2.101.0. 6 If you still need it, go to $(LINK https://github.com/DigitalMars/undeaD)) 7 8 Classes and functions for creating and parsing XML 9 10 The basic architecture of this module is that there are standalone functions, 11 classes for constructing an XML document from scratch (Tag, Element and 12 Document), and also classes for parsing a pre-existing XML file (ElementParser 13 and DocumentParser). The parsing classes <i>may</i> be used to build a 14 Document, but that is not their primary purpose. The handling capabilities of 15 DocumentParser and ElementParser are sufficiently customizable that you can 16 make them do pretty much whatever you want. 17 18 Example: This example creates a DOM (Document Object Model) tree 19 from an XML file. 20 ------------------------------------------------------------------------------ 21 import std.xml; 22 import std.stdio; 23 import std.string; 24 import std.file; 25 26 // books.xml is used in various samples throughout the Microsoft XML Core 27 // Services (MSXML) SDK. 28 // 29 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx 30 31 void main() 32 { 33 string s = cast(string) std.file.read("books.xml"); 34 35 // Check for well-formedness 36 check(s); 37 38 // Make a DOM tree 39 auto doc = new Document(s); 40 41 // Plain-print it 42 writeln(doc); 43 } 44 ------------------------------------------------------------------------------ 45 46 Example: This example does much the same thing, except that the file is 47 deconstructed and reconstructed by hand. This is more work, but the 48 techniques involved offer vastly more power. 
49 ------------------------------------------------------------------------------ 50 import std.xml; 51 import std.stdio; 52 import std.string; 53 54 struct Book 55 { 56 string id; 57 string author; 58 string title; 59 string genre; 60 string price; 61 string pubDate; 62 string description; 63 } 64 65 void main() 66 { 67 string s = cast(string) std.file.read("books.xml"); 68 69 // Check for well-formedness 70 check(s); 71 72 // Take it apart 73 Book[] books; 74 75 auto xml = new DocumentParser(s); 76 xml.onStartTag["book"] = (ElementParser xml) 77 { 78 Book book; 79 book.id = xml.tag.attr["id"]; 80 81 xml.onEndTag["author"] = (in Element e) { book.author = e.text(); }; 82 xml.onEndTag["title"] = (in Element e) { book.title = e.text(); }; 83 xml.onEndTag["genre"] = (in Element e) { book.genre = e.text(); }; 84 xml.onEndTag["price"] = (in Element e) { book.price = e.text(); }; 85 xml.onEndTag["publish-date"] = (in Element e) { book.pubDate = e.text(); }; 86 xml.onEndTag["description"] = (in Element e) { book.description = e.text(); }; 87 88 xml.parse(); 89 90 books ~= book; 91 }; 92 xml.parse(); 93 94 // Put it back together again; 95 auto doc = new Document(new Tag("catalog")); 96 foreach (book;books) 97 { 98 auto element = new Element("book"); 99 element.tag.attr["id"] = book.id; 100 101 element ~= new Element("author", book.author); 102 element ~= new Element("title", book.title); 103 element ~= new Element("genre", book.genre); 104 element ~= new Element("price", book.price); 105 element ~= new Element("publish-date",book.pubDate); 106 element ~= new Element("description", book.description); 107 108 doc ~= element; 109 } 110 111 // Pretty-print it 112 writefln(join(doc.pretty(3),"\n")); 113 } 114 ------------------------------------------------------------------------------- 115 Copyright: Copyright Janice Caron 2008 - 2009. 116 License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). 
Authors:   Janice Caron
Source:    $(PHOBOSSRC std/xml.d)
*/
/*
         Copyright Janice Caron 2008 - 2009.
Distributed under the Boost Software License, Version 1.0.
   (See accompanying file LICENSE_1_0.txt or copy at
         http://www.boost.org/LICENSE_1_0.txt)
*/
deprecated("Will be removed from Phobos in 2.101.0. If you still need it, go to https://github.com/DigitalMars/undeaD")
module std.xml;

enum cdata = "<![CDATA[";

/**
 * Tests whether a code point is a legal XML character (production 2).
 *
 * Accepted are tab, line feed, carriage return, everything from U+0020 up to
 * U+D7FF, and everything from U+E000 up to U+10FFFF except U+FFFE and U+FFFF.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is a legal XML character
 */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // At or below U+D7FF: anything from space upwards, plus the three
    // permitted control characters.
    if (c <= 0xD7FF)
        return c >= 0x20 || c == 0x9 || c == 0xA || c == 0xD;

    // From U+E000 to U+10FFFF: everything but U+FFFE and U+FFFF.
    if (c >= 0xE000 && c <= 0x10FFFF)
        return (c & 0x1FFFFE) != 0xFFFE;

    // The gap U+D800 .. U+DFFF and anything past U+10FFFF.
    return false;
}

@safe @nogc nothrow pure unittest
{
    // control characters: only tab, LF and CR are legal
    assert(!isChar(cast(dchar) 0x8));
    assert( isChar(cast(dchar) 0x9));
    assert( isChar(cast(dchar) 0xA));
    assert(!isChar(cast(dchar) 0xB));
    assert(!isChar(cast(dchar) 0xC));
    assert( isChar(cast(dchar) 0xD));
    assert(!isChar(cast(dchar) 0xE));
    assert(!isChar(cast(dchar) 0x1F));

    // ordinary characters
    assert( isChar(cast(dchar) 0x20));
    assert( isChar('J'));

    // edges of the excluded U+D800 .. U+DFFF gap
    assert( isChar(cast(dchar) 0xD7FF));
    assert(!isChar(cast(dchar) 0xD800));
    assert(!isChar(cast(dchar) 0xDFFF));
    assert( isChar(cast(dchar) 0xE000));

    // U+FFFE / U+FFFF and the top of the range
    assert( isChar(cast(dchar) 0xFFFD));
    assert(!isChar(cast(dchar) 0xFFFE));
    assert(!isChar(cast(dchar) 0xFFFF));
    assert( isChar(cast(dchar) 0x10000));
    assert( isChar(cast(dchar) 0x10FFFF));
    assert(!isChar(cast(dchar) 0x110000));

    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}

/**
 * Tests whether a character is XML whitespace.
 *
 * XML recognises exactly four whitespace characters: space, tab, line feed
 * and carriage return.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is one of the four XML whitespace characters
 */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    switch (c)
    {
        case '\u0020', '\u0009', '\u000A', '\u000D':
            return true;
        default:
            return false;
    }
}

/**
 * Tests whether a character is a digit according to the XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML digit
 */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // '0' .. '9' are answered directly; everything else goes to the table.
    return (c >= 0x0030 && c <= 0x0039) || lookup(DigitTable, c);
}

@safe @nogc nothrow pure unittest
{
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isDigit(c) == lookup(DigitTable, c));
    }
}

/**
 * Tests whether a character is a letter according to the XML standard,
 * i.e. either an ideographic character or a base character.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML letter
 */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    return isIdeographic(c) || isBaseChar(c);
}

/**
 * Tests whether a character is an ideographic character according to the
 * XML standard: U+3007, U+3021 .. U+3029 or U+4E00 .. U+9FA5.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML ideographic character
 */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    return c == 0x3007
        || (c >= 0x3021 && c <= 0x3029)
        || (c >= 0x4E00 && c <= 0x9FA5);
}

@safe @nogc nothrow pure unittest
{
    assert(isIdeographic('\u4E00'));
    assert(isIdeographic('\u9FA5'));
    assert(isIdeographic('\u3007'));
    assert(isIdeographic('\u3021'));
    assert(isIdeographic('\u3029'));

    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}

/**
 * Tests whether a character is a base character according to the XML
 * standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in BaseCharTable
 */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    return lookup(BaseCharTable, c);
}

/**
 * Tests whether a character is a combining character according to the
 * XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in CombiningCharTable
 */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    return lookup(CombiningCharTable, c);
}

/**
 * Tests whether a character is an extender according to the XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in ExtenderTable
 */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    return lookup(ExtenderTable, c);
}

/**
 * Encodes a string by replacing all characters which need to be escaped with
 * appropriate predefined XML entities.
 *
 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
 * and greater-than), and similarly, decode() unescapes them. These functions
 * are provided for convenience only. You do not need to use them when using
 * the std.xml classes, because then all the encoding and decoding will be done
 * for you automatically.
 *
 * If the string is not modified, the original will be returned.
334 * 335 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 336 * 337 * Params: 338 * s = The string to be encoded 339 * 340 * Returns: The encoded string 341 * 342 * Example: 343 * -------------- 344 * writefln(encode("a > b")); // writes "a > b" 345 * -------------- 346 */ 347 S encode(S)(S s) 348 { 349 import std.array : appender; 350 351 string r; 352 size_t lastI; 353 auto result = appender!S(); 354 355 foreach (i, c; s) 356 { 357 switch (c) 358 { 359 case '&': r = "&"; break; 360 case '"': r = """; break; 361 case '\'': r = "'"; break; 362 case '<': r = "<"; break; 363 case '>': r = ">"; break; 364 default: continue; 365 } 366 // Replace with r 367 result.put(s[lastI .. i]); 368 result.put(r); 369 lastI = i + 1; 370 } 371 372 if (!result.data.ptr) return s; 373 result.put(s[lastI .. $]); 374 return result.data; 375 } 376 377 @safe pure unittest 378 { 379 auto s = "hello"; 380 assert(encode(s) is s); 381 assert(encode("a > b") == "a > b", encode("a > b")); 382 assert(encode("a < b") == "a < b"); 383 assert(encode("don't") == "don't"); 384 assert(encode("\"hi\"") == ""hi"", encode("\"hi\"")); 385 assert(encode("cat & dog") == "cat & dog"); 386 } 387 388 /** 389 * Mode to use for decoding. 390 * 391 * $(DDOC_ENUM_MEMBERS NONE) Do not decode 392 * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors 393 * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error 394 */ 395 enum DecodeMode 396 { 397 NONE, LOOSE, STRICT 398 } 399 400 /** 401 * Decodes a string by unescaping all predefined XML entities. 402 * 403 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than 404 * and greater-than), and similarly, decode() unescapes them. These functions 405 * are provided for convenience only. You do not need to use them when using 406 * the std.xml classes, because then all the encoding and decoding will be done 407 * for you automatically. 
408 * 409 * This function decodes the entities &amp;, &quot;, &apos;, 410 * &lt; and &gt, 411 * as well as decimal and hexadecimal entities such as &#x20AC; 412 * 413 * If the string does not contain an ampersand, the original will be returned. 414 * 415 * Note that the "mode" parameter can be one of DecodeMode.NONE (do not 416 * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT 417 * (decode, and throw a DecodeException in the event of an error). 418 * 419 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 420 * 421 * Params: 422 * s = The string to be decoded 423 * mode = (optional) Mode to use for decoding. (Defaults to LOOSE). 424 * 425 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails 426 * 427 * Returns: The decoded string 428 * 429 * Example: 430 * -------------- 431 * writefln(decode("a > b")); // writes "a > b" 432 * -------------- 433 */ 434 string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure 435 { 436 import std.algorithm.searching : startsWith; 437 438 if (mode == DecodeMode.NONE) return s; 439 440 string buffer; 441 foreach (ref i; 0 .. s.length) 442 { 443 char c = s[i]; 444 if (c != '&') 445 { 446 if (buffer.length != 0) buffer ~= c; 447 } 448 else 449 { 450 if (buffer.length == 0) 451 { 452 buffer = s[0 .. i].dup; 453 } 454 if (startsWith(s[i..$],"&#")) 455 { 456 try 457 { 458 dchar d; 459 string t = s[i..$]; 460 checkCharRef(t, d); 461 char[4] temp; 462 import std.utf : encode; 463 buffer ~= temp[0 .. 
encode(temp, d)]; 464 i = s.length - t.length - 1; 465 } 466 catch (Err e) 467 { 468 if (mode == DecodeMode.STRICT) 469 throw new DecodeException("Unescaped &"); 470 buffer ~= '&'; 471 } 472 } 473 else if (startsWith(s[i..$],"&" )) { buffer ~= '&'; i += 4; } 474 else if (startsWith(s[i..$],""")) { buffer ~= '"'; i += 5; } 475 else if (startsWith(s[i..$],"'")) { buffer ~= '\''; i += 5; } 476 else if (startsWith(s[i..$],"<" )) { buffer ~= '<'; i += 3; } 477 else if (startsWith(s[i..$],">" )) { buffer ~= '>'; i += 3; } 478 else 479 { 480 if (mode == DecodeMode.STRICT) 481 throw new DecodeException("Unescaped &"); 482 buffer ~= '&'; 483 } 484 } 485 } 486 return (buffer.length == 0) ? s : buffer; 487 } 488 489 @safe pure unittest 490 { 491 void assertNot(string s) pure 492 { 493 bool b = false; 494 try { decode(s,DecodeMode.STRICT); } 495 catch (DecodeException e) { b = true; } 496 assert(b,s); 497 } 498 499 // Assert that things that should work, do 500 auto s = "hello"; 501 assert(decode(s, DecodeMode.STRICT) is s); 502 assert(decode("a > b", DecodeMode.STRICT) == "a > b"); 503 assert(decode("a < b", DecodeMode.STRICT) == "a < b"); 504 assert(decode("don't", DecodeMode.STRICT) == "don't"); 505 assert(decode(""hi"", DecodeMode.STRICT) == "\"hi\""); 506 assert(decode("cat & dog", DecodeMode.STRICT) == "cat & dog"); 507 assert(decode("*", DecodeMode.STRICT) == "*"); 508 assert(decode("*", DecodeMode.STRICT) == "*"); 509 assert(decode("cat & dog", DecodeMode.LOOSE) == "cat & dog"); 510 assert(decode("a > b", DecodeMode.LOOSE) == "a > b"); 511 assert(decode("&#;", DecodeMode.LOOSE) == "&#;"); 512 assert(decode("&#x;", DecodeMode.LOOSE) == "&#x;"); 513 assert(decode("G;", DecodeMode.LOOSE) == "G;"); 514 assert(decode("G;", DecodeMode.LOOSE) == "G;"); 515 516 // Assert that things that shouldn't work, don't 517 assertNot("cat & dog"); 518 assertNot("a > b"); 519 assertNot("&#;"); 520 assertNot("&#x;"); 521 assertNot("G;"); 522 assertNot("G;"); 523 } 524 525 /** 526 * Class 
representing an XML document. 527 * 528 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 529 * 530 */ 531 class Document : Element 532 { 533 /** 534 * Contains all text which occurs before the root element. 535 * Defaults to <?xml version="1.0"?> 536 */ 537 string prolog = "<?xml version=\"1.0\"?>"; 538 /** 539 * Contains all text which occurs after the root element. 540 * Defaults to the empty string 541 */ 542 string epilog; 543 544 /** 545 * Constructs a Document by parsing XML text. 546 * 547 * This function creates a complete DOM (Document Object Model) tree. 548 * 549 * The input to this function MUST be valid XML. 550 * This is enforced by DocumentParser's in contract. 551 * 552 * Params: 553 * s = the complete XML text. 554 */ 555 this(string s) 556 in 557 { 558 assert(s.length != 0); 559 } 560 do 561 { 562 auto xml = new DocumentParser(s); 563 string tagString = xml.tag.tagString; 564 565 this(xml.tag); 566 prolog = s[0 .. tagString.ptr - s.ptr]; 567 parse(xml); 568 epilog = *xml.s; 569 } 570 571 /** 572 * Constructs a Document from a Tag. 573 * 574 * Params: 575 * tag = the start tag of the document. 576 */ 577 this(const(Tag) tag) 578 { 579 super(tag); 580 } 581 582 const 583 { 584 /** 585 * Compares two Documents for equality 586 * 587 * Example: 588 * -------------- 589 * Document d1,d2; 590 * if (d1 == d2) { } 591 * -------------- 592 */ 593 override bool opEquals(scope const Object o) const 594 { 595 const doc = toType!(const Document)(o); 596 return prolog == doc.prolog 597 && (cast(const) this).Element.opEquals(cast(const) doc) 598 && epilog == doc.epilog; 599 } 600 601 /** 602 * Compares two Documents 603 * 604 * You should rarely need to call this function. It exists so that 605 * Documents can be used as associative array keys. 
606 * 607 * Example: 608 * -------------- 609 * Document d1,d2; 610 * if (d1 < d2) { } 611 * -------------- 612 */ 613 override int opCmp(scope const Object o) scope const 614 { 615 const doc = toType!(const Document)(o); 616 if (prolog != doc.prolog) 617 return prolog < doc.prolog ? -1 : 1; 618 if (int cmp = this.Element.opCmp(doc)) 619 return cmp; 620 if (epilog != doc.epilog) 621 return epilog < doc.epilog ? -1 : 1; 622 return 0; 623 } 624 625 /** 626 * Returns the hash of a Document 627 * 628 * You should rarely need to call this function. It exists so that 629 * Documents can be used as associative array keys. 630 */ 631 override size_t toHash() scope const @trusted 632 { 633 return hash(prolog, hash(epilog, (cast() this).Element.toHash())); 634 } 635 636 /** 637 * Returns the string representation of a Document. (That is, the 638 * complete XML of a document). 639 */ 640 override string toString() scope const @safe 641 { 642 return prolog ~ super.toString() ~ epilog; 643 } 644 } 645 } 646 647 @system unittest 648 { 649 // https://issues.dlang.org/show_bug.cgi?id=14966 650 auto xml = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`; 651 652 auto a = new Document(xml); 653 auto b = new Document(xml); 654 assert(a == b); 655 assert(!(a < b)); 656 int[Document] aa; 657 aa[a] = 1; 658 assert(aa[b] == 1); 659 660 b ~= new Element("b"); 661 assert(a < b); 662 assert(b > a); 663 } 664 665 /** 666 * Class representing an XML element. 
667 * 668 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 669 */ 670 class Element : Item 671 { 672 Tag tag; /// The start tag of the element 673 Item[] items; /// The element's items 674 Text[] texts; /// The element's text items 675 CData[] cdatas; /// The element's CData items 676 Comment[] comments; /// The element's comments 677 ProcessingInstruction[] pis; /// The element's processing instructions 678 Element[] elements; /// The element's child elements 679 680 /** 681 * Constructs an Element given a name and a string to be used as a Text 682 * interior. 683 * 684 * Params: 685 * name = the name of the element. 686 * interior = (optional) the string interior. 687 * 688 * Example: 689 * ------------------------------------------------------- 690 * auto element = new Element("title","Serenity") 691 * // constructs the element <title>Serenity</title> 692 * ------------------------------------------------------- 693 */ 694 this(string name, string interior=null) @safe pure 695 { 696 this(new Tag(name)); 697 if (interior.length != 0) opOpAssign!("~")(new Text(interior)); 698 } 699 700 /** 701 * Constructs an Element from a Tag. 702 * 703 * Params: 704 * tag_ = the start or empty tag of the element. 705 */ 706 this(const(Tag) tag_) @safe pure 707 { 708 this.tag = new Tag(tag_.name); 709 tag.type = TagType.EMPTY; 710 foreach (k,v;tag_.attr) tag.attr[k] = v; 711 tag.tagString = tag_.tagString; 712 } 713 714 /** 715 * Append a text item to the interior of this element 716 * 717 * Params: 718 * item = the item you wish to append. 719 * 720 * Example: 721 * -------------- 722 * Element element; 723 * element ~= new Text("hello"); 724 * -------------- 725 */ 726 void opOpAssign(string op)(Text item) @safe pure 727 if (op == "~") 728 { 729 texts ~= item; 730 appendItem(item); 731 } 732 733 /** 734 * Append a CData item to the interior of this element 735 * 736 * Params: 737 * item = the item you wish to append. 
738 * 739 * Example: 740 * -------------- 741 * Element element; 742 * element ~= new CData("hello"); 743 * -------------- 744 */ 745 void opOpAssign(string op)(CData item) @safe pure 746 if (op == "~") 747 { 748 cdatas ~= item; 749 appendItem(item); 750 } 751 752 /** 753 * Append a comment to the interior of this element 754 * 755 * Params: 756 * item = the item you wish to append. 757 * 758 * Example: 759 * -------------- 760 * Element element; 761 * element ~= new Comment("hello"); 762 * -------------- 763 */ 764 void opOpAssign(string op)(Comment item) @safe pure 765 if (op == "~") 766 { 767 comments ~= item; 768 appendItem(item); 769 } 770 771 /** 772 * Append a processing instruction to the interior of this element 773 * 774 * Params: 775 * item = the item you wish to append. 776 * 777 * Example: 778 * -------------- 779 * Element element; 780 * element ~= new ProcessingInstruction("hello"); 781 * -------------- 782 */ 783 void opOpAssign(string op)(ProcessingInstruction item) @safe pure 784 if (op == "~") 785 { 786 pis ~= item; 787 appendItem(item); 788 } 789 790 /** 791 * Append a complete element to the interior of this element 792 * 793 * Params: 794 * item = the item you wish to append. 
795 * 796 * Example: 797 * -------------- 798 * Element element; 799 * Element other = new Element("br"); 800 * element ~= other; 801 * // appends element representing <br /> 802 * -------------- 803 */ 804 void opOpAssign(string op)(Element item) @safe pure 805 if (op == "~") 806 { 807 elements ~= item; 808 appendItem(item); 809 } 810 811 private void appendItem(Item item) @safe pure 812 { 813 items ~= item; 814 if (tag.type == TagType.EMPTY && !item.isEmptyXML) 815 tag.type = TagType.START; 816 } 817 818 private void parse(ElementParser xml) 819 { 820 xml.onText = (string s) { opOpAssign!("~")(new Text(s)); }; 821 xml.onCData = (string s) { opOpAssign!("~")(new CData(s)); }; 822 xml.onComment = (string s) { opOpAssign!("~")(new Comment(s)); }; 823 xml.onPI = (string s) { opOpAssign!("~")(new ProcessingInstruction(s)); }; 824 825 xml.onStartTag[null] = (ElementParser xml) 826 { 827 auto e = new Element(xml.tag); 828 e.parse(xml); 829 opOpAssign!("~")(e); 830 }; 831 832 xml.parse(); 833 } 834 835 /** 836 * Compares two Elements for equality 837 * 838 * Example: 839 * -------------- 840 * Element e1,e2; 841 * if (e1 == e2) { } 842 * -------------- 843 */ 844 override bool opEquals(scope const Object o) const 845 { 846 const element = toType!(const Element)(o); 847 immutable len = items.length; 848 if (len != element.items.length) return false; 849 foreach (i; 0 .. len) 850 { 851 if (!items[i].opEquals(element.items[i])) return false; 852 } 853 return true; 854 } 855 856 /** 857 * Compares two Elements 858 * 859 * You should rarely need to call this function. It exists so that Elements 860 * can be used as associative array keys. 
861 * 862 * Example: 863 * -------------- 864 * Element e1,e2; 865 * if (e1 < e2) { } 866 * -------------- 867 */ 868 override int opCmp(scope const Object o) @safe const 869 { 870 const element = toType!(const Element)(o); 871 for (uint i=0; ; ++i) 872 { 873 if (i == items.length && i == element.items.length) return 0; 874 if (i == items.length) return -1; 875 if (i == element.items.length) return 1; 876 if (!items[i].opEquals(element.items[i])) 877 return items[i].opCmp(element.items[i]); 878 } 879 } 880 881 /** 882 * Returns the hash of an Element 883 * 884 * You should rarely need to call this function. It exists so that Elements 885 * can be used as associative array keys. 886 */ 887 override size_t toHash() scope const @safe 888 { 889 size_t hash = tag.toHash(); 890 foreach (item;items) hash += item.toHash(); 891 return hash; 892 } 893 894 const 895 { 896 /** 897 * Returns the decoded interior of an element. 898 * 899 * The element is assumed to contain text <i>only</i>. So, for 900 * example, given XML such as "<title>Good &amp; 901 * Bad</title>", will return "Good & Bad". 902 * 903 * Params: 904 * mode = (optional) Mode to use for decoding. (Defaults to LOOSE). 905 * 906 * Throws: DecodeException if decode fails 907 */ 908 string text(DecodeMode mode=DecodeMode.LOOSE) 909 { 910 string buffer; 911 foreach (item;items) 912 { 913 Text t = cast(Text) item; 914 if (t is null) throw new DecodeException(item.toString()); 915 buffer ~= decode(t.toString(),mode); 916 } 917 return buffer; 918 } 919 920 /** 921 * Returns an indented string representation of this item 922 * 923 * Params: 924 * indent = (optional) number of spaces by which to indent this 925 * element. Defaults to 2. 
926 */ 927 override string[] pretty(uint indent=2) scope 928 { 929 import std.algorithm.searching : count; 930 import std..string : rightJustify; 931 932 if (isEmptyXML) return [ tag.toEmptyString() ]; 933 934 if (items.length == 1) 935 { 936 auto t = cast(const(Text))(items[0]); 937 if (t !is null) 938 { 939 return [tag.toStartString() ~ t.toString() ~ tag.toEndString()]; 940 } 941 } 942 943 string[] a = [ tag.toStartString() ]; 944 foreach (item;items) 945 { 946 string[] b = item.pretty(indent); 947 foreach (s;b) 948 { 949 a ~= rightJustify(s,count(s) + indent); 950 } 951 } 952 a ~= tag.toEndString(); 953 return a; 954 } 955 956 /** 957 * Returns the string representation of an Element 958 * 959 * Example: 960 * -------------- 961 * auto element = new Element("br"); 962 * writefln(element.toString()); // writes "<br />" 963 * -------------- 964 */ 965 override string toString() scope @safe 966 { 967 if (isEmptyXML) return tag.toEmptyString(); 968 969 string buffer = tag.toStartString(); 970 foreach (item;items) { buffer ~= item.toString(); } 971 buffer ~= tag.toEndString(); 972 return buffer; 973 } 974 975 override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; } 976 } 977 } 978 979 /** 980 * Tag types. 981 * 982 * $(DDOC_ENUM_MEMBERS START) Used for start tags 983 * $(DDOC_ENUM_MEMBERS END) Used for end tags 984 * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags 985 * 986 */ 987 enum TagType { START, END, EMPTY } 988 989 /** 990 * Class representing an XML tag. 
991 * 992 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 993 * 994 * The class invariant guarantees 995 * <ul> 996 * <li> that $(B type) is a valid enum TagType value</li> 997 * <li> that $(B name) consists of valid characters</li> 998 * <li> that each attribute name consists of valid characters</li> 999 * </ul> 1000 */ 1001 class Tag 1002 { 1003 TagType type = TagType.START; /// Type of tag 1004 string name; /// Tag name 1005 string[string] attr; /// Associative array of attributes 1006 private string tagString; 1007 1008 invariant() 1009 { 1010 string s; 1011 string t; 1012 1013 assert(type == TagType.START 1014 || type == TagType.END 1015 || type == TagType.EMPTY); 1016 1017 s = name; 1018 try { checkName(s,t); } 1019 catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); } 1020 1021 foreach (k,v;attr) 1022 { 1023 s = k; 1024 try { checkName(s,t); } 1025 catch (Err e) 1026 { assert(false,"Invalid attribute name:" ~ e.toString()); } 1027 } 1028 } 1029 1030 /** 1031 * Constructs an instance of Tag with a specified name and type 1032 * 1033 * The constructor does not initialize the attributes. To initialize the 1034 * attributes, you access the $(B attr) member variable. 1035 * 1036 * Params: 1037 * name = the Tag's name 1038 * type = (optional) the Tag's type. If omitted, defaults to 1039 * TagType.START. 1040 * 1041 * Example: 1042 * -------------- 1043 * auto tag = new Tag("img",Tag.EMPTY); 1044 * tag.attr["src"] = "http://example.com/example.jpg"; 1045 * -------------- 1046 */ 1047 this(string name, TagType type=TagType.START) @safe pure 1048 { 1049 this.name = name; 1050 this.type = type; 1051 } 1052 1053 /* Private constructor (so don't ddoc this!) 1054 * 1055 * Constructs a Tag by parsing the string representation, e.g. "<html>". 1056 * 1057 * The string is passed by reference, and is advanced over all characters 1058 * consumed. 
1059 * 1060 * The second parameter is a dummy parameter only, required solely to 1061 * distinguish this constructor from the public one. 1062 */ 1063 private this(ref string s, bool dummy) @safe pure 1064 { 1065 import std.algorithm.searching : countUntil; 1066 import std.ascii : isWhite; 1067 import std.utf : byCodeUnit; 1068 1069 tagString = s; 1070 try 1071 { 1072 reqc(s,'<'); 1073 if (optc(s,'/')) type = TagType.END; 1074 ptrdiff_t i = s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f"); 1075 name = s[0 .. i]; 1076 s = s[i .. $]; 1077 1078 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1079 s = s[i .. $]; 1080 1081 while (s.length > 0 && s[0] != '>' && s[0] != '/') 1082 { 1083 i = s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f"); 1084 string key = s[0 .. i]; 1085 s = s[i .. $]; 1086 1087 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1088 s = s[i .. $]; 1089 reqc(s,'='); 1090 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1091 s = s[i .. $]; 1092 1093 immutable char quote = requireOneOf(s,"'\""); 1094 i = s.byCodeUnit.countUntil(quote); 1095 string val = decode(s[0 .. i], DecodeMode.LOOSE); 1096 s = s[i .. $]; 1097 reqc(s,quote); 1098 1099 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1100 s = s[i .. $]; 1101 attr[key] = val; 1102 } 1103 if (optc(s,'/')) 1104 { 1105 if (type == TagType.END) throw new TagException(""); 1106 type = TagType.EMPTY; 1107 } 1108 reqc(s,'>'); 1109 tagString.length = tagString.length - s.length; 1110 } 1111 catch (XMLException e) 1112 { 1113 tagString.length = tagString.length - s.length; 1114 throw new TagException(tagString); 1115 } 1116 } 1117 1118 const 1119 { 1120 /** 1121 * Compares two Tags for equality 1122 * 1123 * You should rarely need to call this function. It exists so that Tags 1124 * can be used as associative array keys. 
1125 * 1126 * Example: 1127 * -------------- 1128 * Tag tag1,tag2 1129 * if (tag1 == tag2) { } 1130 * -------------- 1131 */ 1132 override bool opEquals(scope Object o) 1133 { 1134 const tag = toType!(const Tag)(o); 1135 return 1136 (name != tag.name) ? false : ( 1137 (attr != tag.attr) ? false : ( 1138 (type != tag.type) ? false : ( 1139 true ))); 1140 } 1141 1142 /** 1143 * Compares two Tags 1144 * 1145 * Example: 1146 * -------------- 1147 * Tag tag1,tag2 1148 * if (tag1 < tag2) { } 1149 * -------------- 1150 */ 1151 override int opCmp(Object o) 1152 { 1153 const tag = toType!(const Tag)(o); 1154 // Note that attr is an AA, so the comparison is nonsensical (bug 10381) 1155 return 1156 ((name != tag.name) ? ( name < tag.name ? -1 : 1 ) : 1157 ((attr != tag.attr) ? ( cast(void *) attr < cast(void*) tag.attr ? -1 : 1 ) : 1158 ((type != tag.type) ? ( type < tag.type ? -1 : 1 ) : 1159 0 ))); 1160 } 1161 1162 /** 1163 * Returns the hash of a Tag 1164 * 1165 * You should rarely need to call this function. It exists so that Tags 1166 * can be used as associative array keys. 1167 */ 1168 override size_t toHash() 1169 { 1170 return .hashOf(name); 1171 } 1172 1173 /** 1174 * Returns the string representation of a Tag 1175 * 1176 * Example: 1177 * -------------- 1178 * auto tag = new Tag("book",TagType.START); 1179 * writefln(tag.toString()); // writes "<book>" 1180 * -------------- 1181 */ 1182 override string toString() @safe 1183 { 1184 if (isEmpty) return toEmptyString(); 1185 return (isEnd) ? 
toEndString() : toStartString(); 1186 } 1187 1188 private 1189 { 1190 string toNonEndString() @safe 1191 { 1192 import std.format : format; 1193 1194 string s = "<" ~ name; 1195 foreach (key,val;attr) 1196 s ~= format(" %s=\"%s\"",key,encode(val)); 1197 return s; 1198 } 1199 1200 string toStartString() @safe { return toNonEndString() ~ ">"; } 1201 1202 string toEndString() @safe { return "</" ~ name ~ ">"; } 1203 1204 string toEmptyString() @safe { return toNonEndString() ~ " />"; } 1205 } 1206 1207 /** 1208 * Returns true if the Tag is a start tag 1209 * 1210 * Example: 1211 * -------------- 1212 * if (tag.isStart) { } 1213 * -------------- 1214 */ 1215 @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; } 1216 1217 /** 1218 * Returns true if the Tag is an end tag 1219 * 1220 * Example: 1221 * -------------- 1222 * if (tag.isEnd) { } 1223 * -------------- 1224 */ 1225 @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END; } 1226 1227 /** 1228 * Returns true if the Tag is an empty tag 1229 * 1230 * Example: 1231 * -------------- 1232 * if (tag.isEmpty) { } 1233 * -------------- 1234 */ 1235 @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; } 1236 } 1237 } 1238 1239 /** 1240 * Class representing a comment 1241 */ 1242 class Comment : Item 1243 { 1244 private string content; 1245 1246 /** 1247 * Construct a comment 1248 * 1249 * Params: 1250 * content = the body of the comment 1251 * 1252 * Throws: CommentException if the comment body is illegal (contains "--" 1253 * or exactly equals "-") 1254 * 1255 * Example: 1256 * -------------- 1257 * auto item = new Comment("This is a comment"); 1258 * // constructs <!--This is a comment--> 1259 * -------------- 1260 */ 1261 this(string content) @safe pure 1262 { 1263 import std..string : indexOf; 1264 1265 if (content == "-" || content.indexOf("--") != -1) 1266 throw new CommentException(content); 1267 this.content = content; 1268 } 1269 1270 
/**
     * Compares two comments for equality
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two comments
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to whether this comment's content is
     * less than, equal to or greater than that of `o`. An Item that is not a
     * Comment compares as 0.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        // The former one-liner `t !is null && (... ? -1 : 1 ...)` was typed
        // bool, so "less than" was reported as 1. Branch explicitly instead.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
*/
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this comment
     */
    override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}

// https://issues.dlang.org/show_bug.cgi?id=16241
@safe unittest
{
    import std.exception : assertThrown;
    auto c = new Comment("==");
    assert(c.content == "==");
    assertThrown!CommentException(new Comment("--"));
}

/**
 * Class representing a Character Data section
 */
class CData : Item
{
    private string content;

    /**
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        // "]]>" would terminate the section early, so it may not appear.
        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /**
     * Compares two CDatas for equality
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two CDatas
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
*
     * Returns: -1, 0 or 1 according to whether this section's content is
     * less than, equal to or greater than that of `o`. An Item that is not a
     * CData compares as 0.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        // The former `t !is null && (...)` expression was typed bool and so
        // could never yield -1. Branch explicitly instead.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this CData section
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}

/**
 * Class representing a text (aka Parsed Character Data) section
 */
class Text : Item
{
    private string content;

    /**
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text.
This function encodes the text before
     *                insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // constructs a &lt; b
     * --------------
     */
    this(string content) @safe pure
    {
        // Stored in encoded form; toString returns it unchanged.
        this.content = encode(content);
    }

    /**
     * Compares two text sections for equality
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two text sections
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to whether this section's (encoded)
     * content is less than, equal to or greater than that of `o`. An Item
     * that is not a Text compares as 0.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        // The former `t !is null && (...)` expression was typed bool and so
        // could never yield -1. Branch explicitly instead.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
*/
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this Text section
     */
    override string toString() scope const @safe @nogc pure nothrow { return content; }

    /**
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; }
}

/**
 * Class representing an XML Instruction section
 */
class XMLInstruction : Item
{
    private string content;

    /**
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        // ">" would close the instruction early, so it may not appear.
        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /**
     * Compares two XML instructions for equality
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two XML instructions
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
*
     * Returns: -1, 0 or 1 according to whether this instruction's content is
     * less than, equal to or greater than that of `o`. An Item that is not an
     * XMLInstruction compares as 0.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        // The former `t !is null && (...)` expression was typed bool and so
        // could never yield -1. Branch explicitly instead.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this XmlInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}

/**
 * Class representing a Processing Instruction section
 */
class ProcessingInstruction : Item
{
    private string content;

    /**
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        // "?>" would close the instruction early, so it may not appear.
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /**
     * Compares two processing instructions for equality
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two processing instructions
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to whether this instruction's content is
     * less than, equal to or greater than that of `o`. An Item that is not a
     * ProcessingInstruction compares as 0.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        // The former `t !is null && (...)` expression was typed bool and so
        // could never yield -1. Branch explicitly instead.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<?"
~ content ~ "?>"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always
}

/**
 * Abstract base class for XML items
 */
abstract class Item
{
    /// Compares with another Item of same type for equality
    abstract override bool opEquals(scope const Object o) @safe const;

    /// Compares with another Item of same type
    abstract override int opCmp(scope const Object o) @safe const;

    /// Returns the hash of this item
    abstract override size_t toHash() @safe scope const;

    /// Returns a string representation of this item
    abstract override string toString() @safe scope const;

    /**
     * Returns an indented string representation of this item
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     *               (not used by this base implementation)
     *
     * Returns: a one-element array holding the stripped toString() result,
     * or an empty array if that result is empty after stripping
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;

        string s = strip(toString());
        return s.length == 0 ? [] : [ s ];
    }

    /// Returns true if the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}

/**
 * Class for parsing an XML Document.
 *
 * This is a subclass of ElementParser. Most of the useful functions are
 * documented there.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Bugs:
 *      Currently only supports UTF documents.
 *
 *      If there is an encoding attribute in the prolog, it is ignored.
 *
 */
class DocumentParser : ElementParser
{
    string xmlText;

    /**
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract.
*
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length != 0);
        try
        {
            // Confirm that the input is valid XML
            check(xmlText_);
        }
        catch (CheckException e)
        {
            // And if it's not, tell the user why not
            assert(false, "\n" ~ e.toString());
        }
    }
    do
    {
        // Keep our own copy of the text and point the shared cursor `s` at
        // it before calling super(): the base constructor dereferences `s`.
        xmlText = xmlText_;
        s = &xmlText;
        super();    // Initialize everything
        parse();    // Parse through the root tag (but not beyond)
    }
}

@system unittest
{
    auto doc = new Document("<root><child><grandchild/></child></root>");
    assert(doc.elements.length == 1);
    assert(doc.elements[0].tag.name == "child");
    assert(doc.items == doc.elements);
}

/**
 * Class for parsing an XML element.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Note that you cannot construct instances of this class directly. You can
 * construct a DocumentParser (which is a subclass of ElementParser), but
 * otherwise, Instances of ElementParser will be created for you by the
 * library, and passed your way via onStartTag handlers.
*
 */
class ElementParser
{
    /// Callback receiving a piece of document text (comment, CDATA, PI, text...)
    alias Handler = void delegate(string);
    /// Callback receiving the Element built when an end tag is reached
    alias ElementHandler = void delegate(in Element element);
    /// Callback receiving a parser positioned inside a newly opened element
    alias ParserHandler = void delegate(ElementParser parser);

    private
    {
        Tag tag_;               // tag most recently read by this parser
        string elementStart;    // snapshot of *s taken at construction
        string* s;              // remaining input, shared with the parent parser

        Handler commentHandler = null;
        Handler cdataHandler = null;
        Handler xiHandler = null;
        Handler piHandler = null;
        Handler rawTextHandler = null;
        Handler textHandler = null;

        // Private constructor for start tags
        this(ElementParser parent) @safe @nogc pure nothrow
        {
            // `s` must be assigned first: this() dereferences it.
            s = parent.s;
            this();
            tag_ = parent.tag_;
        }

        // Private constructor for empty tags
        this(Tag tag, string* t) @safe @nogc pure nothrow
        {
            // `s` must be assigned first: this() dereferences it.
            s = t;
            this();
            tag_ = tag;
        }
    }

    /**
     * The Tag at the start of the element being parsed. You can read this to
     * determine the tag's name and attributes.
     */
    @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; }

    /**
     * Register a handler which will be called whenever a start tag is
     * encountered which matches the specified name. You can also pass null as
     * the name, in which case the handler will be called for any unmatched
     * start tag.
*
     * Example:
     * --------------
     * // Call this function whenever a <podcast> start tag is encountered
     * onStartTag["podcast"] = (ElementParser xml)
     * {
     *     // Your code here
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     *
     * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
     * // start tag is encountered
     * onStartTag["episode"] = &myEpisodeStartHandler;
     *
     * // call delegate dg for all other start tags
     * onStartTag[null] = dg;
     * --------------
     *
     * This library will supply your function with a new instance of
     * ElementParser, which may be used to parse inside the element whose
     * start tag was just found, or to identify the tag attributes of the
     * element, etc.
     *
     * Note that your function will be called for both start tags and empty
     * tags. That is, we make no distinction between <br></br>
     * and <br/>.
     */
    ParserHandler[string] onStartTag;

    /**
     * Register a handler which will be called whenever an end tag is
     * encountered which matches the specified name. You can also pass null as
     * the name, in which case the handler will be called for any unmatched
     * end tag.
*
     * Example:
     * --------------
     * // Call this function whenever a </podcast> end tag is encountered
     * onEndTag["podcast"] = (in Element e)
     * {
     *     // Your code here
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     *
     * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
     * // end tag is encountered
     * onEndTag["episode"] = &myEpisodeEndHandler;
     *
     * // call delegate dg for all other end tags
     * onEndTag[null] = dg;
     * --------------
     *
     * Note that your function will be called for both end tags and empty
     * tags. That is, we make no distinction between <br></br>
     * and <br/>.
     */
    ElementHandler[string] onEndTag;

    // Records the input as it stands at construction time; toString later
    // compares it with what is left. Requires `s` to be set already.
    protected this() @safe @nogc pure nothrow
    {
        elementStart = *s;
    }

    /**
     * Register a handler which will be called whenever text is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever text is encountered
     * onText = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s will have been decoded by the time you see
     *     // it, and so may contain any character.
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; }

    /**
     * Register an alternative handler which will be called whenever text
     * is encountered. This differs from onText in that onText will decode
     * the text, whereas onTextRaw will not. This allows you to make design
     * choices, since onText will be more accurate, but slower, while
     * onTextRaw will be faster, but less accurate.
Of course, you can
     * still call decode() within your handler, if you want, but you'd
     * probably want to use onTextRaw only in circumstances where you
     * know that decoding is unnecessary.
     *
     * If both a raw and a decoded text handler are registered, parse() calls
     * only the raw one.
     *
     * Example:
     * --------------
     * // Call this function whenever text is encountered
     * onTextRaw = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s will NOT have been decoded.
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; }

    /**
     * Register a handler which will be called whenever a character data
     * segment is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever a CData section is encountered
     * onCData = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <![CDATA[
     *     // nor closing ]]>
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; }

    /**
     * Register a handler which will be called whenever a comment is
     * encountered.
*
     * Example:
     * --------------
     * // Call this function whenever a comment is encountered
     * onComment = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <!-- nor
     *     // closing -->
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; }

    /**
     * Register a handler which will be called whenever a processing
     * instruction is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever a processing instruction is encountered
     * onPI = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <? nor
     *     // closing ?>
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; }

    /**
     * Register a handler which will be called whenever an XML instruction is
     * encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever an XML instruction is encountered
     * // (Note: XML instructions may only occur preceding the root tag of a
     * // document).
     * onXI = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <! nor
     *     // closing >
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; }

    /**
     * Parse an XML element.
*
     * Parsing will continue until the end of the current element. Any items
     * encountered for which a handler has been registered will invoke that
     * handler.
     *
     * Throws: various kinds of XMLException
     */
    void parse()
    {
        import std.algorithm.searching : startsWith;
        import std.string : indexOf;

        string t;
        // `root` is the tag this parser was created for; it is null only when
        // no tag has been read yet (see the early return below).
        const Tag root = tag_;
        // Start tags seen so far, by name, so that an end tag can find the
        // text of its matching start tag.
        Tag[string] startTags;
        if (tag_ !is null) startTags[tag_.name] = tag_;

        while (s.length != 0)
        {
            if (startsWith(*s,"<!--"))
            {
                chop(*s,4);
                t = chop(*s,indexOf(*s,"-->"));
                if (commentHandler.funcptr !is null) commentHandler(t);
                chop(*s,3);
            }
            else if (startsWith(*s,"<![CDATA["))
            {
                chop(*s,9);
                t = chop(*s,indexOf(*s,"]]>"));
                if (cdataHandler.funcptr !is null) cdataHandler(t);
                chop(*s,3);
            }
            else if (startsWith(*s,"<!"))
            {
                chop(*s,2);
                t = chop(*s,indexOf(*s,">"));
                if (xiHandler.funcptr !is null) xiHandler(t);
                chop(*s,1);
            }
            else if (startsWith(*s,"<?"))
            {
                chop(*s,2);
                t = chop(*s,indexOf(*s,"?>"));
                if (piHandler.funcptr !is null) piHandler(t);
                chop(*s,2);
            }
            else if (startsWith(*s,"<"))
            {
                tag_ = new Tag(*s,true);
                if (root is null)
                    return; // Return to constructor of derived class

                if (tag_.isStart)
                {
                    startTags[tag_.name] = tag_;

                    auto parser = new ElementParser(this);

                    // Prefer the handler registered for this name; fall back
                    // to the catch-all registered under null.
                    auto handler = tag_.name in onStartTag;
                    if (handler !is null) (*handler)(parser);
                    else
                    {
                        handler = null in onStartTag;
                        if (handler !is null) (*handler)(parser);
                    }
                }
                else if (tag_.isEnd)
                {
                    const startTag = startTags[tag_.name];
                    string text;

                    if (startTag.tagString.length == 0)
                        assert(0);

                    // The element's raw content is whatever lies between the
                    // end of the start tag and the beginning of the end tag.
                    immutable(char)* p = startTag.tagString.ptr
                        + startTag.tagString.length;
                    immutable(char)* q = &tag_.tagString[0];
                    text =
decode(p[0..(q-p)], DecodeMode.LOOSE);

                    auto element = new Element(startTag);
                    if (text.length != 0) element ~= new Text(text);

                    // Named handler first, then the catch-all under null.
                    auto handler = tag_.name in onEndTag;
                    if (handler !is null) (*handler)(element);
                    else
                    {
                        handler = null in onEndTag;
                        if (handler !is null) (*handler)(element);
                    }

                    // Reaching the end tag of our own element ends this parse.
                    if (tag_.name == root.name) return;
                }
                else if (tag_.isEmpty)
                {
                    Tag startTag = new Tag(tag_.name);

                    // FIX by hed010gy
                    // https://issues.dlang.org/show_bug.cgi?id=2979
                    if (tag_.attr.length > 0)
                          foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv;
                    // END FIX

                    // Handle the pretend start tag
                    // (the child parser is given an empty string to read)
                    string s2;
                    auto parser = new ElementParser(startTag,&s2);
                    auto handler1 = startTag.name in onStartTag;
                    if (handler1 !is null) (*handler1)(parser);
                    else
                    {
                        handler1 = null in onStartTag;
                        if (handler1 !is null) (*handler1)(parser);
                    }

                    // Handle the pretend end tag
                    auto element = new Element(startTag);
                    auto handler2 = tag_.name in onEndTag;
                    if (handler2 !is null) (*handler2)(element);
                    else
                    {
                        handler2 = null in onEndTag;
                        if (handler2 !is null) (*handler2)(element);
                    }
                }
            }
            else
            {
                // Plain text up to the next '<'. A raw handler, if set, takes
                // precedence over the decoding one.
                t = chop(*s,indexOf(*s,"<"));
                if (rawTextHandler.funcptr !is null)
                    rawTextHandler(t);
                else if (textHandler.funcptr !is null)
                    textHandler(decode(t,DecodeMode.LOOSE));
            }
        }
    }

    /**
     * Returns that part of the element which has already been parsed
     */
    override string toString() const @nogc @safe pure nothrow
    {
        assert(elementStart.length >= s.length);
        return elementStart[0 ..
elementStart.length - s.length];
    }

}

private
{
    // Mixed into every checkXxx function. It snapshots the input as `old`
    // on entry and supplies fail() overloads that throw an Err labelled
    // with `msg`.
    template Check(string msg)
    {
        string old = s;

        // Rewind the input and report a failure of rule `msg`.
        void fail() @safe pure
        {
            s = old;
            throw new Err(s,msg);
        }

        // Rewind the input and report a failure of rule `msg`, passing the
        // nested error `e` along.
        void fail(Err e) @safe pure
        {
            s = old;
            throw new Err(s,msg,e);
        }

        // Report `msg2` at the current position, wrapped in a failure of
        // rule `msg` at the rewound position.
        void fail(string msg2) @safe pure
        {
            fail(new Err(s,msg2));
        }
    }

    // Consumes one Misc item: a comment, a processing instruction or a run
    // of whitespace.
    void checkMisc(ref string s) @safe pure // rule 27
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("Misc");

        try
        {
            if (s.startsWith("<!--")) { checkComment(s); }
            else if (s.startsWith("<?")) { checkPI(s); }
            else { checkSpace(s); }
        }
        catch (Err e) { fail(e); }
    }

    // Consumes a whole document: prolog, root element, then any number of
    // trailing Misc items.
    void checkDocument(ref string s) @safe pure // rule 1
    {
        mixin Check!("Document");
        try
        {
            checkProlog(s);
            checkElement(s);
            star!(checkMisc)(s);
        }
        catch (Err e) { fail(e); }
    }

    // Fails at the first code point for which isChar() is false.
    void checkChars(ref string s) @safe pure // rule 2
    {
        // TO DO - Fix std.utf stride and decode functions, then use those
        // instead
        import std.format : format;

        mixin Check!("Chars");

        dchar c;
        ptrdiff_t n = -1;
        // 'i' must not be smaller than size_t because size_t is used internally in
        // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
foreach (size_t i, dchar d; s)
        {
            if (!isChar(d))
            {
                c = d;
                n = i;
                break;
            }
        }
        if (n != -1)
        {
            s = s[n..$];
            fail(format("invalid character: U+%04X",c));
        }
    }

    // Consumes one or more leading whitespace characters; fails if there
    // are none.
    void checkSpace(ref string s) @safe pure // rule 3
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isWhite;
        import std.utf : byCodeUnit;

        mixin Check!("Whitespace");
        ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a));
        if (i == -1 && s.length > 0 && isWhite(s[0]))
            s = s[$ .. $];
        else if (i > -1)
            s = s[i .. $];
        if (s is old) fail();
    }

    // Consumes a Name and returns it in `name`. The first character must be
    // '_', ':' or a letter.
    void checkName(ref string s, out string name) @safe pure // rule 5
    {
        mixin Check!("Name");

        if (s.length == 0) fail();
        // Default to the whole input, so a string consisting solely of name
        // characters is consumed instead of yielding an empty name.
        size_t n = s.length;
        // 'i' must not be smaller than size_t because size_t is used internally in
        // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
        foreach (size_t i, dchar c; s)
        {
            if (c == '_' || c == ':' || isLetter(c)) continue;
            if (i == 0) fail();
            if (c == '-' || c == '.' || isDigit(c)
                || isCombiningChar(c) || isExtender(c)) continue;
            n = i;
            break;
        }
        name = s[0 .. n];
        s = s[n..$];
    }

    // Consumes a quoted attribute value. The value may contain references,
    // but no '<'.
    void checkAttValue(ref string s) @safe pure // rule 10
    {
        mixin Check!("AttValue");

        if (s.length == 0) fail();
        char c = s[0];
        if (c != '\u0022' && c != '\u0027')
            fail("attribute value requires quotes");
        s = s[1..$];
        for (;;)
        {
            // Skip ordinary characters and stop at '<', '&' or the closing
            // quote. Running off the end leaves s empty, which is reported
            // below; slicing with a "not found" index of -1 would instead
            // raise a range error.
            size_t i = 0;
            while (i < s.length && s[i] != '<' && s[i] != '&' && s[i] != c)
                ++i;
            s = s[i .. $];
            if (s.length == 0) fail("unterminated attribute value");
            if (s[0] == '<') fail("< found in attribute value");
            if (s[0] == c) break;
            // Only '&' is left: it must begin a well-formed reference.
            try { checkReference(s); } catch (Err e) { fail(e); }
        }
        s = s[1..$];
    }

    // Consumes character data up to the next '&' or '<'; "]]>" is not
    // allowed inside it.
    void checkCharData(ref string s) @safe pure // rule 14
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("CharData");

        while (s.length != 0)
        {
            if (s.startsWith("&")) break;
            if (s.startsWith("<")) break;
            if (s.startsWith("]]>")) fail("]]> found within char data");
            s = s[1..$];
        }
    }

    // Consumes a comment. The first "--" after the opening "<!--" must be
    // the start of the closing "-->".
    void checkComment(ref string s) @safe pure // rule 15
    {
        import std.string : indexOf;

        mixin Check!("Comment");

        try { checkLiteral("<!--",s); } catch (Err e) { fail(e); }
        ptrdiff_t n = s.indexOf("--");
        if (n == -1) fail("unterminated comment");
        s = s[n..$];
        try { checkLiteral("-->",s); } catch (Err e) { fail(e); }
    }

    // Consumes a processing instruction.
    void checkPI(ref string s) @safe pure // rule 16
    {
        mixin Check!("PI");

        try
        {
            checkLiteral("<?",s);
            checkEnd("?>",s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes a CDATA section.
    void checkCDSect(ref string s) @safe pure // rule 18
    {
        mixin Check!("CDSect");

        try
        {
            checkLiteral(cdata,s);
            checkEnd("]]>",s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes the prolog: optional XML declaration, any Misc items, and an
    // optional DOCTYPE followed by more Misc items.
    void checkProlog(ref string s) @safe pure // rule 22
    {
        mixin Check!("Prolog");

        try
        {
            /* The XML declaration is optional
             * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
             */
            opt!(checkXMLDecl)(s);

            star!(checkMisc)(s);
            opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes the XML declaration.
    void checkXMLDecl(ref string s) @safe pure // rule 23
    {
        mixin Check!("XMLDecl");

        try
        {
            checkLiteral("<?xml",s);
            checkVersionInfo(s);
opt!(checkEncodingDecl)(s);
            opt!(checkSDDecl)(s);
            opt!(checkSpace)(s);
            checkLiteral("?>",s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes the mandatory whitespace, `version`, `=` and quoted number.
    void checkVersionInfo(ref string s) @safe pure // rule 24
    {
        mixin Check!("VersionInfo");

        try
        {
            checkSpace(s);
            checkLiteral("version",s);
            checkEq(s);
            quoted!(checkVersionNum)(s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes '=' with optional whitespace on either side.
    void checkEq(ref string s) @safe pure // rule 25
    {
        mixin Check!("Eq");

        try
        {
            opt!(checkSpace)(s);
            checkLiteral("=",s);
            opt!(checkSpace)(s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes a version number: one or more of [a-zA-Z0-9_.:-]. Fails if
    // nothing could be consumed.
    void checkVersionNum(ref string s) @safe pure // rule 26
    {
        import std.ascii : isAlphaNum;

        mixin Check!("VersionNum");

        // Scan by character class instead of searching for a closing '"':
        // that search broke on single-quoted values and sliced with -1 when
        // no double quote was present.
        size_t n = 0;
        while (n < s.length && (isAlphaNum(s[n]) || s[n] == '_'
            || s[n] == '.' || s[n] == ':' || s[n] == '-'))
            ++n;
        s = s[n .. $];
        if (s is old) fail();
    }

    // Consumes a DOCTYPE declaration without validating its contents.
    void checkDocTypeDecl(ref string s) @safe pure // rule 28
    {
        mixin Check!("DocTypeDecl");

        try
        {
            checkLiteral("<!DOCTYPE",s);
            //
            // TO DO -- ensure DOCTYPE is well formed
            // (But not yet.
// That's one of our "future directions")
            //
            checkEnd(">",s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes a standalone declaration: whitespace, `standalone`, `=` and
    // a quoted yes/no.
    void checkSDDecl(ref string s) @safe pure // rule 32
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("SDDecl");

        try
        {
            checkSpace(s);
            checkLiteral("standalone",s);
            checkEq(s);
        }
        catch (Err e) { fail(e); }

        // Work out which of the four accepted spellings is present.
        const saysYes = s.startsWith("'yes'") || s.startsWith("\"yes\"");
        const saysNo = !saysYes
            && (s.startsWith("'no'") || s.startsWith("\"no\""));
        if (!saysYes && !saysNo)
            fail("standalone attribute value must be 'yes', \"yes\","~
                " 'no' or \"no\"");

        // Quoted "yes" is five characters long, quoted "no" is four.
        s = s[(saysYes ? 5 : 4) .. $];
    }

    // Consumes a whole element: an empty-element tag, or a start tag, its
    // content and an end tag with the same name.
    void checkElement(ref string s) @safe pure // rule 39
    {
        mixin Check!("Element");

        string openName, closeName, tagKind;
        try { checkTag(s,tagKind,openName); } catch (Err e) { fail(e); }

        // Anything other than a start tag is already complete.
        if (tagKind != "STag") return;

        string atEndTag;
        try
        {
            checkContent(s);
            atEndTag = s;
            checkETag(s,closeName);
        }
        catch (Err e) { fail(e); }

        if (openName == closeName) return;

        // Report the mismatch at the position of the offending end tag.
        s = atEndTag;
        fail("end tag name \"" ~ closeName
            ~ "\" differs from start tag name \""~openName~"\"");
    }

    // rules 40 and 44
    // Consumes a start tag or an empty-element tag. `type` is "STag" for the
    // former and "ETag" for the latter; `name` receives the tag name.
    void checkTag(ref string s, out string type, out string name) @safe pure
    {
        mixin Check!("Tag");

        try
        {
            type = "STag";
            checkLiteral("<",s);
            checkName(s,name);
            star!(seq!(checkSpace,checkAttribute))(s);
            opt!(checkSpace)(s);

            // A '/' just before the closing '>' marks an empty-element tag.
            const selfClosing = s.length != 0 && s[0] == '/';
            if (selfClosing)
            {
                s = s[1..$];
                type = "ETag";
            }
            checkLiteral(">",s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes one name="value" pair.
    void checkAttribute(ref string s) @safe pure // rule 41
    {
        mixin Check!("Attribute");

        try
        {
            string attrName;
            checkName(s,attrName);
            checkEq(s);
            checkAttValue(s);
        }
        catch (Err e) { fail(e); }
    }

    void checkETag(ref string s, out string name) @safe pure // rule 42
    {
        mixin
// Rule 42 (ETag): "</", a name, optional whitespace, ">".
// `name` receives the end tag's name.
void checkETag(ref string s, out string name) @safe pure // rule 42
{
    mixin Check!("ETag");

    try
    {
        checkLiteral("</",s);
        checkName(s,name);
        opt!(checkSpace)(s);
        checkLiteral(">",s);
    }
    catch (Err e) { fail(e); }
}

// Rule 43 (content): a run of references, comments, processing instructions,
// CDATA sections, child elements and character data. Stops at end of input or
// at the first "</", which is left in place for the caller.
void checkContent(ref string s) @safe pure // rule 43
{
    import std.algorithm.searching : startsWith;

    mixin Check!("Content");

    try
    {
        while (s.length != 0)
        {
            // Re-anchor the mixin's snapshot at the start of each item.
            old = s;
            // Order matters: the more specific "<..." prefixes are tested
            // before the plain "<" that introduces a child element.
                 if (s.startsWith("&"))        { checkReference(s); }
            else if (s.startsWith("<!--"))     { checkComment(s); }
            else if (s.startsWith("<?"))       { checkPI(s); }
            else if (s.startsWith(cdata))      { checkCDSect(s); }
            else if (s.startsWith("</"))       { break; }
            else if (s.startsWith("<"))        { checkElement(s); }
            else                               { checkCharData(s); }
        }
    }
    catch (Err e) { fail(e); }
}

// Rule 66 (CharRef): "&#" decimal-digits ";" or "&#x" hex-digits ";".
//
// On success `c` holds the referenced code point and `s` has been advanced
// past the terminating ';'.
//
// Fixes over the previous version:
//  * The value is range-checked after every digit. Previously it was
//    accumulated in a 32-bit dchar without any check, so a long digit run
//    could wrap around and be accepted (e.g. "&#4294967361;" became U+0041).
//  * A reference with no valid digit at all (e.g. "&#xZ;") is now reported as
//    such instead of as "U+0000 is not a legal character".
void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
{
    import std.format : format;

    mixin Check!("CharRef");

    c = 0;
    try { checkLiteral("&#",s); } catch (Err e) { fail(e); }

    int radix = 10;
    if (s.length != 0 && s[0] == 'x')
    {
        s = s[1..$];
        radix = 16;
    }
    if (s.length == 0) fail("unterminated character reference");

    bool sawDigit = false;
    while (s.length != 0)
    {
        immutable char d = s[0];

        // Digit value, or 100 (>= any radix) for a non-digit.
        int n;
        if (d >= '0' && d <= '9')      n = d - '0';
        else if (d >= 'a' && d <= 'f') n = d - 'a' + 10;
        else if (d >= 'A' && d <= 'F') n = d - 'A' + 10;
        else                           n = 100;

        if (n >= radix) break;

        // c <= 0x10FFFF here, so c * 16 + 15 cannot overflow 32 bits.
        c *= radix;
        c += n;
        if (c > 0x10FFFF)
            fail("character reference is outside the Unicode range");

        sawDigit = true;
        s = s[1..$];
    }

    if (!sawDigit)
        fail("character reference must have at least one digit");
    if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
    if (s.length == 0 || s[0] != ';') fail("expected ;");
    else s = s[1..$];
}
// Rule 67 (Reference): a character reference when the input starts with
// "&#", otherwise an entity reference.
void checkReference(ref string s) @safe pure // rule 67
{
    import std.algorithm.searching : startsWith;

    mixin Check!("Reference");

    try
    {
        dchar c;
        if (s.startsWith("&#")) checkCharRef(s,c);
        else checkEntityRef(s);
    }
    catch (Err e) { fail(e); }
}

// Rule 68 (EntityRef): "&" Name ";". The entity name is parsed and discarded.
void checkEntityRef(ref string s) @safe pure // rule 68
{
    mixin Check!("EntityRef");

    try
    {
        string name;
        checkLiteral("&",s);
        checkName(s,name);
        checkLiteral(";",s);
    }
    catch (Err e) { fail(e); }
}

// Rule 81 (EncName), checked leniently: at least one ASCII letter, then
// everything up to (not including) the next quote character is skipped
// without being validated against the rule's character set.
//
// Fix: countUntil returns -1 when nothing matches. The result was used
// directly as a slice bound, so an input consisting only of letters, or one
// with no closing quote, sliced s[-1 .. $] and raised a RangeError instead of
// a parse error. "Not found" now means "up to the end of the input".
void checkEncName(ref string s) @safe pure // rule 81
{
    import std.algorithm.searching : countUntil;
    import std.ascii : isAlpha;
    import std.utf : byCodeUnit;

    mixin Check!("EncName");

    immutable ptrdiff_t letters = s.byCodeUnit.countUntil!(a => !isAlpha(a));
    s = s[(letters < 0 ? s.length : letters) .. $];

    // Nothing consumed: the name does not begin with a letter (or is empty).
    if (s is old) fail();

    immutable ptrdiff_t toQuote = s.byCodeUnit.countUntil('\"', '\'');
    s = s[(toQuote < 0 ? s.length : toQuote) .. $];
}

// Rule 80 (EncodingDecl): whitespace, "encoding", "=", quoted EncName.
void checkEncodingDecl(ref string s) @safe pure // rule 80
{
    mixin Check!("EncodingDecl");

    try
    {
        checkSpace(s);
        checkLiteral("encoding",s);
        checkEq(s);
        quoted!(checkEncName)(s);
    }
    catch (Err e) { fail(e); }
}

// Helper functions

// Requires `s` to begin with `literal` and advances `s` past it.
//
// Fix: the import path was the malformed `std..string`. startsWith is now
// taken from std.algorithm.searching, as the sibling checkers already do.
void checkLiteral(string literal,ref string s) @safe pure
{
    import std.algorithm.searching : startsWith;

    mixin Check!("Literal");

    if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\"");
    s = s[literal.length..$];
}
// Skips forward to the first occurrence of `end` and consumes it.
// Throws an Err anchored at the current position when `end` does not occur.
//
// Fix: the import path was the malformed `std..string`.
void checkEnd(string end,ref string s) @safe pure
{
    import std.string : indexOf;
    // Deliberately no mixin Check here.

    auto n = s.indexOf(end);
    if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\"");
    s = s[n..$];
    checkLiteral(end,s);
}

// Metafunctions -- none of these use mixin Check

// Zero or one: runs f and ignores an Err it throws. Where `s` is left after
// a failure is up to f.
void opt(alias f)(ref string s)
{
    try { f(s); } catch (Err e) {}
}

// One or more: f must succeed once; further repetitions are optional.
void plus(alias f)(ref string s)
{
    f(s);
    star!(f)(s);
}

// Zero or more: repeats f until it throws an Err or the input is exhausted.
// Also stops when f succeeds without consuming anything, which would
// otherwise repeat forever.
void star(alias f)(ref string s)
{
    while (s.length != 0)
    {
        immutable string before = s;
        try { f(s); }
        catch (Err e) { return; }
        if (s is before) return;
    }
}

// Runs f between a matching pair of quotes. A leading apostrophe selects
// single quotes; anything else is required to be a double quote.
//
// Fix: the import path was the malformed `std..string`. startsWith is now
// taken from std.algorithm.searching, as the other checkers do.
void quoted(alias f)(ref string s)
{
    import std.algorithm.searching : startsWith;

    immutable string quote = s.startsWith("'") ? "'" : "\"";
    checkLiteral(quote,s);
    f(s);
    checkLiteral(quote,s);
}

// Sequence: f followed by g.
void seq(alias f,alias g)(ref string s)
{
    f(s);
    g(s);
}
/**
 * Check an entire XML document for well-formedness
 *
 * Params:
 *      s = the document to be checked, passed as a string
 *
 * Throws: CheckException if the document is not well formed
 *
 * CheckException's toString() method will yield the complete hierarchy of
 * parse failure (the XML equivalent of a stack trace), giving the line and
 * column number of every failure at every level.
 */
void check(string s) @safe pure
{
    // Keep the untouched document for position reporting. `s` is advanced as
    // it is checked, so by the time "Junk found after document" is raised it
    // holds only the unparsed tail. Passing that tail to complete() reported
    // the junk at line 1, column 1 regardless of where it really was.
    immutable string entire = s;

    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch (Err e)
    {
        e.complete(entire);
        throw e;
    }
}

// A mismatched end tag must be reported with both tag names.
@system pure unittest
{
    import std.string : indexOf;

    try
    {
        check(q"[<?xml version="1.0"?>
        <catalog>
           <book id="bk101">
              <author>Gambardella, Matthew</author>
              <title>XML Developer's Guide</title>
              <genre>Computer</genre>
              <price>44.95</price>
              <publish_date>2000-10-01</publish_date>
              <description>An in-depth look at creating applications
              with XML.</description>
           </book>
           <book id="bk102">
              <author>Ralls, Kim</author>
              <title>Midnight Rain</title>
              <genre>Fantasy</genres>
              <price>5.95</price>
              <publish_date>2000-12-16</publish_date>
              <description>A former architect battles corporate zombies,
              an evil sorceress, and her own childhood to become queen
              of the world.</description>
           </book>
           <book id="bk103">
              <author>Corets, Eva</author>
              <title>Maeve Ascendant</title>
              <genre>Fantasy</genre>
              <price>5.95</price>
              <publish_date>2000-11-17</publish_date>
              <description>After the collapse of a nanotechnology
              society in England, the young survivors lay the
              foundation for a new society.</description>
           </book>
        </catalog>
        ]");
        assert(false);
    }
    catch (CheckException e)
    {
        auto n = e.toString().indexOf("end tag name \"genres\" differs"~
            " from start tag name \"genre\"");
        assert(n != -1);
    }
}

// A well-formed document containing a comment must pass.
@system unittest
{
    string s = q"EOS
<?xml version="1.0"?>
<set>
    <one>A</one>
    <!-- comment -->
    <two>B</two>
</set>
EOS";
    try
    {
        check(s);
    }
    catch (CheckException e)
    {
        assert(0, e.toString());
    }
}
// Attribute values may contain the other kind of quote character.
@system unittest
{
    string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
                        xmlns:stream="http://etherx.'jabber'.org/streams"
                        xmlns="jabber:'client'" from='jid.pl' id="587a5767"
                        xml:lang="en" version="1.0" attr='a"b"c'>
                        </stream:stream></r>`;

    DocumentParser parser = new DocumentParser(test_xml);
    bool tested = false;
    parser.onStartTag["stream:stream"] = (ElementParser p) {
        assert(p.tag.attr["xmlns"] == "jabber:'client'");
        assert(p.tag.attr["from"] == "jid.pl");
        assert(p.tag.attr["attr"] == "a\"b\"c");
        tested = true;
    };
    parser.parse();
    assert(tested);
}

// Entities in attribute values and in text are decoded.
// The XML input must carry the ampersand as the entity "&amp;"; a bare '&'
// is not well-formed XML and would not exercise decoding at all.
@system unittest
{
    string s = q"EOS
<?xml version="1.0" encoding="utf-8"?> <Tests>
    <Test thing="What &amp; Up">What &amp; Up Second</Test>
</Tests>
EOS";
    auto xml = new DocumentParser(s);

    xml.onStartTag["Test"] = (ElementParser xml) {
        assert(xml.tag.attr["thing"] == "What & Up");
    };

    xml.onEndTag["Test"] = (in Element e) {
        assert(e.text() == "What & Up Second");
    };
    xml.parse();
}

// An attribute value containing a quote and '>' must round-trip.
// Both characters have to be written as entities in the source text: with a
// raw '"' the value would end immediately after the opening quote.
@system unittest
{
    string s = `<tag attr="&quot;value&gt;" />`;
    auto doc = new Document(s);
    assert(doc.toString() == s);
}

/** The base class for exceptions thrown by this module */
class XMLException : Exception { this(string msg) @safe pure { super(msg); } }

// Other exceptions

/// Thrown during Comment constructor
class CommentException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

/// Thrown during CData constructor
class CDataException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

/// Thrown during XMLInstruction constructor
class XIException : XMLException
{ private this(string msg) @safe pure { super(msg); } }
/// Thrown during ProcessingInstruction constructor
class PIException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

/// Thrown during Text constructor
class TextException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

/// Thrown during decode()
class DecodeException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

/// Thrown if comparing with wrong type
class InvalidTypeException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

/// Thrown when parsing for Tags
class TagException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

/**
 * Thrown during check()
 */
class CheckException : XMLException
{
    CheckException err; /// Parent in hierarchy
    private string tail; // unparsed remainder of the input at the failure
    /**
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    size_t line = 0; /// Line number at which parse failure occurred
    size_t column = 0; /// Column number at which parse failure occurred

    private this(string tail,string msg,Err err=null) @safe pure
    {
        super(null);
        this.tail = tail;
        this.msg = msg;
        this.err = err;
    }

    // Fills in `line` and `column` for this exception and, recursively, for
    // every linked `err`. `entire` must be the complete document, of which
    // `tail` is a suffix: the position is derived from the text before it.
    //
    // Fix: the import path was the malformed `std..string`.
    private void complete(string entire) @safe pure
    {
        import std.string : count, lastIndexOf;
        import std.utf : toUTF32;

        string head = entire[0..$-tail.length];
        // Start of the line containing the failure (0 when there is no '\n').
        ptrdiff_t n = head.lastIndexOf('\n') + 1;
        line = head.count("\n") + 1;
        // Columns are counted in code points, not UTF-8 code units.
        dstring t = toUTF32(head[n..$]);
        column = t.length + 1;
        if (err !is null) err.complete(entire);
    }

    // Renders the chain of failures, one per line. The text of the linked
    // `err` is placed before this exception's own line. The position prefix
    // is omitted until complete() has filled it in.
    override string toString() const @safe pure
    {
        import std.format : format;

        string s;
        if (line != 0) s = format("Line %d, column %d: ",line,column);
        s ~= msg;
        s ~= '\n';
        if (err !is null) s = err.toString() ~ s;
        return s;
    }
}

private alias Err = CheckException;
// Private helper functions

private
{
    // Casts `o` to T, throwing InvalidTypeException when the dynamic type
    // does not match (the cast yields null).
    inout(T) toType(T)(inout Object o)
    {
        T t = cast(T)(o);
        if (t is null)
        {
            throw new InvalidTypeException("Attempt to compare a "
                ~ T.stringof ~ " with an instance of another type");
        }
        return t;
    }

    // Removes and returns the first n code units of s.
    // n == -1 (i.e. size_t.max, e.g. a "not found" index converted to
    // size_t) means "take everything".
    string chop(ref string s, size_t n) @safe pure nothrow
    {
        if (n == size_t.max) n = s.length;
        string t = s[0 .. n];
        s = s[n..$];
        return t;
    }

    // Optional character: consumes c when s starts with it and reports
    // whether it did.
    bool optc(ref string s, char c) @safe pure nothrow
    {
        immutable bool b = s.length != 0 && s[0] == c;
        if (b) s = s[1..$];
        return b;
    }

    // Required character: consumes c or throws TagException.
    void reqc(ref string s, char c) @safe pure
    {
        if (s.length == 0 || s[0] != c) throw new TagException("");
        s = s[1..$];
    }

    // Consumes and returns the first character of s, which must be one of
    // `chars`; throws TagException otherwise.
    //
    // Fix: the import path was the malformed `std..string`.
    char requireOneOf(ref string s, string chars) @safe pure
    {
        import std.string : indexOf;

        if (s.length == 0 || indexOf(chars,s[0]) == -1)
            throw new TagException("");
        immutable char ch = s[0];
        s = s[1..$];
        return ch;
    }

    alias hash = .hashOf;

    // Definitions from the XML specification
    // Each table is a flat, ascending list of inclusive [low, high] pairs,
    // in the layout lookup() expects.
    immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
        0x10000,0x10FFFF];
    immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
        0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
        0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
        0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
        0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
        0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
        0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
        0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
        0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
        0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
        0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
        0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
        0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
        0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
        0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
        0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
        0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
        0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
        0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
        0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
        0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
        0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
        0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
        0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
        0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
        0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
        0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
        0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
        0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
        0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
        0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
        0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
        0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
        0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
        0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
        0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
        0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
        0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
        0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
        0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
        0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
    immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
    immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
        0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
        0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
        0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
        0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
        0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
        0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
        0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
        0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
        0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
        0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
        0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
        0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
        0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
        0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
        0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
        0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
        0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
        0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
        0x3099,0x3099,0x309A,0x309A];
    immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
        0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
        0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
        0x0ED9,0x0F20,0x0F29];
    immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
        0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
        0x3035,0x309D,0x309E,0x30FC,0x30FE];

    // Binary search over a table of inclusive [low, high] pairs stored flat
    // in ascending order. Returns true when c lies inside one of the ranges.
    bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
    {
        while (table.length != 0)
        {
            // Middle index, rounded down to even so it addresses a `low`.
            auto m = (table.length >> 1) & ~1;
            if (c < table[m])
            {
                table = table[0 .. m];
            }
            else if (c > table[m+1])
            {
                table = table[m+2..$];
            }
            else return true;
        }
        return false;
    }

    // Returns a short, printable preview of the start of s: code units below
    // 0x20 or above 0x7F are shown as '.', and "___" is appended once 40
    // characters have been produced.
    string startOf(string s) @safe nothrow pure
    {
        string r;
        foreach (char c;s)
        {
            r ~= (c < 0x20 || c > 0x7F) ? '.' : c;
            if (r.length >= 40) { r ~= "___"; break; }
        }
        return r;
    }

    // Aborts the current operation by throwing an XMLException carrying s.
    void exit(string s=null)
    {
        throw new XMLException(s);
    }
}