std.xml source code

1 // Written in the D programming language.
2 
3 /**
4 $(RED Warning: This module is considered out-dated and not up to Phobos'
5       current standards. It will be removed from Phobos in 2.101.0.
6       If you still need it, go to $(LINK https://github.com/DigitalMars/undeaD))
7 
8 Classes and functions for creating and parsing XML
9 
10 The basic architecture of this module is that there are standalone functions,
11 classes for constructing an XML document from scratch (Tag, Element and
12 Document), and also classes for parsing a pre-existing XML file (ElementParser
13 and DocumentParser). The parsing classes <i>may</i> be used to build a
14 Document, but that is not their primary purpose. The handling capabilities of
15 DocumentParser and ElementParser are sufficiently customizable that you can
16 make them do pretty much whatever you want.
17 
18 Example: This example creates a DOM (Document Object Model) tree
19     from an XML file.
20 ------------------------------------------------------------------------------
21 import std.xml;
22 import std.stdio;
23 import std.string;
24 import std.file;
25 
26 // books.xml is used in various samples throughout the Microsoft XML Core
27 // Services (MSXML) SDK.
28 //
29 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx
30 
31 void main()
32 {
33     string s = cast(string) std.file.read("books.xml");
34 
35     // Check for well-formedness
36     check(s);
37 
38     // Make a DOM tree
39     auto doc = new Document(s);
40 
41     // Plain-print it
42     writeln(doc);
43 }
44 ------------------------------------------------------------------------------
45 
46 Example: This example does much the same thing, except that the file is
47     deconstructed and reconstructed by hand. This is more work, but the
48     techniques involved offer vastly more power.
49 ------------------------------------------------------------------------------
import std.xml;
import std.stdio;
import std.string;
import std.file;
53 
54 struct Book
55 {
56     string id;
57     string author;
58     string title;
59     string genre;
60     string price;
61     string pubDate;
62     string description;
63 }
64 
65 void main()
66 {
67     string s = cast(string) std.file.read("books.xml");
68 
69     // Check for well-formedness
70     check(s);
71 
72     // Take it apart
73     Book[] books;
74 
75     auto xml = new DocumentParser(s);
76     xml.onStartTag["book"] = (ElementParser xml)
77     {
78         Book book;
79         book.id = xml.tag.attr["id"];
80 
81         xml.onEndTag["author"]       = (in Element e) { book.author      = e.text(); };
82         xml.onEndTag["title"]        = (in Element e) { book.title       = e.text(); };
83         xml.onEndTag["genre"]        = (in Element e) { book.genre       = e.text(); };
84         xml.onEndTag["price"]        = (in Element e) { book.price       = e.text(); };
85         xml.onEndTag["publish-date"] = (in Element e) { book.pubDate     = e.text(); };
86         xml.onEndTag["description"]  = (in Element e) { book.description = e.text(); };
87 
88         xml.parse();
89 
90         books ~= book;
91     };
92     xml.parse();
93 
    // Put it back together again
95     auto doc = new Document(new Tag("catalog"));
96     foreach (book;books)
97     {
98         auto element = new Element("book");
99         element.tag.attr["id"] = book.id;
100 
101         element ~= new Element("author",      book.author);
102         element ~= new Element("title",       book.title);
103         element ~= new Element("genre",       book.genre);
104         element ~= new Element("price",       book.price);
105         element ~= new Element("publish-date",book.pubDate);
106         element ~= new Element("description", book.description);
107 
108         doc ~= element;
109     }
110 
111     // Pretty-print it
    writeln(join(doc.pretty(3),"\n"));
113 }
114 -------------------------------------------------------------------------------
115 Copyright: Copyright Janice Caron 2008 - 2009.
116 License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
117 Authors:   Janice Caron
118 Source:    $(PHOBOSSRC std/xml.d)
119 */
120 /*
121          Copyright Janice Caron 2008 - 2009.
122 Distributed under the Boost Software License, Version 1.0.
123    (See accompanying file LICENSE_1_0.txt or copy at
124          http://www.boost.org/LICENSE_1_0.txt)
125 */
126 deprecated("Will be removed from Phobos in 2.101.0. If you still need it, go to https://github.com/DigitalMars/undeaD")
127 module std.xml;
128 
// Literal text that opens a CDATA section.
enum string cdata = "<![CDATA[";
130 
/**
 * Tests whether a code point is a legal XML character.
 *
 * Accepted values are tab (0x9), line feed (0xA), carriage return (0xD),
 * everything from 0x20 through 0xD7FF, and everything from 0xE000 through
 * 0x10FFFF with the exception of 0xFFFE and 0xFFFF.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c lies in one of the accepted ranges, false otherwise
 */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // Low range: printable code points plus the three permitted controls.
    if (c <= 0xD7FF)
        return c >= 0x20 || c == 0x9 || c == 0xA || c == 0xD;

    // 0xD800 .. 0xDFFF and anything past 0x10FFFF is rejected.
    if (c < 0xE000 || c > 0x10FFFF)
        return false;

    // Clearing the lowest bit maps both U+FFFE and U+FFFF onto 0xFFFE.
    return (c & 0x1FFFFE) != 0xFFFE;
}
162 
@safe @nogc nothrow pure unittest
{
    // Boundary values on either side of every range isChar distinguishes.
    static immutable dchar[] accepted =
        [0x9, 0xA, 0xD, 0x20, 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF];
    static immutable dchar[] rejected =
        [0x8, 0xB, 0xC, 0xE, 0x1F, 0xD800, 0xDFFF, 0xFFFE, 0xFFFF];

    foreach (codePoint; accepted)
        assert( isChar(codePoint));
    foreach (codePoint; rejected)
        assert(!isChar(codePoint));

    assert( isChar('J'));
    // One past the largest code point accepted above.
    assert(!isChar(cast(dchar) 0x110000));

    // Optional exhaustive cross-check of the hard-coded ranges against the
    // lookup table; only compiled when the debug identifier is set.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}
192 
/**
 * Tests whether a character counts as whitespace in XML.
 *
 * Exactly four characters qualify: space (U+0020), tab (U+0009), linefeed
 * (U+000A) and carriage return (U+000D).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is one of the four whitespace characters
 */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    switch (c)
    {
    case '\u0020', '\u0009', '\u000A', '\u000D':
        return true;
    default:
        return false;
    }
}
208 
/**
 * Tests whether a character is a digit according to the XML standard.
 *
 * Code points 0x0030 through 0x0039 are answered directly; every other
 * value is looked up in DigitTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is a digit
 */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // Fast path that avoids the table lookup for 0x30 .. 0x39.
    immutable bool inFastRange = c >= 0x0030 && c <= 0x0039;
    return inFastRange || lookup(DigitTable, c);
}
224 
@safe @nogc nothrow pure unittest
{
    // Exhaustive comparison of isDigit against DigitTable; only compiled
    // when the stdxml_TestHardcodedChecks debug identifier is set.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (codePoint; 0 .. dchar.max + 1)
        {
            immutable bool expected = lookup(DigitTable, codePoint);
            assert(isDigit(codePoint) == expected);
        }
    }
}
233 
/**
 * Tests whether a character is a letter according to the XML standard.
 *
 * A letter is anything accepted by either isIdeographic or isBaseChar.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an ideographic or a base character
 */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    // The range-based ideographic test runs first; the table-driven
    // base-character test is only reached when it fails.
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}
246 
/**
 * Tests whether a character is an ideographic character according to the
 * XML standard.
 *
 * Accepted code points are 0x3007, 0x3021 through 0x3029, and 0x4E00
 * through 0x9FA5 (all bounds inclusive).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c falls in one of the accepted ranges
 */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    immutable bool isSingleton = c == 0x3007;
    immutable bool inLowRange  = 0x3021 <= c && c <= 0x3029;
    immutable bool inHighRange = 0x4E00 <= c && c <= 0x9FA5;
    return isSingleton || inLowRange || inHighRange;
}
266 
@safe @nogc nothrow pure unittest
{
    // The singleton plus both endpoints of each accepted range.
    static immutable dchar[] samples =
        ['\u4E00', '\u9FA5', '\u3007', '\u3021', '\u3029'];
    foreach (codePoint; samples)
        assert(isIdeographic(codePoint));

    // Optional exhaustive cross-check against the lookup table; only
    // compiled when the debug identifier is set.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}
281 
/**
 * Tests whether a character is a base character according to the XML
 * standard.
 *
 * The answer comes from a lookup in BaseCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is listed in BaseCharTable
 */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    immutable bool listed = lookup(BaseCharTable, c);
    return listed;
}
295 
/**
 * Tests whether a character is a combining character according to the XML
 * standard.
 *
 * The answer comes from a lookup in CombiningCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is listed in CombiningCharTable
 */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    immutable bool listed = lookup(CombiningCharTable, c);
    return listed;
}
309 
/**
 * Tests whether a character is an extender according to the XML standard.
 *
 * The answer comes from a lookup in ExtenderTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is listed in ExtenderTable
 */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    immutable bool listed = lookup(ExtenderTable, c);
    return listed;
}
322 
/**
 * Escapes the five characters that have predefined XML entities.
 *
 * Ampersand, double quote, apostrophe, less-than and greater-than are
 * replaced by &amp;amp;, &amp;quot;, &amp;apos;, &amp;lt; and &amp;gt;
 * respectively; decode() performs the reverse mapping. Both functions are
 * conveniences only: the std.xml classes encode and decode automatically.
 *
 * When the input contains none of those characters the original string is
 * returned unchanged, without copying.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be encoded
 *
 * Returns: The encoded string
 *
 * Example:
 * --------------
 * writeln(encode("a > b")); // writes "a &gt; b"
 * --------------
 */
S encode(S)(S s)
{
    import std.array : appender;

    auto output = appender!S();
    size_t copiedFrom;      // start of the input not yet copied to output

    foreach (idx, ch; s)
    {
        string entity;      // stays null for characters that need no escaping
        switch (ch)
        {
        case '<':  entity = "&lt;";   break;
        case '>':  entity = "&gt;";   break;
        case '&':  entity = "&amp;";  break;
        case '"':  entity = "&quot;"; break;
        case '\'': entity = "&apos;"; break;
        default:   break;
        }
        if (entity is null)
            continue;

        // Flush the untouched run preceding this character, then the entity.
        output.put(s[copiedFrom .. idx]);
        output.put(entity);
        copiedFrom = idx + 1;
    }

    // A null data pointer means nothing was ever written, i.e. nothing
    // needed escaping: hand the caller's string straight back.
    if (output.data.ptr is null)
        return s;

    output.put(s[copiedFrom .. $]);
    return output.data;
}
376 
@safe pure unittest
{
    // Input that needs no escaping must come back as the very same slice.
    auto untouched = "hello";
    assert(encode(untouched) is untouched);

    // One case per escaped character: [input, expected output].
    static immutable string[2][] cases = [
        ["a > b",     "a &gt; b"],
        ["a < b",     "a &lt; b"],
        ["don't",     "don&apos;t"],
        ["\"hi\"",    "&quot;hi&quot;"],
        ["cat & dog", "cat &amp; dog"],
    ];
    foreach (pair; cases)
    {
        string input = pair[0];
        string expected = pair[1];
        assert(encode(input) == expected, encode(input));
    }
}
387 
/**
 * Selects how decode() treats its input.
 *
 * $(DDOC_ENUM_MEMBERS NONE) Do not decode
 * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors
 * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error
 */
enum DecodeMode
{
    NONE,
    LOOSE,
    STRICT
}
399 
/**
 * Unescapes the predefined XML entities in a string.
 *
 * This is the inverse of encode(): the entities &amp;amp;, &amp;quot;,
 * &amp;apos;, &amp;lt; and &amp;gt; are turned back into the characters
 * they stand for, and numeric character references such as &amp;#x20AC;
 * are replaced by the UTF-8 encoding of the referenced character. Both
 * functions are conveniences only: the std.xml classes encode and decode
 * automatically.
 *
 * If the string does not contain an ampersand, the original will be
 * returned.
 *
 * The "mode" parameter selects the error policy: DecodeMode.NONE returns
 * the input untouched, DecodeMode.LOOSE copies an unrecognised ampersand
 * through literally, and DecodeMode.STRICT throws a DecodeException for it.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be decoded
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
 *
 * Returns: The decoded string
 *
 * Example:
 * --------------
 * writeln(decode("a &gt; b")); // writes "a > b"
 * --------------
 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure
{
    import std.algorithm.searching : startsWith;

    if (mode == DecodeMode.NONE) return s;

    // Named entities and, at the same index, the character each one yields.
    static immutable string[5] entityNames =
        ["&amp;", "&quot;", "&apos;", "&lt;", "&gt;"];
    static immutable char[5] entityChars = ['&', '"', '\'', '<', '>'];

    // Stays empty until the first '&' is met, so ampersand-free input is
    // returned without any copying.
    string decoded;
    for (size_t pos = 0; pos < s.length; ++pos)
    {
        immutable char ch = s[pos];
        if (ch != '&')
        {
            if (decoded.length != 0) decoded ~= ch;
            continue;
        }

        // First ampersand: start the output with everything before it.
        if (decoded.length == 0)
            decoded = s[0 .. pos].dup;

        string rest = s[pos .. $];

        if (startsWith(rest, "&#"))
        {
            try
            {
                dchar d;
                string t = rest;
                checkCharRef(t, d);     // advances t past the reference
                char[4] temp;
                import std.utf : encode;
                decoded ~= temp[0 .. encode(temp, d)];
                // Park pos on the last consumed character; the loop's
                // ++pos then moves on to the first unconsumed one.
                pos = s.length - t.length - 1;
            }
            catch (Err e)
            {
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                decoded ~= '&';
            }
            continue;
        }

        bool recognised = false;
        foreach (k, name; entityNames)
        {
            if (startsWith(rest, name))
            {
                decoded ~= entityChars[k];
                // Skip the remainder of the entity; ++pos covers the '&'.
                pos += name.length - 1;
                recognised = true;
                break;
            }
        }

        if (!recognised)
        {
            if (mode == DecodeMode.STRICT)
                throw new DecodeException("Unescaped &");
            decoded ~= '&';
        }
    }
    return (decoded.length == 0) ? s : decoded;
}
488 
@safe pure unittest
{
    // Passes only if strict decoding of s throws a DecodeException.
    void assertNot(string s) pure
    {
        bool threw = false;
        try { decode(s,DecodeMode.STRICT); }
        catch (DecodeException e) { threw = true; }
        assert(threw,s);
    }

    // Assert that things that should work, do
    auto untouched = "hello";
    assert(decode(untouched, DecodeMode.STRICT) is untouched);

    // [input, expected strict-mode output]
    static immutable string[2][] wellFormed = [
        ["a &gt; b",       "a > b"],
        ["a &lt; b",       "a < b"],
        ["don&apos;t",     "don't"],
        ["&quot;hi&quot;", "\"hi\""],
        ["cat &amp; dog",  "cat & dog"],
        ["&#42;",          "*"],
        ["&#x2A;",         "*"],
    ];
    foreach (pair; wellFormed)
        assert(decode(pair[0], DecodeMode.STRICT) == pair[1]);

    // Malformed input: loose mode hands it back unchanged ...
    static immutable string[] malformed =
        ["cat & dog", "a &gt b", "&#;", "&#x;", "&#2G;", "&#x2G;"];
    foreach (text; malformed)
        assert(decode(text, DecodeMode.LOOSE) == text);

    // Assert that things that shouldn't work, don't
    foreach (text; malformed)
        assertNot(text);
}
524 
/**
 * Class representing an XML document.
 *
 * A Document is the root Element together with the text that surrounds it
 * in the source: the prolog before the root element and the epilog after.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 */
class Document : Element
{
    /**
     * Contains all text which occurs before the root element.
     * Defaults to &lt;?xml version="1.0"?&gt;
     */
    string prolog = "<?xml version=\"1.0\"?>";
    /**
     * Contains all text which occurs after the root element.
     * Defaults to the empty string
     */
    string epilog;

    /**
     * Constructs a Document by parsing XML text.
     *
     * This function creates a complete DOM (Document Object Model) tree.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by DocumentParser's in contract.
     *
     * Params:
     *      s = the complete XML text.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    do
    {
        auto parser = new DocumentParser(s);
        string rootTagText = parser.tag.tagString;

        this(parser.tag);
        // The prolog is everything in s that precedes the root tag's text;
        // the pointer difference is the offset of that tag within s.
        prolog = s[0 .. rootTagText.ptr - s.ptr];
        parse(parser);
        // Whatever the parser has left after the root element is the epilog.
        epilog = *parser.s;
    }

    /**
     * Constructs a Document from a Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    const
    {
        /**
         * Compares two Documents for equality
         *
         * Prolog, element content and epilog must all match.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 == d2) { }
         * --------------
         */
        override bool opEquals(scope const Object o) const
        {
            const other = toType!(const Document)(o);
            if (prolog != other.prolog)
                return false;
            if (!(cast(const) this).Element.opEquals(cast(const) other))
                return false;
            return epilog == other.epilog;
        }

        /**
         * Compares two Documents
         *
         * The prolog is compared first, then the element content, then the
         * epilog; the first difference decides the result.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 < d2) { }
         * --------------
         */
        override int opCmp(scope const Object o) scope const
        {
            const other = toType!(const Document)(o);

            if (prolog != other.prolog)
                return prolog < other.prolog ? -1 : 1;

            immutable int contentOrder = this.Element.opCmp(other);
            if (contentOrder != 0)
                return contentOrder;

            if (epilog != other.epilog)
                return epilog < other.epilog ? -1 : 1;

            return 0;
        }

        /**
         * Returns the hash of a Document
         *
         * The Element hash is folded together with the epilog and then
         * the prolog.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         */
        override size_t toHash() scope const @trusted
        {
            auto contentHash = (cast() this).Element.toHash();
            auto withEpilog = hash(epilog, contentHash);
            return hash(prolog, withEpilog);
        }

        /**
         * Returns the string representation of a Document. (That is, the
         * complete XML of a document).
         */
        override string toString() scope const @safe
        {
            immutable string rootXml = super.toString();
            return prolog ~ rootXml ~ epilog;
        }
    }
}
646 
@system unittest
{
    // https://issues.dlang.org/show_bug.cgi?id=14966
    auto source = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

    // Two documents parsed from the same text compare equal ...
    auto first = new Document(source);
    auto second = new Document(source);
    assert(first == second);
    assert(!(first < second));

    // ... and are interchangeable as associative array keys.
    int[Document] table;
    table[first] = 1;
    assert(table[second] == 1);

    // Appending a child to one of them orders it after the other.
    second ~= new Element("b");
    assert(first < second);
    assert(second > first);
}
664 
/**
 * Class representing an XML element.
 *
 * An element owns a tag plus an ordered list of child items. Every appended
 * child is recorded twice: once in $(B items), which preserves the order of
 * insertion, and once in the array matching its concrete type ($(B texts),
 * $(B cdatas), $(B comments), $(B pis) or $(B elements)).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 */
class Element : Item
{
    Tag tag; /// The start tag of the element
    Item[] items; /// The element's items
    Text[] texts; /// The element's text items
    CData[] cdatas; /// The element's CData items
    Comment[] comments; /// The element's comments
    ProcessingInstruction[] pis; /// The element's processing instructions
    Element[] elements; /// The element's child elements

    /**
     * Constructs an Element given a name and a string to be used as a Text
     * interior.
     *
     * Params:
     *      name = the name of the element.
     *      interior = (optional) the string interior. When empty or null,
     *          no Text item is appended.
     *
     * Example:
     * -------------------------------------------------------
     * auto element = new Element("title","Serenity")
     *     // constructs the element <title>Serenity</title>
     * -------------------------------------------------------
     */
    this(string name, string interior=null) @safe pure
    {
        this(new Tag(name));
        if (interior.length != 0) opOpAssign!("~")(new Text(interior));
    }

    /**
     * Constructs an Element from a Tag.
     *
     * The name, attributes and tag string of tag_ are copied into a new Tag
     * owned by this element. The new tag starts out as TagType.EMPTY and
     * becomes TagType.START once a non-empty item is appended.
     *
     * Params:
     *      tag_ = the start or empty tag of the element.
     */
    this(const(Tag) tag_) @safe pure
    {
        this.tag = new Tag(tag_.name);
        tag.type = TagType.EMPTY;
        foreach (k,v;tag_.attr) tag.attr[k] = v;
        tag.tagString = tag_.tagString;
    }

    /**
     * Append a text item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Text("hello");
     * --------------
     */
    void opOpAssign(string op)(Text item) @safe pure
        if (op == "~")
    {
        texts ~= item;
        appendItem(item);
    }

    /**
     * Append a CData item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new CData("hello");
     * --------------
     */
    void opOpAssign(string op)(CData item) @safe pure
        if (op == "~")
    {
        cdatas ~= item;
        appendItem(item);
    }

    /**
     * Append a comment to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Comment("hello");
     * --------------
     */
    void opOpAssign(string op)(Comment item) @safe pure
        if (op == "~")
    {
        comments ~= item;
        appendItem(item);
    }

    /**
     * Append a processing instruction to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new ProcessingInstruction("hello");
     * --------------
     */
    void opOpAssign(string op)(ProcessingInstruction item) @safe pure
        if (op == "~")
    {
        pis ~= item;
        appendItem(item);
    }

    /**
     * Append a complete element to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * Element other = new Element("br");
     * element ~= other;
     *    // appends element representing <br />
     * --------------
     */
    void opOpAssign(string op)(Element item) @safe pure
        if (op == "~")
    {
        elements ~= item;
        appendItem(item);
    }

    // Shared tail of every opOpAssign overload: records the item in
    // insertion order and promotes an EMPTY tag to START as soon as the
    // element holds an item that is not itself empty XML.
    private void appendItem(Item item) @safe pure
    {
        items ~= item;
        if (tag.type == TagType.EMPTY && !item.isEmptyXML)
            tag.type = TagType.START;
    }

    // Fills this element from a parser by installing handlers that append
    // each piece of content as it is reported, then running the parser.
    // Child start tags are handled recursively with a new Element.
    private void parse(ElementParser xml)
    {
        xml.onText = (string s) { opOpAssign!("~")(new Text(s)); };
        xml.onCData = (string s) { opOpAssign!("~")(new CData(s)); };
        xml.onComment = (string s) { opOpAssign!("~")(new Comment(s)); };
        xml.onPI = (string s) { opOpAssign!("~")(new ProcessingInstruction(s)); };

        xml.onStartTag[null] = (ElementParser child)
        {
            auto e = new Element(child.tag);
            e.parse(child);
            opOpAssign!("~")(e);
        };

        xml.parse();
    }

    /**
     * Compares two Elements for equality
     *
     * Two elements are equal when they hold the same number of items and
     * the items compare equal pairwise, in order.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 == e2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        // NOTE(review): only the items are compared here, whereas toHash()
        // also mixes in tag.toHash() - confirm whether the tag should take
        // part in equality as well.
        const element = toType!(const Element)(o);
        immutable len = items.length;
        if (len != element.items.length) return false;
        foreach (i; 0 .. len)
        {
            if (!items[i].opEquals(element.items[i])) return false;
        }
        return true;
    }

    /**
     * Compares two Elements
     *
     * Items are compared pairwise in order; the first unequal pair decides
     * the result. If one item list is a prefix of the other, the shorter
     * one orders first.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 < e2) { }
     * --------------
     */
    override int opCmp(scope const Object o) @safe const
    {
        const element = toType!(const Element)(o);
        // size_t matches the type of items.length.
        for (size_t i = 0; ; ++i)
        {
            immutable bool mineExhausted = i == items.length;
            immutable bool theirsExhausted = i == element.items.length;

            if (mineExhausted && theirsExhausted) return 0;
            if (mineExhausted) return -1;
            if (theirsExhausted) return 1;
            if (!items[i].opEquals(element.items[i]))
                return items[i].opCmp(element.items[i]);
        }
    }

    /**
     * Returns the hash of an Element
     *
     * The result is the tag's hash plus the sum of every item's hash.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     */
    override size_t toHash() scope const @safe
    {
        size_t total = tag.toHash();
        foreach (item;items) total += item.toHash();
        return total;
    }

    const
    {
        /**
         * Returns the decoded interior of an element.
         *
         * The element is assumed to contain text <i>only</i>. So, for
         * example, given XML such as "&lt;title&gt;Good &amp;amp;
         * Bad&lt;/title&gt;", will return "Good &amp; Bad".
         *
         * Params:
         *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
         *
         * Throws: DecodeException if decode fails, or if the element holds
         *      an item that is not a Text
         */
        string text(DecodeMode mode=DecodeMode.LOOSE)
        {
            string buffer;
            foreach (item;items)
            {
                Text t = cast(Text) item;
                if (t is null) throw new DecodeException(item.toString());
                buffer ~= decode(t.toString(),mode);
            }
            return buffer;
        }

        /**
         * Returns an indented string representation of this item
         *
         * An empty element yields its empty tag on a single line, and an
         * element whose only item is a Text yields start tag, text and end
         * tag on a single line. Otherwise the start and end tags get lines
         * of their own and every line produced by a child is padded on the
         * left with indent spaces.
         *
         * Params:
         *      indent = (optional) number of spaces by which to indent this
         *          element. Defaults to 2.
         *
         * Returns: the lines of the pretty-printed element
         */
        override string[] pretty(uint indent=2) scope
        {
            import std.algorithm.searching : count;
            import std.string : rightJustify;

            if (isEmptyXML) return [ tag.toEmptyString() ];

            if (items.length == 1)
            {
                auto t = cast(const(Text))(items[0]);
                if (t !is null)
                {
                    return [tag.toStartString() ~ t.toString() ~ tag.toEndString()];
                }
            }

            string[] a = [ tag.toStartString() ];
            foreach (item;items)
            {
                string[] b = item.pretty(indent);
                foreach (s;b)
                {
                    // Right-justifying into a field indent wider than the
                    // line prepends exactly indent spaces.
                    a ~= rightJustify(s,count(s) + indent);
                }
            }
            a ~= tag.toEndString();
            return a;
        }

        /**
         * Returns the string representation of an Element
         *
         * Example:
         * --------------
         * auto element = new Element("br");
         * writeln(element.toString()); // writes "<br />"
         * --------------
         */
        override string toString() scope @safe
        {
            if (isEmptyXML) return tag.toEmptyString();

            string buffer = tag.toStartString();
            foreach (item;items) { buffer ~= item.toString(); }
            buffer ~= tag.toEndString();
            return buffer;
        }

        // An element is empty XML exactly when it has no items.
        override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; }
    }
}
978 
/**
 * The kinds of tag a Tag object can represent.
 *
 * $(DDOC_ENUM_MEMBERS START) Used for start tags
 * $(DDOC_ENUM_MEMBERS END) Used for end tags
 * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags
 *
 */
enum TagType
{
    START,
    END,
    EMPTY
}
988 
/**
 * Class representing an XML tag.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * The class invariant guarantees
 * <ul>
 * <li> that $(B type) is a valid enum TagType value</li>
 * <li> that $(B name) consists of valid characters</li>
 * <li> that each attribute name consists of valid characters</li>
 * </ul>
 */
class Tag
{
    TagType type = TagType.START;   /// Type of tag
    string name;                    /// Tag name
    string[string] attr;            /// Associative array of attributes
    private string tagString;

    invariant()
    {
        string s;
        string t;

        assert(type == TagType.START
            || type == TagType.END
            || type == TagType.EMPTY);

        // The tag name must pass checkName.
        s = name;
        try { checkName(s,t); }
        catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }

        // Every attribute key must pass checkName too; values are not checked.
        foreach (k,v;attr)
        {
            s = k;
            try { checkName(s,t); }
            catch (Err e)
                { assert(false,"Invalid attribute name:" ~ e.toString()); }
        }
    }

    /**
     * Constructs an instance of Tag with a specified name and type
     *
     * The constructor does not initialize the attributes. To initialize the
     * attributes, you access the $(B attr) member variable.
     *
     * Params:
     *      name = the Tag's name
     *      type = (optional) the Tag's type. If omitted, defaults to
     *          TagType.START.
     *
     * Example:
     * --------------
     * auto tag = new Tag("img",TagType.EMPTY);
     * tag.attr["src"] = "http://example.com/example.jpg";
     * --------------
     */
    this(string name, TagType type=TagType.START) @safe pure
    {
        this.name = name;
        this.type = type;
    }

    /* Private constructor (so don't ddoc this!)
     *
     * Constructs a Tag by parsing the string representation, e.g. "<html>".
     *
     * The string is passed by reference, and is advanced over all characters
     * consumed.
     *
     * The second parameter is a dummy parameter only, required solely to
     * distinguish this constructor from the public one.
     *
     * Throws: TagException if the text is not a complete tag. This includes
     * input that ends before the tag name, an attribute name, a quoted
     * attribute value or the closing '>' is complete.
     */
    private this(ref string s, bool dummy) @safe pure
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isWhite;
        import std.utf : byCodeUnit;

        // Drops leading whitespace from str. A tag can never end in
        // whitespace (a '>' must follow), so running out of input here is
        // reported as a malformed tag rather than slicing with index -1.
        static void skipWhite(ref string str) @safe pure
        {
            immutable n = str.byCodeUnit.countUntil!(a => !isWhite(a));
            if (n < 0) throw new TagException("");
            str = str[n .. $];
        }

        tagString = s;
        try
        {
            reqc(s,'<');
            if (optc(s,'/')) type = TagType.END;

            // The name runs up to the first delimiter. countUntil returns -1
            // when no delimiter exists (truncated input).
            ptrdiff_t i = s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f");
            if (i < 0) throw new TagException("");
            name = s[0 .. i];
            s = s[i .. $];

            skipWhite(s);

            // Attributes: key [ws] '=' [ws] quote value quote [ws]
            while (s.length > 0 && s[0] != '>' && s[0] != '/')
            {
                i = s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f");
                if (i < 0) throw new TagException("");
                string key = s[0 .. i];
                s = s[i .. $];

                skipWhite(s);
                reqc(s,'=');
                skipWhite(s);

                immutable char quote = requireOneOf(s,"'\"");
                i = s.byCodeUnit.countUntil(quote);
                if (i < 0) throw new TagException(""); // unterminated value
                string val = decode(s[0 .. i], DecodeMode.LOOSE);
                s = s[i .. $];
                reqc(s,quote);

                skipWhite(s);
                attr[key] = val;
            }
            if (optc(s,'/'))
            {
                // "</name/>" is not a valid tag
                if (type == TagType.END) throw new TagException("");
                type = TagType.EMPTY;
            }
            reqc(s,'>');
            // Keep only the text that was actually consumed.
            tagString.length = tagString.length - s.length;
        }
        catch (XMLException e)
        {
            // Report the portion of the input consumed up to the failure.
            tagString.length = tagString.length - s.length;
            throw new TagException(tagString);
        }
    }

    const
    {
        /**
         * Compares two Tags for equality
         *
         * Two Tags are equal when their name, attributes and type are all
         * equal.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 == tag2) { }
         * --------------
         */
        override bool opEquals(scope Object o)
        {
            const tag = toType!(const Tag)(o);
            return name == tag.name && attr == tag.attr && type == tag.type;
        }

        /**
         * Compares two Tags
         *
         * Tags are ordered by name first, then by attributes, then by type.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 < tag2) { }
         * --------------
         */
        override int opCmp(Object o)
        {
            const tag = toType!(const Tag)(o);
            // Note that attr is an AA, so the comparison is nonsensical (bug 10381):
            // unequal attribute sets are ordered by the AAs' addresses.
            return
                ((name != tag.name) ? ( name < tag.name ? -1 : 1 ) :
                ((attr != tag.attr) ? ( cast(void *) attr < cast(void*) tag.attr ? -1 : 1 ) :
                ((type != tag.type) ? ( type < tag.type ? -1 : 1 ) :
            0 )));
        }

        /**
         * Returns the hash of a Tag
         *
         * Only the name contributes to the hash.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         */
        override size_t toHash()
        {
            return .hashOf(name);
        }

        /**
         * Returns the string representation of a Tag
         *
         * Example:
         * --------------
         * auto tag = new Tag("book",TagType.START);
         * writeln(tag.toString()); // writes "<book>"
         * --------------
         */
        override string toString() @safe
        {
            if (isEmpty) return toEmptyString();
            return (isEnd) ? toEndString() : toStartString();
        }

        private
        {
            // Builds "<name key="value" ..." without the closing bracket.
            // Attribute values are passed through encode.
            string toNonEndString() @safe
            {
                import std.format : format;

                string s = "<" ~ name;
                foreach (key,val;attr)
                    s ~= format(" %s=\"%s\"",key,encode(val));
                return s;
            }

            string toStartString() @safe { return toNonEndString() ~ ">"; }

            string toEndString() @safe { return "</" ~ name ~ ">"; }

            string toEmptyString() @safe { return toNonEndString() ~ " />"; }
        }

        /**
         * Returns true if the Tag is a start tag
         *
         * Example:
         * --------------
         * if (tag.isStart) { }
         * --------------
         */
        @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; }

        /**
         * Returns true if the Tag is an end tag
         *
         * Example:
         * --------------
         * if (tag.isEnd) { }
         * --------------
         */
        @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END;   }

        /**
         * Returns true if the Tag is an empty tag
         *
         * Example:
         * --------------
         * if (tag.isEmpty) { }
         * --------------
         */
        @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; }
    }
}
1238 
/**
 * Class representing a comment
 */
class Comment : Item
{
    private string content;

    /**
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Example:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /**
     * Compares two comments for equality
     *
     * Returns true only if the other object is a Comment with the same body.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two comments
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to the ordering of the two comment
     * bodies. An Item that is not a Comment compares as 0.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        // NOTE(review): an Item of another kind still compares as 0, as it
        // did before; confirm whether a cross-type ordering is wanted.
        if (t is null || content == t.content) return 0;
        // Return the three-way result itself. Combining it with "&&" would
        // collapse it to a bool, so -1 could never be returned.
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this comment
     */
    override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1321 
// Regression test for https://issues.dlang.org/show_bug.cgi?id=16241
// A body without "--" is stored verbatim; a body containing "--" is rejected.
@safe unittest
{
    import std.exception : assertThrown;

    auto accepted = new Comment("==");
    assert(accepted.content == "==");

    assertThrown!CommentException(new Comment("--"));
}
1330 
/**
 * Class representing a Character Data section
 */
class CData : Item
{
    private string content;

    /**
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /**
     * Compares two CDatas for equality
     *
     * Returns true only if the other object is a CData with the same body.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two CDatas
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to the ordering of the two bodies. An
     * Item that is not a CData compares as 0.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        // NOTE(review): an Item of another kind still compares as 0, as it
        // did before; confirm whether a cross-type ordering is wanted.
        if (t is null || content == t.content) return 0;
        // Return the three-way result itself. Combining it with "&&" would
        // collapse it to a bool, so -1 could never be returned.
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this CData section
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1410 
/**
 * Class representing a text (aka Parsed Character Data) section
 */
class Text : Item
{
    private string content;

    /**
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text. This function encodes the text before
     *      insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // constructs a &lt; b
     * --------------
     */
    this(string content) @safe pure
    {
        // The stored form is already encoded; toString returns it verbatim.
        this.content = encode(content);
    }

    /**
     * Compares two text sections for equality
     *
     * Returns true only if the other object is a Text with the same
     * (encoded) content.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two text sections
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to the ordering of the two (encoded)
     * contents. An Item that is not a Text compares as 0.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        // NOTE(review): an Item of another kind still compares as 0, as it
        // did before; confirm whether a cross-type ordering is wanted.
        if (t is null || content == t.content) return 0;
        // Return the three-way result itself. Combining it with "&&" would
        // collapse it to a bool, so -1 could never be returned.
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this Text section
     */
    override string toString() scope const @safe @nogc pure nothrow { return content; }

    /**
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; }
}
1490 
/**
 * Class representing an XML Instruction section
 */
class XMLInstruction : Item
{
    private string content;

    /**
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /**
     * Compares two XML instructions for equality
     *
     * Returns true only if the other object is an XMLInstruction with the
     * same body.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two XML instructions
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to the ordering of the two bodies. An
     * Item that is not an XMLInstruction compares as 0.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        // NOTE(review): an Item of another kind still compares as 0, as it
        // did before; confirm whether a cross-type ordering is wanted.
        if (t is null || content == t.content) return 0;
        // Return the three-way result itself. Combining it with "&&" would
        // collapse it to a bool, so -1 could never be returned.
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this XmlInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1570 
/**
 * Class representing a Processing Instruction section
 */
class ProcessingInstruction : Item
{
    private string content;

    /**
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /**
     * Compares two processing instructions for equality
     *
     * Returns true only if the other object is a ProcessingInstruction with
     * the same body.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two processing instructions
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to the ordering of the two bodies. An
     * Item that is not a ProcessingInstruction compares as 0.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        // NOTE(review): an Item of another kind still compares as 0, as it
        // did before; confirm whether a cross-type ordering is wanted.
        if (t is null || content == t.content) return 0;
        // Return the three-way result itself. Combining it with "&&" would
        // collapse it to a bool, so -1 could never be returned.
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always
}
1650 
/**
 * Abstract base class for XML items
 */
abstract class Item
{
    /// Compares with another Item of same type for equality
    abstract override bool opEquals(scope const Object o) @safe const;

    /// Compares with another Item of same type
    abstract override int opCmp(scope const Object o) @safe const;

    /// Returns the hash of this item
    abstract override size_t toHash() @safe scope const;

    /// Returns a string representation of this item
    abstract override string toString() @safe scope const;

    /**
     * Returns an indented string representation of this item
     *
     * This base implementation ignores indent. It returns the stripped
     * result of toString() as a single line, or an empty array when that
     * result is empty.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;

        string s = strip(toString());
        return s.length == 0 ? [] : [ s ];
    }

    /// Returns true if the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}
1684 
/**
 * Class for parsing an XML Document.
 *
 * This is a subclass of ElementParser. Most of the useful functions are
 * documented there.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Bugs:
 *      Currently only supports UTF documents.
 *
 *      If there is an encoding attribute in the prolog, it is ignored.
 *
 */
class DocumentParser : ElementParser
{
    // The document text; the inherited cursor s is pointed at this field.
    string xmlText;

    /**
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract, which rejects an
     * empty string and runs check() over the text.
     *
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length > 0);
        try
        {
            // Confirm that the input is valid XML
            check(xmlText_);
        }
        catch (CheckException failure)
        {
            // And if it's not, tell the user why not
            assert(false, "\n" ~ failure.toString());
        }
    }
    do
    {
        this.xmlText = xmlText_;
        // The cursor must be in place before super() runs, because the
        // base constructor dereferences it.
        this.s = &this.xmlText;
        super();    // Initialize everything
        parse();    // Parse through the root tag (but not beyond)
    }
}
1736 
// A Document built from nested markup exposes the root's direct children:
// one element, named "child", and no non-element items.
@system unittest
{
    auto tree = new Document("<root><child><grandchild/></child></root>");

    assert(tree.elements.length == 1);
    assert(tree.elements[0].tag.name == "child");
    assert(tree.items == tree.elements);
}
1744 
1745 /**
1746  * Class for parsing an XML element.
1747  *
1748  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1749  *
1750  * Note that you cannot construct instances of this class directly. You can
1751  * construct a DocumentParser (which is a subclass of ElementParser), but
1752  * otherwise, instances of ElementParser will be created for you by the
1753  * library, and passed your way via onStartTag handlers.
1754  *
1755  */
1756 class ElementParser
1757 {
1758     alias Handler = void delegate(string);
1759     alias ElementHandler = void delegate(in Element element);
1760     alias ParserHandler = void delegate(ElementParser parser);
1761 
1762     private
1763     {
1764         Tag tag_;
1765         string elementStart;
1766         string* s;
1767 
1768         Handler commentHandler = null;
1769         Handler cdataHandler = null;
1770         Handler xiHandler = null;
1771         Handler piHandler = null;
1772         Handler rawTextHandler = null;
1773         Handler textHandler = null;
1774 
1775         // Private constructor for start tags
1776         this(ElementParser parent) @safe @nogc pure nothrow
1777         {
1778             s = parent.s;
1779             this();
1780             tag_ = parent.tag_;
1781         }
1782 
1783         // Private constructor for empty tags
1784         this(Tag tag, string* t) @safe @nogc pure nothrow
1785         {
1786             s = t;
1787             this();
1788             tag_ = tag;
1789         }
1790     }
1791 
1792     /**
1793      * The Tag at the start of the element being parsed. You can read this to
1794      * determine the tag's name and attributes.
1795      */
1796     @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; }
1797 
1798     /**
1799      * Register a handler which will be called whenever a start tag is
1800      * encountered which matches the specified name. You can also pass null as
1801      * the name, in which case the handler will be called for any unmatched
1802      * start tag.
1803      *
1804      * Example:
1805      * --------------
1806      * // Call this function whenever a <podcast> start tag is encountered
1807      * onStartTag["podcast"] = (ElementParser xml)
1808      * {
1809      *     // Your code here
1810      *     //
1811      *     // This is a closure, so code here may reference
1812      *     // variables which are outside of this scope
1813      * };
1814      *
1815      * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
1816      * // start tag is encountered
1817      * onStartTag["episode"] = &myEpisodeStartHandler;
1818      *
1819      * // call delegate dg for all other start tags
1820      * onStartTag[null] = dg;
1821      * --------------
1822      *
1823      * This library will supply your function with a new instance of
1824      * ElementParser, which may be used to parse inside the element whose
1825      * start tag was just found, or to identify the tag attributes of the
1826      * element, etc.
1827      *
1828      * Note that your function will be called for both start tags and empty
1829      * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1830      * and &lt;br/&gt;.
1831      */
1832     ParserHandler[string] onStartTag;
1833 
1834     /**
1835      * Register a handler which will be called whenever an end tag is
1836      * encountered which matches the specified name. You can also pass null as
1837      * the name, in which case the handler will be called for any unmatched
1838      * end tag.
1839      *
1840      * Example:
1841      * --------------
1842      * // Call this function whenever a </podcast> end tag is encountered
1843      * onEndTag["podcast"] = (in Element e)
1844      * {
1845      *     // Your code here
1846      *     //
1847      *     // This is a closure, so code here may reference
1848      *     // variables which are outside of this scope
1849      * };
1850      *
1851      * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
1852      * // end tag is encountered
1853      * onEndTag["episode"] = &myEpisodeEndHandler;
1854      *
1855      * // call delegate dg for all other end tags
1856      * onEndTag[null] = dg;
1857      * --------------
1858      *
1859      * Note that your function will be called for both start tags and empty
1860      * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1861      * and &lt;br/&gt;.
1862      */
1863     ElementHandler[string] onEndTag;
1864 
1865     protected this() @safe @nogc pure nothrow
1866     {
1867         elementStart = *s;
1868     }
1869 
1870     /**
1871      * Register a handler which will be called whenever text is encountered.
1872      *
1873      * Example:
1874      * --------------
1875      * // Call this function whenever text is encountered
1876      * onText = (string s)
1877      * {
1878      *     // Your code here
1879      *
1880      *     // The passed parameter s will have been decoded by the time you see
1881      *     // it, and so may contain any character.
1882      *     //
1883      *     // This is a a closure, so code here may reference
1884      *     // variables which are outside of this scope
1885      * };
1886      * --------------
1887      */
1888     @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; }
1889 
1890     /**
1891      * Register an alternative handler which will be called whenever text
1892      * is encountered. This differs from onText in that onText will decode
1893      * the text, whereas onTextRaw will not. This allows you to make design
1894      * choices, since onText will be more accurate, but slower, while
1895      * onTextRaw will be faster, but less accurate. Of course, you can
1896      * still call decode() within your handler, if you want, but you'd
1897      * probably want to use onTextRaw only in circumstances where you
1898      * know that decoding is unnecessary.
1899      *
1900      * Example:
1901      * --------------
1902      * // Call this function whenever text is encountered
1903      * onText = (string s)
1904      * {
1905      *     // Your code here
1906      *
1907      *     // The passed parameter s will NOT have been decoded.
1908      *     //
1909      *     // This is a a closure, so code here may reference
1910      *     // variables which are outside of this scope
1911      * };
1912      * --------------
1913      */
1914     @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; }
1915 
1916     /**
1917      * Register a handler which will be called whenever a character data
1918      * segment is encountered.
1919      *
1920      * Example:
1921      * --------------
1922      * // Call this function whenever a CData section is encountered
1923      * onCData = (string s)
1924      * {
1925      *     // Your code here
1926      *
1927      *     // The passed parameter s does not include the opening <![CDATA[
1928      *     // nor closing ]]>
1929      *     //
1930      *     // This is a a closure, so code here may reference
1931      *     // variables which are outside of this scope
1932      * };
1933      * --------------
1934      */
1935     @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; }
1936 
1937     /**
1938      * Register a handler which will be called whenever a comment is
1939      * encountered.
1940      *
1941      * Example:
1942      * --------------
1943      * // Call this function whenever a comment is encountered
1944      * onComment = (string s)
1945      * {
1946      *     // Your code here
1947      *
1948      *     // The passed parameter s does not include the opening <!-- nor
1949      *     // closing -->
1950      *     //
1951      *     // This is a a closure, so code here may reference
1952      *     // variables which are outside of this scope
1953      * };
1954      * --------------
1955      */
1956     @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; }
1957 
1958     /**
1959      * Register a handler which will be called whenever a processing
1960      * instruction is encountered.
1961      *
1962      * Example:
1963      * --------------
1964      * // Call this function whenever a processing instruction is encountered
1965      * onPI = (string s)
1966      * {
1967      *     // Your code here
1968      *
1969      *     // The passed parameter s does not include the opening <? nor
1970      *     // closing ?>
1971      *     //
1972      *     // This is a a closure, so code here may reference
1973      *     // variables which are outside of this scope
1974      * };
1975      * --------------
1976      */
1977     @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; }
1978 
1979     /**
1980      * Register a handler which will be called whenever an XML instruction is
1981      * encountered.
1982      *
1983      * Example:
1984      * --------------
1985      * // Call this function whenever an XML instruction is encountered
1986      * // (Note: XML instructions may only occur preceding the root tag of a
1987      * // document).
1988      * onPI = (string s)
1989      * {
1990      *     // Your code here
1991      *
1992      *     // The passed parameter s does not include the opening <! nor
1993      *     // closing >
1994      *     //
1995      *     // This is a a closure, so code here may reference
1996      *     // variables which are outside of this scope
1997      * };
1998      * --------------
1999      */
2000     @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; }
2001 
2002     /**
2003      * Parse an XML element.
2004      *
2005      * Parsing will continue until the end of the current element. Any items
2006      * encountered for which a handler has been registered will invoke that
2007      * handler.
2008      *
2009      * Throws: various kinds of XMLException
2010      */
2011     void parse()
2012     {
2013         import std.algorithm.searching : startsWith;
2014         import std..string : indexOf;
2015 
2016         string t;
2017         const Tag root = tag_;
2018         Tag[string] startTags;
2019         if (tag_ !is null) startTags[tag_.name] = tag_;
2020 
2021         while (s.length != 0)
2022         {
2023             if (startsWith(*s,"<!--"))
2024             {
2025                 chop(*s,4);
2026                 t = chop(*s,indexOf(*s,"-->"));
2027                 if (commentHandler.funcptr !is null) commentHandler(t);
2028                 chop(*s,3);
2029             }
2030             else if (startsWith(*s,"<![CDATA["))
2031             {
2032                 chop(*s,9);
2033                 t = chop(*s,indexOf(*s,"]]>"));
2034                 if (cdataHandler.funcptr !is null) cdataHandler(t);
2035                 chop(*s,3);
2036             }
2037             else if (startsWith(*s,"<!"))
2038             {
2039                 chop(*s,2);
2040                 t = chop(*s,indexOf(*s,">"));
2041                 if (xiHandler.funcptr !is null) xiHandler(t);
2042                 chop(*s,1);
2043             }
2044             else if (startsWith(*s,"<?"))
2045             {
2046                 chop(*s,2);
2047                 t = chop(*s,indexOf(*s,"?>"));
2048                 if (piHandler.funcptr !is null) piHandler(t);
2049                 chop(*s,2);
2050             }
2051             else if (startsWith(*s,"<"))
2052             {
2053                 tag_ = new Tag(*s,true);
2054                 if (root is null)
2055                     return; // Return to constructor of derived class
2056 
2057                 if (tag_.isStart)
2058                 {
2059                     startTags[tag_.name] = tag_;
2060 
2061                     auto parser = new ElementParser(this);
2062 
2063                     auto handler = tag_.name in onStartTag;
2064                     if (handler !is null) (*handler)(parser);
2065                     else
2066                     {
2067                         handler = null in onStartTag;
2068                         if (handler !is null) (*handler)(parser);
2069                     }
2070                 }
2071                 else if (tag_.isEnd)
2072                 {
2073                     const startTag = startTags[tag_.name];
2074                     string text;
2075 
2076                     if (startTag.tagString.length == 0)
2077                         assert(0);
2078 
2079                     immutable(char)* p = startTag.tagString.ptr
2080                         + startTag.tagString.length;
2081                     immutable(char)* q = &tag_.tagString[0];
2082                     text = decode(p[0..(q-p)], DecodeMode.LOOSE);
2083 
2084                     auto element = new Element(startTag);
2085                     if (text.length != 0) element ~= new Text(text);
2086 
2087                     auto handler = tag_.name in onEndTag;
2088                     if (handler !is null) (*handler)(element);
2089                     else
2090                     {
2091                         handler = null in onEndTag;
2092                         if (handler !is null) (*handler)(element);
2093                     }
2094 
2095                     if (tag_.name == root.name) return;
2096                 }
2097                 else if (tag_.isEmpty)
2098                 {
2099                     Tag startTag = new Tag(tag_.name);
2100 
2101                     // FIX by hed010gy
2102                     // https://issues.dlang.org/show_bug.cgi?id=2979
2103                     if (tag_.attr.length > 0)
2104                           foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv;
2105                     // END FIX
2106 
2107                     // Handle the pretend start tag
2108                     string s2;
2109                     auto parser = new ElementParser(startTag,&s2);
2110                     auto handler1 = startTag.name in onStartTag;
2111                     if (handler1 !is null) (*handler1)(parser);
2112                     else
2113                     {
2114                         handler1 = null in onStartTag;
2115                         if (handler1 !is null) (*handler1)(parser);
2116                     }
2117 
2118                     // Handle the pretend end tag
2119                     auto element = new Element(startTag);
2120                     auto handler2 = tag_.name in onEndTag;
2121                     if (handler2 !is null) (*handler2)(element);
2122                     else
2123                     {
2124                         handler2 = null in onEndTag;
2125                         if (handler2 !is null) (*handler2)(element);
2126                     }
2127                 }
2128             }
2129             else
2130             {
2131                 t = chop(*s,indexOf(*s,"<"));
2132                 if (rawTextHandler.funcptr !is null)
2133                     rawTextHandler(t);
2134                 else if (textHandler.funcptr !is null)
2135                     textHandler(decode(t,DecodeMode.LOOSE));
2136             }
2137         }
2138     }
2139 
2140     /**
2141      * Returns that part of the element which has already been parsed
2142      */
2143     override string toString() const @nogc @safe pure nothrow
2144     {
2145         assert(elementStart.length >= s.length);
2146         return elementStart[0 .. elementStart.length - s.length];
2147     }
2148 
2149 }
2150 
2151 private
2152 {
2153     template Check(string msg)
2154     {
2155         string old = s;
2156 
2157         void fail() @safe pure
2158         {
2159             s = old;
2160             throw new Err(s,msg);
2161         }
2162 
2163         void fail(Err e) @safe pure
2164         {
2165             s = old;
2166             throw new Err(s,msg,e);
2167         }
2168 
2169         void fail(string msg2) @safe pure
2170         {
2171             fail(new Err(s,msg2));
2172         }
2173     }
2174 
2175     void checkMisc(ref string s) @safe pure // rule 27
2176     {
2177         import std.algorithm.searching : startsWith;
2178 
2179         mixin Check!("Misc");
2180 
2181         try
2182         {
2183                  if (s.startsWith("<!--")) { checkComment(s); }
2184             else if (s.startsWith("<?"))   { checkPI(s); }
2185             else                           { checkSpace(s); }
2186         }
2187         catch (Err e) { fail(e); }
2188     }
2189 
2190     void checkDocument(ref string s) @safe pure // rule 1
2191     {
2192         mixin Check!("Document");
2193         try
2194         {
2195             checkProlog(s);
2196             checkElement(s);
2197             star!(checkMisc)(s);
2198         }
2199         catch (Err e) { fail(e); }
2200     }
2201 
2202     void checkChars(ref string s) @safe pure // rule 2
2203     {
2204         // TO DO - Fix std.utf stride and decode functions, then use those
2205         // instead
2206         import std.format : format;
2207 
2208         mixin Check!("Chars");
2209 
2210         dchar c;
2211         ptrdiff_t n = -1;
2212         // 'i' must not be smaller than size_t because size_t is used internally in
2213         // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
2214         foreach (size_t i, dchar d; s)
2215         {
2216             if (!isChar(d))
2217             {
2218                 c = d;
2219                 n = i;
2220                 break;
2221             }
2222         }
2223         if (n != -1)
2224         {
2225             s = s[n..$];
2226             fail(format("invalid character: U+%04X",c));
2227         }
2228     }
2229 
2230     void checkSpace(ref string s) @safe pure // rule 3
2231     {
2232         import std.algorithm.searching : countUntil;
2233         import std.ascii : isWhite;
2234         import std.utf : byCodeUnit;
2235 
2236         mixin Check!("Whitespace");
2237         ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a));
2238         if (i == -1 && s.length > 0 && isWhite(s[0]))
2239             s = s[$ .. $];
2240         else if (i > -1)
2241             s = s[i .. $];
2242         if (s is old) fail();
2243     }
2244 
2245     void checkName(ref string s, out string name) @safe pure // rule 5
2246     {
2247         mixin Check!("Name");
2248 
2249         if (s.length == 0) fail();
2250         ptrdiff_t n;
2251         // 'i' must not be smaller than size_t because size_t is used internally in
2252         // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
2253         foreach (size_t i, dchar c; s)
2254         {
2255             if (c == '_' || c == ':' || isLetter(c)) continue;
2256             if (i == 0) fail();
2257             if (c == '-' || c == '.' || isDigit(c)
2258                 || isCombiningChar(c) || isExtender(c)) continue;
2259             n = i;
2260             break;
2261         }
2262         name = s[0 .. n];
2263         s = s[n..$];
2264     }
2265 
2266     void checkAttValue(ref string s) @safe pure // rule 10
2267     {
2268         import std.algorithm.searching : countUntil;
2269         import std.utf : byCodeUnit;
2270 
2271         mixin Check!("AttValue");
2272 
2273         if (s.length == 0) fail();
2274         char c = s[0];
2275         if (c != '\u0022' && c != '\u0027')
2276             fail("attribute value requires quotes");
2277         s = s[1..$];
2278         for (;;)
2279         {
2280             s = s[s.byCodeUnit.countUntil(c) .. $];
2281             if (s.length == 0) fail("unterminated attribute value");
2282             if (s[0] == '<') fail("< found in attribute value");
2283             if (s[0] == c) break;
2284             try { checkReference(s); } catch (Err e) { fail(e); }
2285         }
2286         s = s[1..$];
2287     }
2288 
2289     void checkCharData(ref string s) @safe pure // rule 14
2290     {
2291         import std.algorithm.searching : startsWith;
2292 
2293         mixin Check!("CharData");
2294 
2295         while (s.length != 0)
2296         {
2297             if (s.startsWith("&")) break;
2298             if (s.startsWith("<")) break;
2299             if (s.startsWith("]]>")) fail("]]> found within char data");
2300             s = s[1..$];
2301         }
2302     }
2303 
2304     void checkComment(ref string s) @safe pure // rule 15
2305     {
2306         import std..string : indexOf;
2307 
2308         mixin Check!("Comment");
2309 
2310         try { checkLiteral("<!--",s); } catch (Err e) { fail(e); }
2311         ptrdiff_t n = s.indexOf("--");
2312         if (n == -1) fail("unterminated comment");
2313         s = s[n..$];
2314         try { checkLiteral("-->",s); } catch (Err e) { fail(e); }
2315     }
2316 
2317     void checkPI(ref string s) @safe pure // rule 16
2318     {
2319         mixin Check!("PI");
2320 
2321         try
2322         {
2323             checkLiteral("<?",s);
2324             checkEnd("?>",s);
2325         }
2326         catch (Err e) { fail(e); }
2327     }
2328 
2329     void checkCDSect(ref string s) @safe pure // rule 18
2330     {
2331         mixin Check!("CDSect");
2332 
2333         try
2334         {
2335             checkLiteral(cdata,s);
2336             checkEnd("]]>",s);
2337         }
2338         catch (Err e) { fail(e); }
2339     }
2340 
2341     void checkProlog(ref string s) @safe pure // rule 22
2342     {
2343         mixin Check!("Prolog");
2344 
2345         try
2346         {
2347             /* The XML declaration is optional
2348              * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
2349              */
2350             opt!(checkXMLDecl)(s);
2351 
2352             star!(checkMisc)(s);
2353             opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
2354         }
2355         catch (Err e) { fail(e); }
2356     }
2357 
2358     void checkXMLDecl(ref string s) @safe pure // rule 23
2359     {
2360         mixin Check!("XMLDecl");
2361 
2362         try
2363         {
2364             checkLiteral("<?xml",s);
2365             checkVersionInfo(s);
2366             opt!(checkEncodingDecl)(s);
2367             opt!(checkSDDecl)(s);
2368             opt!(checkSpace)(s);
2369             checkLiteral("?>",s);
2370         }
2371         catch (Err e) { fail(e); }
2372     }
2373 
2374     void checkVersionInfo(ref string s) @safe pure // rule 24
2375     {
2376         mixin Check!("VersionInfo");
2377 
2378         try
2379         {
2380             checkSpace(s);
2381             checkLiteral("version",s);
2382             checkEq(s);
2383             quoted!(checkVersionNum)(s);
2384         }
2385         catch (Err e) { fail(e); }
2386     }
2387 
2388     void checkEq(ref string s) @safe pure // rule 25
2389     {
2390         mixin Check!("Eq");
2391 
2392         try
2393         {
2394             opt!(checkSpace)(s);
2395             checkLiteral("=",s);
2396             opt!(checkSpace)(s);
2397         }
2398         catch (Err e) { fail(e); }
2399     }
2400 
2401     void checkVersionNum(ref string s) @safe pure // rule 26
2402     {
2403         import std.algorithm.searching : countUntil;
2404         import std.utf : byCodeUnit;
2405 
2406         mixin Check!("VersionNum");
2407 
2408         s = s[s.byCodeUnit.countUntil('\"') .. $];
2409         if (s is old) fail();
2410     }
2411 
2412     void checkDocTypeDecl(ref string s) @safe pure // rule 28
2413     {
2414         mixin Check!("DocTypeDecl");
2415 
2416         try
2417         {
2418             checkLiteral("<!DOCTYPE",s);
2419             //
2420             // TO DO -- ensure DOCTYPE is well formed
2421             // (But not yet. That's one of our "future directions")
2422             //
2423             checkEnd(">",s);
2424         }
2425         catch (Err e) { fail(e); }
2426     }
2427 
2428     void checkSDDecl(ref string s) @safe pure // rule 32
2429     {
2430         import std.algorithm.searching : startsWith;
2431 
2432         mixin Check!("SDDecl");
2433 
2434         try
2435         {
2436             checkSpace(s);
2437             checkLiteral("standalone",s);
2438             checkEq(s);
2439         }
2440         catch (Err e) { fail(e); }
2441 
2442         int n = 0;
2443              if (s.startsWith("'yes'") || s.startsWith("\"yes\"")) n = 5;
2444         else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" )) n = 4;
2445         else fail("standalone attribute value must be 'yes', \"yes\","~
2446             " 'no' or \"no\"");
2447         s = s[n..$];
2448     }
2449 
2450     void checkElement(ref string s) @safe pure // rule 39
2451     {
2452         mixin Check!("Element");
2453 
2454         string sname,ename,t;
2455         try { checkTag(s,t,sname); } catch (Err e) { fail(e); }
2456 
2457         if (t == "STag")
2458         {
2459             try
2460             {
2461                 checkContent(s);
2462                 t = s;
2463                 checkETag(s,ename);
2464             }
2465             catch (Err e) { fail(e); }
2466 
2467             if (sname != ename)
2468             {
2469                 s = t;
2470                 fail("end tag name \"" ~ ename
2471                     ~ "\" differs from start tag name \""~sname~"\"");
2472             }
2473         }
2474     }
2475 
2476     // rules 40 and 44
2477     void checkTag(ref string s, out string type, out string name) @safe pure
2478     {
2479         mixin Check!("Tag");
2480 
2481         try
2482         {
2483             type = "STag";
2484             checkLiteral("<",s);
2485             checkName(s,name);
2486             star!(seq!(checkSpace,checkAttribute))(s);
2487             opt!(checkSpace)(s);
2488             if (s.length != 0 && s[0] == '/')
2489             {
2490                 s = s[1..$];
2491                 type = "ETag";
2492             }
2493             checkLiteral(">",s);
2494         }
2495         catch (Err e) { fail(e); }
2496     }
2497 
2498     void checkAttribute(ref string s) @safe pure // rule 41
2499     {
2500         mixin Check!("Attribute");
2501 
2502         try
2503         {
2504             string name;
2505             checkName(s,name);
2506             checkEq(s);
2507             checkAttValue(s);
2508         }
2509         catch (Err e) { fail(e); }
2510     }
2511 
2512     void checkETag(ref string s, out string name) @safe pure // rule 42
2513     {
2514         mixin Check!("ETag");
2515 
2516         try
2517         {
2518             checkLiteral("</",s);
2519             checkName(s,name);
2520             opt!(checkSpace)(s);
2521             checkLiteral(">",s);
2522         }
2523         catch (Err e) { fail(e); }
2524     }
2525 
2526     void checkContent(ref string s) @safe pure // rule 43
2527     {
2528         import std.algorithm.searching : startsWith;
2529 
2530         mixin Check!("Content");
2531 
2532         try
2533         {
2534             while (s.length != 0)
2535             {
2536                 old = s;
2537                      if (s.startsWith("&"))        { checkReference(s); }
2538                 else if (s.startsWith("<!--"))     { checkComment(s); }
2539                 else if (s.startsWith("<?"))       { checkPI(s); }
2540                 else if (s.startsWith(cdata)) { checkCDSect(s); }
2541                 else if (s.startsWith("</"))       { break; }
2542                 else if (s.startsWith("<"))        { checkElement(s); }
2543                 else                               { checkCharData(s); }
2544             }
2545         }
2546         catch (Err e) { fail(e); }
2547     }
2548 
2549     void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
2550     {
2551         import std.format : format;
2552 
2553         mixin Check!("CharRef");
2554 
2555         c = 0;
2556         try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
2557         int radix = 10;
2558         if (s.length != 0 && s[0] == 'x')
2559         {
2560             s = s[1..$];
2561             radix = 16;
2562         }
2563         if (s.length == 0) fail("unterminated character reference");
2564         if (s[0] == ';')
2565             fail("character reference must have at least one digit");
2566         while (s.length != 0)
2567         {
2568             immutable char d = s[0];
2569             int n = 0;
2570             switch (d)
2571             {
2572                 case 'F','f': ++n;      goto case;
2573                 case 'E','e': ++n;      goto case;
2574                 case 'D','d': ++n;      goto case;
2575                 case 'C','c': ++n;      goto case;
2576                 case 'B','b': ++n;      goto case;
2577                 case 'A','a': ++n;      goto case;
2578                 case '9':     ++n;      goto case;
2579                 case '8':     ++n;      goto case;
2580                 case '7':     ++n;      goto case;
2581                 case '6':     ++n;      goto case;
2582                 case '5':     ++n;      goto case;
2583                 case '4':     ++n;      goto case;
2584                 case '3':     ++n;      goto case;
2585                 case '2':     ++n;      goto case;
2586                 case '1':     ++n;      goto case;
2587                 case '0':     break;
2588                 default: n = 100; break;
2589             }
2590             if (n >= radix) break;
2591             c *= radix;
2592             c += n;
2593             s = s[1..$];
2594         }
2595         if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
2596         if (s.length == 0 || s[0] != ';') fail("expected ;");
2597         else s = s[1..$];
2598     }
2599 
2600     void checkReference(ref string s) @safe pure // rule 67
2601     {
2602         import std.algorithm.searching : startsWith;
2603 
2604         mixin Check!("Reference");
2605 
2606         try
2607         {
2608             dchar c;
2609             if (s.startsWith("&#")) checkCharRef(s,c);
2610             else checkEntityRef(s);
2611         }
2612         catch (Err e) { fail(e); }
2613     }
2614 
2615     void checkEntityRef(ref string s) @safe pure // rule 68
2616     {
2617         mixin Check!("EntityRef");
2618 
2619         try
2620         {
2621             string name;
2622             checkLiteral("&",s);
2623             checkName(s,name);
2624             checkLiteral(";",s);
2625         }
2626         catch (Err e) { fail(e); }
2627     }
2628 
2629     void checkEncName(ref string s) @safe pure // rule 81
2630     {
2631         import std.algorithm.searching : countUntil;
2632         import std.ascii : isAlpha;
2633         import std.utf : byCodeUnit;
2634 
2635         mixin Check!("EncName");
2636 
2637         s = s[s.byCodeUnit.countUntil!(a => !isAlpha(a)) .. $];
2638         if (s is old) fail();
2639         s = s[s.byCodeUnit.countUntil('\"', '\'') .. $];
2640     }
2641 
2642     void checkEncodingDecl(ref string s) @safe pure // rule 80
2643     {
2644         mixin Check!("EncodingDecl");
2645 
2646         try
2647         {
2648             checkSpace(s);
2649             checkLiteral("encoding",s);
2650             checkEq(s);
2651             quoted!(checkEncName)(s);
2652         }
2653         catch (Err e) { fail(e); }
2654     }
2655 
2656     // Helper functions
2657 
2658     void checkLiteral(string literal,ref string s) @safe pure
2659     {
2660         import std..string : startsWith;
2661 
2662         mixin Check!("Literal");
2663 
2664         if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\"");
2665         s = s[literal.length..$];
2666     }
2667 
2668     void checkEnd(string end,ref string s) @safe pure
2669     {
2670         import std..string : indexOf;
2671         // Deliberately no mixin Check here.
2672 
2673         auto n = s.indexOf(end);
2674         if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\"");
2675         s = s[n..$];
2676         checkLiteral(end,s);
2677     }
2678 
2679     // Metafunctions -- none of these use mixin Check
2680 
2681     void opt(alias f)(ref string s)
2682     {
2683         try { f(s); } catch (Err e) {}
2684     }
2685 
2686     void plus(alias f)(ref string s)
2687     {
2688         f(s);
2689         star!(f)(s);
2690     }
2691 
2692     void star(alias f)(ref string s)
2693     {
2694         while (s.length != 0)
2695         {
2696             try { f(s); }
2697             catch (Err e) { return; }
2698         }
2699     }
2700 
2701     void quoted(alias f)(ref string s)
2702     {
2703         import std..string : startsWith;
2704 
2705         if (s.startsWith("'"))
2706         {
2707             checkLiteral("'",s);
2708             f(s);
2709             checkLiteral("'",s);
2710         }
2711         else
2712         {
2713             checkLiteral("\"",s);
2714             f(s);
2715             checkLiteral("\"",s);
2716         }
2717     }
2718 
2719     void seq(alias f,alias g)(ref string s)
2720     {
2721         f(s);
2722         g(s);
2723     }
2724 }
2725 
/**
 * Check an entire XML document for well-formedness
 *
 * Params:
 *      s = the document to be checked, passed as a string
 *
 * Throws: CheckException if the document is not well formed
 *
 * CheckException's toString() method will yield the complete hierarchy of
 * parse failure (the XML equivalent of a stack trace), giving the line and
 * column number of every failure at every level.
 */
void check(string s) @safe pure
{
    // The checkers advance `s` as they consume input, so keep the complete
    // text separately: line and column numbers are computed against it.
    // Without this, "Junk found after document" is always reported at
    // line 1, column 1, because `s` has shrunk to the junk itself.
    immutable string entire = s;

    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch (Err e)
    {
        e.complete(entire);
        throw e;
    }
}
2752 
// check() must reject a document whose end tag name differs from its start
// tag name, and the error text must name both tags.
@system pure unittest
{
    import std.string : indexOf;

    try
    {
        check(q"[<?xml version="1.0"?>
        <catalog>
           <book id="bk101">
              <author>Gambardella, Matthew</author>
              <title>XML Developer's Guide</title>
              <genre>Computer</genre>
              <price>44.95</price>
              <publish_date>2000-10-01</publish_date>
              <description>An in-depth look at creating applications
              with XML.</description>
           </book>
           <book id="bk102">
              <author>Ralls, Kim</author>
              <title>Midnight Rain</title>
              <genre>Fantasy</genres>
              <price>5.95</price>
              <publish_date>2000-12-16</publish_date>
              <description>A former architect battles corporate zombies,
              an evil sorceress, and her own childhood to become queen
              of the world.</description>
           </book>
           <book id="bk103">
              <author>Corets, Eva</author>
              <title>Maeve Ascendant</title>
              <genre>Fantasy</genre>
              <price>5.95</price>
              <publish_date>2000-11-17</publish_date>
              <description>After the collapse of a nanotechnology
              society in England, the young survivors lay the
              foundation for a new society.</description>
           </book>
        </catalog>
        ]");
        assert(false);
    }
    catch (CheckException e)
    {
        auto n = e.toString().indexOf("end tag name \"genres\" differs"~
                                      " from start tag name \"genre\"");
        assert(n != -1);
    }
}
2801 
// A well-formed document with a comment between child elements must pass
// check() without throwing.
@system unittest
{
    string document = q"EOS
<?xml version="1.0"?>
<set>
    <one>A</one>
    <!-- comment -->
    <two>B</two>
</set>
EOS";
    try
        check(document);
    catch (CheckException e)
        assert(0, e.toString());
}
2821 
// Attribute values may contain the other kind of quote character; the
// parser must hand them to the start-tag handler unchanged.
@system unittest
{
    string xmlText = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
                        xmlns:stream="http://etherx.'jabber'.org/streams"
                        xmlns="jabber:'client'" from='jid.pl' id="587a5767"
                        xml:lang="en" version="1.0" attr='a"b"c'>
                        </stream:stream></r>`;

    auto docParser = new DocumentParser(xmlText);
    bool handlerRan = false;
    docParser.onStartTag["stream:stream"] = (ElementParser p) {
        assert(p.tag.attr["xmlns"] == "jabber:'client'");
        assert(p.tag.attr["from"] == "jid.pl");
        assert(p.tag.attr["attr"] == "a\"b\"c");
        handlerRan = true;
    };
    docParser.parse();
    assert(handlerRan);
}
2841 
// Entity references are decoded both in attribute values and in element text.
@system unittest
{
    string document = q"EOS
<?xml version="1.0" encoding="utf-8"?> <Tests>
    <Test thing="What &amp; Up">What &amp; Up Second</Test>
</Tests>
EOS";
    auto docParser = new DocumentParser(document);

    docParser.onStartTag["Test"] = (ElementParser p) {
        assert(p.tag.attr["thing"] == "What & Up");
    };

    docParser.onEndTag["Test"] = (in Element e) {
        assert(e.text() == "What & Up Second");
    };

    docParser.parse();
}
2860 
// A document with entities in an attribute value must print back exactly
// as it was written.
@system unittest
{
    string source = `<tag attr="&quot;value&gt;" />`;
    auto document = new Document(source);
    assert(document.toString() == source);
}
2867 
/** The base class for exceptions thrown by this module */
class XMLException : Exception
{
    this(string msg) @safe pure
    {
        super(msg);
    }
}
2870 
2871 // Other exceptions
2872 
/// Thrown during Comment constructor
class CommentException : XMLException
{
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2876 
/// Thrown during CData constructor
class CDataException : XMLException
{
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2880 
/// Thrown during XMLInstruction constructor
class XIException : XMLException
{
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2884 
/// Thrown during ProcessingInstruction constructor
class PIException : XMLException
{
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2888 
/// Thrown during Text constructor
class TextException : XMLException
{
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2892 
/// Thrown during decode()
class DecodeException : XMLException
{
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2896 
/// Thrown if comparing with wrong type
class InvalidTypeException : XMLException
{
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2900 
/// Thrown when parsing for Tags
class TagException : XMLException
{
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2904 
/**
 * Thrown during check()
 *
 * Instances can be linked through $(D err). $(D toString) renders the linked
 * exception first, followed by this one, one message per line.
 */
class CheckException : XMLException
{
    CheckException err; /// Parent in hierarchy
    // Text treated as the suffix of the full input that begins at the
    // failure position; complete() uses its length to locate the failure.
    private string tail;
    /**
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    size_t line = 0; /// Line number at which parse failure occurred
    size_t column = 0; /// Column number at which parse failure occurred

    /*
     * Params:
     *      tail = input remaining at the point of failure
     *      msg  = production rule name or specific error message
     *      err  = linked exception, or null
     */
    private this(string tail,string msg,Err err=null) @safe pure
    {
        // The base-class message is left null: this class declares its own
        // `msg` field, which hides the inherited one.
        super(null);
        this.tail = tail;
        this.msg = msg;
        this.err = err;
    }

    /*
     * Fills in `line` and `column` (both 1-based) for this exception and for
     * every exception linked through `err`.
     *
     * Params:
     *      entire = the complete input; `tail` is assumed to be its suffix
     */
    private void complete(string entire) @safe pure
    {
        import std.string : count, lastIndexOf;
        import std.utf : toUTF32;

        // Everything that precedes the failure position.
        string head = entire[0..$-tail.length];
        // Index just past the last newline in head; 0 when there is none,
        // because lastIndexOf then yields -1.
        ptrdiff_t n = head.lastIndexOf('\n') + 1;
        line = head.count("\n") + 1;
        // Convert to UTF-32 so the column counts code points, not code units.
        dstring t = toUTF32(head[n..$]);
        column = t.length + 1;
        if (err !is null) err.complete(entire);
    }

    /**
     * Returns: the messages of the linked exceptions followed by this one,
     * each terminated by a newline and, once the position is known (line is
     * non-zero), prefixed with "Line L, column C: ".
     */
    override string toString() const @safe pure
    {
        import std.format : format;

        string s;
        if (line != 0) s = format("Line %d, column %d: ",line,column);
        s ~= msg;
        s ~= '\n';
        if (err !is null) s = err.toString() ~ s;
        return s;
    }
}
2953 
// Short module-private alias for CheckException.
private alias Err = CheckException;
2955 
2956 // Private helper functions
2957 
2958 private
2959 {
2960     inout(T) toType(T)(inout Object o)
2961     {
2962         T t = cast(T)(o);
2963         if (t is null)
2964         {
2965             throw new InvalidTypeException("Attempt to compare a "
2966                 ~ T.stringof ~ " with an instance of another type");
2967         }
2968         return t;
2969     }
2970 
2971     string chop(ref string s, size_t n) @safe pure nothrow
2972     {
2973         if (n == -1) n = s.length;
2974         string t = s[0 .. n];
2975         s = s[n..$];
2976         return t;
2977     }
2978 
2979     bool optc(ref string s, char c) @safe pure nothrow
2980     {
2981         immutable bool b = s.length != 0 && s[0] == c;
2982         if (b) s = s[1..$];
2983         return b;
2984     }
2985 
2986     void reqc(ref string s, char c) @safe pure
2987     {
2988         if (s.length == 0 || s[0] != c) throw new TagException("");
2989         s = s[1..$];
2990     }
2991 
2992     char requireOneOf(ref string s, string chars) @safe pure
2993     {
2994         import std..string : indexOf;
2995 
2996         if (s.length == 0 || indexOf(chars,s[0]) == -1)
2997             throw new TagException("");
2998         immutable char ch = s[0];
2999         s = s[1..$];
3000         return ch;
3001     }
3002 
    // Local name for hashOf; the leading dot looks the symbol up at module
    // scope.
    alias hash = .hashOf;
3004 
    // Definitions from the XML specification
    //
    // Each table below is a flat array read as consecutive (low, high)
    // pairs of inclusive bounds in ascending order.
    // NOTE(review): presumably searched with lookup(); confirm against callers.

    // NOTE(review): looks like the ranges of the spec's Char production - confirm.
    immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
        0x10000,0x10FFFF];
    // Flat array of (low, high) inclusive range pairs; a single value is
    // stored as a pair with equal bounds.
    // NOTE(review): presumably the spec's BaseChar production - confirm.
    immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
        0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
        0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
        0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
        0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
        0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
        0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
        0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
        0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
        0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
        0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
        0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
        0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
        0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
        0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
        0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
        0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
        0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
        0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
        0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
        0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
        0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
        0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
        0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
        0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
        0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
        0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
        0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
        0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
        0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
        0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
        0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
        0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
        0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
        0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
        0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
        0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
        0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
        0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
        0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
        0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
    // Flat array of (low, high) inclusive range pairs in ascending order.
    // NOTE(review): presumably the spec's Ideographic production - confirm.
    immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
    // Flat array of (low, high) inclusive range pairs; a single value is
    // stored as a pair with equal bounds.
    // NOTE(review): presumably the spec's CombiningChar production - confirm.
    immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
        0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
        0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
        0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
        0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
        0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
        0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
        0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
        0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
        0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
        0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
        0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
        0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
        0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
        0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
        0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
        0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
        0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
        0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
        0x3099,0x3099,0x309A,0x309A];
    // Flat array of (low, high) inclusive range pairs in ascending order.
    // NOTE(review): presumably the spec's Digit production - confirm.
    immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
        0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
        0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
        0x0ED9,0x0F20,0x0F29];
    // Flat array of (low, high) inclusive range pairs in ascending order; a
    // single value is stored as a pair with equal bounds.
    // NOTE(review): presumably the spec's Extender production - confirm.
    immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
        0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
        0x3035,0x309D,0x309E,0x30FC,0x30FE];
3077 
3078     bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
3079     {
3080         while (table.length != 0)
3081         {
3082             auto m = (table.length >> 1) & ~1;
3083             if (c < table[m])
3084             {
3085                 table = table[0 .. m];
3086             }
3087             else if (c > table[m+1])
3088             {
3089                 table = table[m+2..$];
3090             }
3091             else return true;
3092         }
3093         return false;
3094     }
3095 
3096     string startOf(string s) @safe nothrow pure
3097     {
3098         string r;
3099         foreach (char c;s)
3100         {
3101             r ~= (c < 0x20 || c > 0x7F) ? '.' : c;
3102             if (r.length >= 40) { r ~= "___"; break; }
3103         }
3104         return r;
3105     }
3106 
3107     void exit(string s=null)
3108     {
3109         throw new XMLException(s);
3110     }
3111 }