std.regex source code

1 /++
2   $(LINK2 https://en.wikipedia.org/wiki/Regular_expression, Regular expressions)
3   are a commonly used method of pattern matching
4   on strings, with $(I regex) being a catchy word for a pattern in this domain
5   specific language. Typical problems usually solved by regular expressions
6   include validation of user input and the ubiquitous find $(AMP) replace
7   in text processing utilities.
8 
9 $(SCRIPT inhibitQuickIndex = 1;)
10 $(DIVC quickindex,
11 $(BOOKTABLE,
12 $(TR $(TH Category) $(TH Functions))
13 $(TR $(TD Matching) $(TD
14         $(LREF bmatch)
15         $(LREF match)
16         $(LREF matchAll)
17         $(LREF matchFirst)
18 ))
19 $(TR $(TD Building) $(TD
20         $(LREF ctRegex)
21         $(LREF escaper)
22         $(LREF regex)
23 ))
24 $(TR $(TD Replace) $(TD
25         $(LREF replace)
26         $(LREF replaceAll)
27         $(LREF replaceAllInto)
28         $(LREF replaceFirst)
29         $(LREF replaceFirstInto)
30 ))
31 $(TR $(TD Split) $(TD
32         $(LREF split)
33         $(LREF splitter)
34 ))
35 $(TR $(TD Objects) $(TD
36         $(LREF Captures)
37         $(LREF Regex)
38         $(LREF RegexException)
39         $(LREF RegexMatch)
40         $(LREF Splitter)
41         $(LREF StaticRegex)
42 ))
43 ))
44 
45   $(SECTION Synopsis)
46   ---
47   import std.regex;
48   import std.stdio;
49   void main()
50   {
51       // Print out all possible dd/mm/yy(yy) dates found in user input.
52       auto r = regex(r"\b[0-9][0-9]?/[0-9][0-9]?/[0-9][0-9](?:[0-9][0-9])?\b");
53       foreach (line; stdin.byLine)
54       {
55         // matchAll() returns a range that can be iterated
56         // to get all subsequent matches.
57         foreach (c; matchAll(line, r))
58             writeln(c.hit);
59       }
60   }
61   ...
62 
63   // Create a static regex at compile-time, which contains fast native code.
64   auto ctr = ctRegex!(`^.*/([^/]+)/?$`);
65 
66   // It works just like a normal regex:
67   auto c2 = matchFirst("foo/bar", ctr);   // First match found here, if any
68   assert(!c2.empty);   // Be sure to check if there is a match before examining contents!
69   assert(c2[1] == "bar");   // Captures is a range of submatches: 0 = full match.
70 
71   ...
72   // multi-pattern regex
73   auto multi = regex([`(\d+),\d+`,`([a-z]+):(\d+)`]);
74   auto m = "abc:43 12,34".matchAll(multi);
75   assert(m.front.whichPattern == 2);
76   assert(m.front[1] == "abc");
77   assert(m.front[2] == "43");
78   m.popFront();
79   assert(m.front.whichPattern == 1);
80   assert(m.front[1] == "12");
81   ...
82 
83   // The result of the `matchAll/matchFirst` is directly testable with if/assert/while.
84   // e.g. test if a string consists of letters:
85   assert(matchFirst("Letter", `^\p{L}+$`));
86   ---
87 
88   $(SECTION Syntax and general information)
89   The general usage guideline is to keep regex complexity on the side of simplicity,
90   as its capabilities reside in purely character-level manipulation.
91   As such it's ill-suited for tasks involving higher level invariants
92   like matching an integer number $(U bounded) in an [a,b] interval.
93   Checks of this sort are better addressed by additional post-processing.
94 
95   The basic syntax shouldn't surprise experienced users of regular expressions.
96   For an introduction to `std.regex` see a
97   $(HTTP dlang.org/regular-expression.html, short tour) of the module API
98   and its abilities.
99 
100   There are other web resources on regular expressions to help newcomers,
101   and a good $(HTTP www.regular-expressions.info, reference with tutorial)
102   can easily be found.
103 
104   This library uses a remarkably common ECMAScript syntax flavor
105   with the following extensions:
106   $(UL
107     $(LI Named subexpressions, with Python syntax. )
108     $(LI Unicode properties such as Scripts, Blocks and common binary properties e.g. Alphabetic, White_Space, Hex_Digit etc.)
109     $(LI Arbitrary length and complexity lookbehind, including lookahead in lookbehind and vice versa.)
110   )
111 
112   $(REG_START Pattern syntax )
113   $(I std.regex operates on codepoint level,
114     'character' in this table denotes a single Unicode codepoint.)
115   $(REG_TABLE
116     $(REG_TITLE Pattern element, Semantics )
117     $(REG_TITLE Atoms, Match single characters )
118     $(REG_ROW any character except [{|*+?()^$, Matches the character itself. )
119     $(REG_ROW ., In single line mode matches any character.
120       Otherwise it matches any character except '\n' and '\r'. )
121     $(REG_ROW [class], Matches a single character
122       that belongs to this character class. )
123     $(REG_ROW [^class], Matches a single character that
124       does $(U not) belong to this character class.)
125     $(REG_ROW \cC, Matches the control character corresponding to letter C)
126     $(REG_ROW \xXX, Matches a character with hexadecimal value of XX. )
127     $(REG_ROW \uXXXX, Matches a character  with hexadecimal value of XXXX. )
128     $(REG_ROW \U00YYYYYY, Matches a character with hexadecimal value of YYYYYY. )
129     $(REG_ROW \f, Matches a formfeed character. )
130     $(REG_ROW \n, Matches a linefeed character. )
131     $(REG_ROW \r, Matches a carriage return character. )
132     $(REG_ROW \t, Matches a tab character. )
133     $(REG_ROW \v, Matches a vertical tab character. )
134     $(REG_ROW \d, Matches any Unicode digit. )
135     $(REG_ROW \D, Matches any character except Unicode digits. )
136     $(REG_ROW \w, Matches any word character (note: this includes numbers).)
137     $(REG_ROW \W, Matches any non-word character.)
138     $(REG_ROW \s, Matches whitespace, same as \p{White_Space}.)
139     $(REG_ROW \S, Matches any character except those recognized as $(I \s ). )
140     $(REG_ROW \\\\, Matches \ character. )
141     $(REG_ROW \c where c is one of [|*+?(), Matches the character c itself. )
142     $(REG_ROW \p{PropertyName}, Matches a character that belongs
143         to the Unicode PropertyName set.
144       Single letter abbreviations can be used without surrounding {,}. )
145     $(REG_ROW  \P{PropertyName}, Matches a character that does not belong
146         to the Unicode PropertyName set.
147       Single letter abbreviations can be used without surrounding {,}. )
148     $(REG_ROW \p{InBasicLatin}, Matches any character that is part of
149           the BasicLatin Unicode $(U block).)
150     $(REG_ROW \P{InBasicLatin}, Matches any character except ones in
151           the BasicLatin Unicode $(U block).)
152     $(REG_ROW \p{Cyrillic}, Matches any character that is part of
153         Cyrillic $(U script).)
154     $(REG_ROW \P{Cyrillic}, Matches any character except ones in
155         Cyrillic $(U script).)
156     $(REG_TITLE Quantifiers, Specify repetition of other elements)
157     $(REG_ROW *, Matches previous character/subexpression 0 or more times.
158       Greedy version - tries as many times as possible.)
159     $(REG_ROW *?, Matches previous character/subexpression 0 or more times.
160       Lazy version  - stops as early as possible.)
161     $(REG_ROW +, Matches previous character/subexpression 1 or more times.
162       Greedy version - tries as many times as possible.)
163     $(REG_ROW +?, Matches previous character/subexpression 1 or more times.
164       Lazy version  - stops as early as possible.)
165     $(REG_ROW {n}, Matches previous character/subexpression exactly n times. )
166     $(REG_ROW {n$(COMMA)}, Matches previous character/subexpression n times or more.
167       Greedy version - tries as many times as possible. )
168     $(REG_ROW {n$(COMMA)}?, Matches previous character/subexpression n times or more.
169       Lazy version - stops as early as possible.)
170     $(REG_ROW {n$(COMMA)m}, Matches previous character/subexpression n to m times.
171       Greedy version - tries as many times as possible, but no more than m times. )
172     $(REG_ROW {n$(COMMA)m}?, Matches previous character/subexpression n to m times.
173       Lazy version - stops as early as possible, but no less than n times.)
174     $(REG_TITLE Other, Subexpressions $(AMP) alternations )
175     $(REG_ROW (regex),  Matches subexpression regex,
176       saving matched portion of text for later retrieval. )
177     $(REG_ROW (?#comment), An inline comment that is ignored while matching.)
178     $(REG_ROW (?:regex), Matches subexpression regex,
179       $(U not) saving matched portion of text. Useful to speed up matching. )
180     $(REG_ROW A|B, Matches subexpression A, or failing that, matches B. )
181     $(REG_ROW (?P$(LT)name$(GT)regex), Matches named subexpression
182         regex labeling it with name 'name'.
183         When referring to a matched portion of text,
184         names work like aliases in addition to direct numbers.
185      )
186     $(REG_TITLE Assertions, Match position rather than character )
187     $(REG_ROW ^, Matches at the beginning of input or line (in multiline mode).)
188     $(REG_ROW $, Matches at the end of input or line (in multiline mode). )
189     $(REG_ROW \b, Matches at word boundary. )
190     $(REG_ROW \B, Matches when $(U not) at word boundary. )
191     $(REG_ROW (?=regex), Zero-width lookahead assertion.
192         Matches at a point where the subexpression
193         regex could be matched starting from the current position.
194       )
195     $(REG_ROW (?!regex), Zero-width negative lookahead assertion.
196         Matches at a point where the subexpression
197         regex could $(U not) be matched starting from the current position.
198       )
199     $(REG_ROW (?<=regex), Zero-width lookbehind assertion. Matches at a point
200         where the subexpression regex could be matched ending
201         at the current position (matching goes backwards).
202       )
203     $(REG_ROW  (?<!regex), Zero-width negative lookbehind assertion.
204       Matches at a point where the subexpression regex could $(U not)
205       be matched ending at the current position (matching goes backwards).
206      )
207   )
208 
209   $(REG_START Character classes )
210   $(REG_TABLE
211     $(REG_TITLE Pattern element, Semantics )
212     $(REG_ROW Any atom, Has the same meaning as outside of a character class.)
213     $(REG_ROW a-z, Includes characters a, b, c, ..., z. )
214     $(REG_ROW [a||b]$(COMMA) [a--b]$(COMMA) [a~~b]$(COMMA) [a$(AMP)$(AMP)b],
215      Where a, b are arbitrary classes, means union, set difference,
216      symmetric set difference, and intersection respectively.
217      $(I Any sequence of character class elements implicitly forms a union.) )
218   )
219 
220   $(REG_START Regex flags )
221   $(REG_TABLE
222     $(REG_TITLE Flag, Semantics )
223     $(REG_ROW g, Global regex, repeat over the whole input. )
224     $(REG_ROW i, Case insensitive matching. )
225     $(REG_ROW m, Multi-line mode, match ^, $ on start and end line separators
226        as well as start and end of input.)
227     $(REG_ROW s, Single-line mode, makes . match '\n' and '\r' as well. )
228     $(REG_ROW x, Free-form syntax, ignores whitespace in pattern,
229       useful for formatting complex regular expressions. )
230   )
231 
232   $(SECTION Unicode support)
233 
234   This library provides full Level 1 support* according to
235     $(HTTP unicode.org/reports/tr18/, UTS 18). Specifically:
236   $(UL
237     $(LI 1.1 Hex notation via any of \uxxxx, \U00YYYYYY, \xZZ.)
238     $(LI 1.2 Unicode properties.)
239     $(LI 1.3 Character classes with set operations.)
240     $(LI 1.4 Word boundaries use the full set of "word" characters.)
241     $(LI 1.5 Using simple casefolding to match case
242         insensitively across the full range of codepoints.)
243     $(LI 1.6 Respecting line breaks as any of
244         \u000A | \u000B | \u000C | \u000D | \u0085 | \u2028 | \u2029 | \u000D\u000A.)
245     $(LI 1.7 Operating on codepoint level.)
246   )
247   *With the exception of point 1.1.1: as of yet, normalization of input
248     is expected to be enforced by the user.
249 
250     $(SECTION Replace format string)
251 
252     A set of functions in this module that do the substitution rely
253     on a simple format to guide the process. In particular the table below
254     applies to the `format` argument of
255     $(LREF replaceFirst) and $(LREF replaceAll).
256 
257     The format string can reference parts of match using the following notation.
258     $(REG_TABLE
259         $(REG_TITLE Format specifier, Replaced by )
260         $(REG_ROW $(DOLLAR)$(AMP), the whole match. )
261         $(REG_ROW $(DOLLAR)$(BACKTICK), part of input $(I preceding) the match. )
262         $(REG_ROW $', part of input $(I following) the match. )
263         $(REG_ROW $$, '$' character. )
264         $(REG_ROW \c $(COMMA) where c is any character, the character c itself. )
265         $(REG_ROW \\\\, '\\' character. )
266         $(REG_ROW $(DOLLAR)1 .. $(DOLLAR)99, submatch number 1 to 99 respectively. )
267     )
268 
269   $(SECTION Slicing and zero memory allocations orientation)
270 
271   All matches returned by pattern matching functionality in this library
272     are slices of the original input. The notable exception is the `replace`
273     family of functions  that generate a new string from the input.
274 
275     In cases where producing the replacement is the ultimate goal
276     $(LREF replaceFirstInto) and $(LREF replaceAllInto) could come in handy
277     as functions that  avoid allocations even for replacement.
278 
279     Copyright: Copyright Dmitry Olshansky, 2011-
280 
281   License: $(HTTP boost.org/LICENSE_1_0.txt, Boost License 1.0).
282 
283   Authors: Dmitry Olshansky,
284 
285     API and utility constructs are modeled after the original `std.regex`
286   by Walter Bright and Andrei Alexandrescu.
287 
288   Source: $(PHOBOSSRC std/regex/package.d)
289 
290 Macros:
291     REG_ROW = $(TR $(TD $(I $1 )) $(TD $+) )
292     REG_TITLE = $(TR $(TD $(B $1)) $(TD $(B $2)) )
293     REG_TABLE = <table border="1" cellspacing="0" cellpadding="5" > $0 </table>
294     REG_START = <h3><div align="center"> $0 </div></h3>
295     SECTION = <h3><a id="$1" href="#$1" class="anchor">$0</a></h3>
296     S_LINK = <a href="#$1">$+</a>
297  +/
298 module std.regex;
299 
300 import std.range.primitives, std.traits;
301 import std.regex.internal.ir;
302 import std.typecons : Flag, Yes, No;
303 
304 /++
305     `Regex` object holds regular expression pattern in compiled form.
306 
307     Instances of this object are constructed via calls to `regex`.
308     This is an intended form for caching and storage of frequently
309     used regular expressions.
310 
311     Example:
312 
313     Test if this object doesn't contain any compiled pattern.
314     ---
315     Regex!char r;
316     assert(r.empty);
317     r = regex(""); // Note: "" is a valid regex pattern.
318     assert(!r.empty);
319     ---
320 
321     Getting a range of all the named captures in the regex.
322     ----
323     import std.range;
324     import std.algorithm;
325 
326     auto re = regex(`(?P<name>\w+) = (?P<var>\d+)`);
327     auto nc = re.namedCaptures;
328     static assert(isRandomAccessRange!(typeof(nc)));
329     assert(!nc.empty);
330     assert(nc.length == 2);
331     assert(nc.equal(["name", "var"]));
332     assert(nc[0] == "name");
333     assert(nc[1..$].equal(["var"]));
334     ----
335 +/
336 public alias Regex(Char) = std.regex.internal.ir.Regex!(Char);
337 
338 /++
339     A `StaticRegex` is a `Regex` object that contains D code specially
340     generated at compile-time to speed up matching.
341 
342     No longer used, kept as alias to Regex for backwards compatibility.
343 +/
344 public alias StaticRegex = Regex;
345 
/++
    Compile regular expression pattern for the later execution.
    Returns: `Regex` object that works on inputs having
    the same character width as `pattern`.

    Params:
    pattern = A single regular expression to match.
    patterns = An array of regular expression strings.
        The resulting `Regex` object will match any expression;
        use $(LREF whichPattern) to know which.
        Must contain at least one pattern.
    flags = The _attributes (g, i, m, s and x accepted)

    Throws: `RegexException` if there were any errors during compilation,
    or if `patterns` is empty.
+/
@trusted public auto regex(S : C[], C)(const S[] patterns, const(char)[] flags="")
if (isSomeString!(S))
{
    import std.array : appender;
    import std.functional : memoize;
    enum cacheSize = 8; //TODO: invent nice interface to control regex caching

    // An empty list would otherwise fall through to `patterns[0]` below,
    // which indexes past the end of the array.
    if (patterns.length == 0)
        throw new RegexException("regex requires at least one pattern");

    const(C)[] pat;
    if (patterns.length > 1)
    {
        // Join all patterns into one alternation: (?:p0\T0)\T0|(?:p1\T1)\T1|...
        // where Ti is the escaped code point privateUseStart + i.
        auto app = appender!S();
        foreach (i, p; patterns)
        {
            if (i != 0)
                app.put("|");
            app.put("(?:");
            app.put(p);
            // terminator for the pattern
            // to detect if the pattern unexpectedly ends
            app.put("\\");
            app.put(cast(dchar)(privateUseStart+i));
            app.put(")");
            // another one to return correct whichPattern
            // for all of potential alternatives in the patterns[i]
            app.put("\\");
            app.put(cast(dchar)(privateUseStart+i));
        }
        pat = app.data;
    }
    else
        pat = patterns[0];

    // memoize is bypassed during compile-time evaluation
    if (__ctfe)
        return regexImpl(pat, flags);
    return memoize!(regexImpl!S, cacheSize)(pat, flags);
}

///ditto
@trusted public auto regex(S)(S pattern, const(char)[] flags="")
if (isSomeString!(S))
{
    // A single pattern is handled as a one-element pattern list.
    return regex([pattern], flags);
}
402 
///
@system unittest
{
    // Runs the multi-pattern example for one string type.
    void check(Str)()
    {
        // two alternatives compiled into a single regex object
        Str[] alternatives = [`([a-z]+):(\d+)`, `(\d+),\d+`];
        auto combined = regex(alternatives);
        Str subject = "abc:43 12,34";
        auto found = subject.matchAll(combined);

        // "abc:43" is matched by the first alternative
        assert(found.front.whichPattern == 1);
        assert(found.front[1] == "abc");
        assert(found.front[2] == "43");

        found.popFront();

        // "12,34" is matched by the second alternative
        assert(found.front.whichPattern == 2);
        assert(found.front[1] == "12");
    }

    import std.meta : AliasSeq;
    static foreach (Base; AliasSeq!(string, wstring, dstring))
        // Also covers a const array of patterns - see https://issues.dlang.org/show_bug.cgi?id=20301
        static foreach (Qualified; AliasSeq!(Base, const Base, immutable Base))
            check!Qualified();
}
427 
// The string form of a compiled regex must stay compact and must show
// both the pattern text and the flags it was built with.
@system unittest
{
    import std.conv : to;
    import std.string : indexOf;

    immutable pattern = "s+";
    auto regexString = to!string(regex(pattern, "U"));
    assert(regexString.length <= pattern.length + 100, "String representation shouldn't be unreasonably bloated.");
    assert(indexOf(regexString, "s+") >= 0, "String representation should include pattern.");
    assert(indexOf(regexString, 'U') >= 0, "String representation should include flags.");
}
439 
// Runs the parser over `pattern` with the given `flags` and hands back
// the program it produced.
public auto regexImpl(S)(const S pattern, const(char)[] flags="")
if (isSomeString!(typeof(pattern)))
{
    import std.regex.internal.parser : CodeGen, Parser;
    alias PatternType = Unqual!(typeof(pattern));
    auto parser = Parser!(PatternType, CodeGen)(pattern, flags);
    return parser.program;
}
448 
449 
// Holds a pointer to an immutable Regex and, through `alias this`,
// presents it as a mutable one.
private struct CTRegexWrapper(Char)
{
    private immutable(Regex!Char)* re;

    // Code written against a mutable Regex keeps working with the wrapper;
    // the immutability is cast away here and we stay "logically const".
    @property @trusted ref getRe() const
    {
        auto mutableRe = cast(Regex!Char*) re;
        return *mutableRe;
    }
    alias getRe this;
}
459 
// Implementation behind `ctRegex`: builds the regex while compiling, generates
// matcher source for it and exposes the result through `wrapper`.
template ctRegexImpl(alias pattern, string flags=[])
{
    import std.regex.internal.backtracking, std.regex.internal.parser;
    // the initializer of a static immutable is evaluated at compile time
    static immutable r = cast(immutable) regex(pattern, flags);
    alias Char = BasicElementOf!(typeof(pattern));
    // source text produced by ctGenRegExCode for this pattern; mixed into `func` below
    enum source = ctGenRegExCode(r);
    @trusted bool func(BacktrackingMatcher!Char matcher)
    {
        // build with -debug=std_regex_ctr to print the generated source during compilation
        debug(std_regex_ctr) pragma(msg, source);
        // NOTE(review): presumably the generated code refers to `matcher` by name and
        // this cast only marks the parameter as used - confirm against ctGenRegExCode
        cast(void) matcher;
        mixin(source);
    }
    // same program as `r`, but carrying a factory built around `func`
    static immutable staticRe =
        cast(immutable) r.withFactory(new CtfeFactory!(BacktrackingMatcher, Char, func));
    // what `ctRegex` evaluates to: a wrapper holding the address of `staticRe`
    enum wrapper = CTRegexWrapper!Char(&staticRe);
}
476 
@safe unittest
{
    // The wrapper produced by ctRegex must still be accepted where a
    // StaticRegex parameter is expected (logical const workaround).
    static void takesStaticRegex(StaticRegex!char)
    {
    }
    enum compiled = ctRegex!``;
    takesStaticRegex(compiled);
}
486 
@safe unittest
{
    auto rx = ctRegex!`foo`;
    assert(matchFirst("foo", rx));

    // a variable holding one ctRegex can be assigned another one
    rx = ctRegex!`bar`;
    assert(matchFirst("bar", rx));
    assert(!matchFirst("bar", ctRegex!`foo`));
}
497 
/++
    Compile regular expression using CTFE
    and generate optimized native machine code for matching it.

    Returns: A wrapper around the regex built at compile time. It converts
    implicitly (via `alias this`) to the wrapped `Regex`, so it can be used
    wherever a `Regex`/`StaticRegex` is expected.

    Params:
    pattern = Regular expression
    flags = The _attributes (g, i, m, s and x accepted)
+/
public enum ctRegex(alias pattern, alias flags=[]) = ctRegexImpl!(pattern, flags).wrapper;
509 
// Evaluates to true when `RegEx` is a `Regex`/`StaticRegex` (ignoring
// qualifiers), or converts to a const `Regex`, over the basic element type of `R`.
template isRegexFor(RegEx, R)
{
    alias Char = BasicElementOf!R;
    enum isRegexFor =
        is(immutable RegEx == immutable Regex!Char)
        || is(RegEx : const(Regex!Char))
        || is(immutable RegEx == immutable StaticRegex!Char);
}
513 
514 
/++
    The set of submatches recorded for one match, as produced by a call
    to `match` or by iterating over a `RegexMatch` range.

    It is a range of slices of the input; its first element is the whole match.
+/
@trusted public struct Captures(R)
if (isSomeString!R)
{//@trusted because of union inside
    alias DataIndex = size_t;
    alias String = R;
    alias Store = SmallFixedArray!(Group!DataIndex, 3);
private:
    import std.conv : text;
    Store matches;
    const(NamedGroup)[] _names;
    R _input;
    int _nMatch;
    uint _f, _b;

    // Reserve room for `n` groups over `input`; the range initially spans [0, n).
    this(R input, uint n, const(NamedGroup)[] named)
    {
        matches = Store(n);
        _input = input;
        _names = named;
        _f = 0;
        _b = n;
    }

    // Same setup, with input, group names and group count taken from `rmatch`.
    this(ref RegexMatch!R rmatch)
    {
        immutable n = rmatch._engine.pattern.ngroup;
        matches = Store(n);
        _input = rmatch._input;
        _names = rmatch._engine.pattern.dict;
        _f = 0;
        _b = n;
    }

    // Slice of the input covered by group `index`, or null if that group is unset.
    inout(R) getMatch(size_t index) inout
    {
        auto grp = &matches[index];
        if (*grp)
            return _input[grp.begin .. grp.end];
        return null;
    }

public:
    ///The part of the input that precedes the match (all of the input if nothing matched).
    @property R pre()
    {
        if (_nMatch == 0)
            return _input[];
        return _input[0 .. matches[0].begin];
    }

    ///The part of the input right after the match (all of the input if nothing matched).
    @property R post()
    {
        if (_nMatch == 0)
            return _input[];
        return _input[matches[0].end .. $];
    }

    ///The part of the input covered by the whole match.
    @property R hit()
    {
        assert(_nMatch, "attempted to get hit of an empty match");
        auto whole = matches[0];
        return _input[whole.begin .. whole.end];
    }

    ///Range primitives.
    @property R front()
    {
        assert(_nMatch, "attempted to get front of an empty match");
        return getMatch(_f);
    }

    ///ditto
    @property R back()
    {
        assert(_nMatch, "attempted to get back of an empty match");
        return getMatch(_b - 1);
    }

    ///ditto
    void popFront()
    {
        assert(!empty);
        _f++;
    }

    ///ditto
    void popBack()
    {
        assert(!empty);
        _b--;
    }

    ///ditto
    @property bool empty() const
    {
        return !(_nMatch != 0 && _f < _b);
    }

    ///ditto
    inout(R) opIndex()(size_t i) inout
    {
        immutable absolute = _f + i;
        assert(absolute < _b,text("requested submatch number ", i," is out of range"));
        return getMatch(absolute);
    }

    /++
        Explicit cast to bool.
        A shorthand for !(x.empty) in if and assert statements.

        ---
        import std.regex;

        assert(!matchFirst("nothing", "something"));
        ---
    +/

    @safe bool opCast(T:bool)() const nothrow
    {
        return _nMatch != 0;
    }

    /++
        Number of the pattern that matched, counting from 1 for the first pattern.
        Returns 0 on no match.
    +/

    @safe @property int whichPattern() const nothrow
    {
        return _nMatch;
    }

    ///
    @system unittest
    {
        import std.regex;
        assert(matchFirst("abc", "[0-9]+", "[a-z]+").whichPattern == 2);
    }

    /++
        Look up a submatch by the name of its group.

        ---
        import std.regex;
        import std.range;

        auto c = matchFirst("a = 42;", regex(`(?P<var>\w+)\s*=\s*(?P<value>\d+);`));
        assert(c["var"] == "a");
        assert(c["value"] == "42");
        popFrontN(c, 2);
        //named groups are unaffected by range primitives
        assert(c["var"] =="a");
        assert(c.front == "42");
        ----
    +/
    R opIndex(String)(String i) /*const*/ //@@@BUG@@@
        if (isSomeString!String)
    {
        immutable size_t groupIndex = lookupNamedGroup(_names, i);
        return getMatch(groupIndex);
    }

    ///Number of submatches left in this range (0 if nothing matched).
    @property size_t length() const
    {
        if (_nMatch == 0)
            return 0;
        return _b - _f;
    }

    ///Returns this object itself; kept for compatibility with original std.regex.
    @property ref captures()
    {
        return this;
    }
}
674 
///
@system unittest
{
    import std.range.primitives : popFrontN;

    auto caps = matchFirst("@abc#", regex(`(\w)(\w)(\w)`));
    assert(caps.pre == "@"); // input before the match
    assert(caps.post == "#"); // input right after the match
    assert(caps.hit == "abc" && caps[0] == caps.hit); // element 0 is the whole match
    assert(caps[2] == "b");
    assert(caps.front == "abc");
    caps.popFront();
    assert(caps.front == "a");
    assert(caps.back == "c");
    caps.popBack();
    assert(caps.back == "b");
    popFrontN(caps, 2);
    assert(caps.empty);

    assert(!matchFirst("nothing", "something"));

    // A group that took no part in the match yields null.
    caps = matchFirst("ac", regex(`a(b)?c`));
    assert(caps);
    assert(!caps[1]);
}
701 
// Captures can be assigned inside a boolean expression and tested via cast(bool).
@system unittest
{
    Captures!string caps;
    string subject = "abc";
    immutable firstTry = cast(bool)(caps = matchFirst(subject, regex("d")));
    assert(firstTry
        || cast(bool)(caps = matchFirst(subject, regex("a"))));
}
709 
// https://issues.dlang.org/show_bug.cgi?id=19979
// Groups that matched the empty string are non-null but have zero length;
// a group that did not participate is null.
@system unittest
{
    auto caps = matchFirst("bad", regex(`(^)(not )?bad($)`));
    assert(caps[0] && caps[0].length == "bad".length);
    assert(caps[1] && caps[1].length == 0);
    assert(!caps[2]);
    assert(caps[3] && caps[3].length == 0);
}
719 
/++
    A regex engine state, as returned by `match` family of functions.

    Effectively it's a forward range of Captures!R, produced
    by lazily searching for matches in a given input.
+/
@trusted public struct RegexMatch(R)
if (isSomeString!R)
{
    import std.typecons : Rebindable;
private:
    alias Char = BasicElementOf!R;
    Matcher!Char _engine;
    Rebindable!(const MatcherFactory!Char) _factory;
    R _input;
    Captures!R _captures;

    // Choose a matcher factory (the program's own one, or the default when it
    // has none), create an engine for `input` and run the first search.
    this(RegEx)(R input, RegEx prog)
    {
        _input = input;
        if (prog.factory is null) _factory = defaultFactory!Char(prog);
        else _factory = prog.factory;
        _engine = _factory.create(prog, input);
        assert(_engine.refCount == 1);
        _captures = Captures!R(this);
        _captures.matches.mutate((slice) { _captures._nMatch = _engine.match(slice); });
    }

public:
    // A copy refers to the same engine, so its reference count is incremented.
    this(this)
    {
        if (_engine) _factory.incRef(_engine);
    }

    // Gives up this object's reference to the engine.
    ~this()
    {
        if (_engine) _factory.decRef(_engine);
    }

    ///Shorthands for front.pre, front.post, front.hit.
    @property R pre()
    {
        return _captures.pre;
    }

    ///ditto
    @property R post()
    {
        return _captures.post;
    }

    ///ditto
    @property R hit()
    {
        return _captures.hit;
    }

    /++
        Functionality for processing subsequent matches of global regexes via range interface:
        ---
        import std.regex;
        auto m = matchAll("Hello, world!", regex(`\w+`));
        assert(m.front.hit == "Hello");
        m.popFront();
        assert(m.front.hit == "world");
        m.popFront();
        assert(m.empty);
        ---
    +/
    @property inout(Captures!R) front() inout
    {
        return _captures;
    }

    ///ditto
    void popFront()
    {
        // CoW - if refCount is not 1, we are aliased by somebody else
        if (_engine.refCount != 1)
        {
            // we create a new engine & abandon this reference
            auto old = _engine;
            _engine = _factory.dup(old, _input);
            _factory.decRef(old);
        }
        // search for the next match and record which pattern (if any) matched
        _captures.matches.mutate((slice) { _captures._nMatch = _engine.match(slice); });
    }

    ///ditto
    auto save(){ return this; }

    ///Test if this match object is empty.
    @property bool empty() const { return _captures._nMatch == 0; }

    ///Same as !(x.empty), provided for its convenience  in conditional statements.
    T opCast(T:bool)() const { return !empty; }

    /// Same as .front, provided for compatibility with original std.regex.
    @property inout(Captures!R) captures() inout { return _captures; }
}
822 
// Runs a single search of `prog` over `input` and returns the captures.
// One engine is kept in a static variable and reused for as long as
// consecutive calls use the same pattern text and flags.
private @trusted auto matchOnce(RegEx, R)(R input, const auto ref RegEx prog)
{
    alias Char = BasicElementOf!R;
    static struct Key
    {
        immutable(Char)[] pattern;
        uint flags;
    }
    static Key cacheKey = Key("", -1);
    static Matcher!Char cache;
    auto factory = prog.factory !is null ? prog.factory : defaultFactory!Char(prog);
    auto wanted = Key(prog.pattern, prog.flags);
    Matcher!Char engine;
    if (cacheKey != wanted)
    {
        // different regex: build a fresh engine first, then let go of the cached one
        engine = factory.create(prog, input);
        if (cache) factory.decRef(cache); // destroy cached engine *after* building a new one
        cache = engine;
        cacheKey = wanted;
    }
    else
    {
        // same regex as last time: point the cached engine at the new input
        engine = cache;
        engine.rearm(input);
    }
    auto result = Captures!R(input, prog.ngroup, prog.dict);
    result.matches.mutate((slice){ result._nMatch = engine.match(slice); });
    return result;
}
852 
// Builds a RegexMatch over `input` from a copy of `re` that has the
// `global` option added to its flags.
private auto matchMany(RegEx, R)(R input, auto ref RegEx re) @safe
{
    immutable withGlobal = re.flags | RegexOption.global;
    return RegexMatch!R(input, re.withFlags(withGlobal));
}
857 
@system unittest
{
    // sanity checks for new API: two consecutive searches with the same regex
    auto abc = regex("abc");
    assert(!matchOnce("abc", abc).empty);
    assert(matchOnce("abc", abc)[0] == "abc");
}
865 
// https://issues.dlang.org/show_bug.cgi?id=18135
// A struct that merely contains a RegexMatch must be assignable and comparable.
@system unittest
{
    static struct Holder { RegexMatch!string m; }
    Holder held;
    held = Holder();
    assert(held == held);
}
874 
// True when `fun` can be called with a single `Captures!R` argument,
// i.e. it is a "mutator" style callback rather than an "output" style one.
private template isReplaceFunctor(alias fun, R)
{
    enum isReplaceFunctor =
        __traits(compiles, (Captures!R caps) { fun(caps); });
}
877 
// Lowest-level replacement primitive: writes `input` to `sink` with the
// single match described by `captures` substituted via `output`.
// When `captures` is empty the input is written through unchanged.
private @trusted void replaceCapturesInto(alias output, Sink, R, T)
        (ref Sink sink, R input, T captures)
if (isOutputRange!(Sink, dchar) && isSomeString!R)
{
    if (!captures.empty)
    {
        sink.put(captures.pre);
        // Dispatching on the callback kind is a workaround for bogus
        // "is a nested function and cannot be accessed from" errors;
        // ideally this would be just output(captures, sink).
        static if (isReplaceFunctor!(output, R))
            sink.put(output(captures)); // callback returns the replacement
        else
            output(captures, sink);     // callback writes to the sink itself
        sink.put(captures.post);
    }
    else
    {
        sink.put(input);
    }
}
897 
// Same as replaceCapturesInto, but for a whole range of matches: every
// element of `matches` is substituted via `output`, and the text between
// matches is copied to `sink` verbatim.
private void replaceMatchesInto(alias output, Sink, R, T)
        (ref Sink sink, R input, T matches)
if (isOutputRange!(Sink, dchar) && isSomeString!R)
{
    // index in `input` just past the previously processed match
    size_t consumed = 0;
    foreach (m; matches)
    {
        // text between the previous match and this one
        sink.put(m.pre[consumed .. $]);
        // same workaround as in replaceCapturesInto
        static if (isReplaceFunctor!(output, R))
            sink.put(output(m)); // callback returns the replacement
        else
            output(m, sink);     // callback writes to the sink itself
        consumed = m.pre.length + m.hit.length;
    }
    // whatever is left after the last match
    sink.put(input[consumed .. $]);
}
916 
// Shared skeleton of the replaceFirst family: finds the first match and,
// if there is one, rebuilds the string with `output` applied to it.
// Returns `input` itself when nothing matches.
private R replaceFirstWith(alias output, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    import std.array : appender;
    auto firstHit = matchFirst(input, re);
    if (!firstHit.empty)
    {
        auto buffer = appender!(R)();
        replaceCapturesInto!output(buffer, input, firstHit);
        return buffer.data;
    }
    return input;
}
929 
// Shared skeleton of the replaceAll family.
// `method` selects the function that produces the range of matches, which
// lets the old API reuse this implementation with a different matcher.
// Returns `input` itself when nothing matches.
private R replaceAllWith(alias output,
        alias method=matchAll, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    import std.array : appender;
    auto found = method(input, re); //inout(C)[] fails
    if (!found.empty)
    {
        auto buffer = appender!(R)();
        replaceMatchesInto!output(buffer, input, found);
        return buffer.data;
    }
    return input;
}
944 
945 
/++
    Start matching `input` to regex pattern `re`,
    using Thompson NFA matching scheme.

    The use of this function is $(RED discouraged) - use either of
    $(LREF matchAll) or $(LREF matchFirst).

    Delegating the kind of operation
    to the "g" flag is soon to be phased out along with the
    ability to choose the exact matching scheme. The choice of
    matching scheme to use depends highly on the pattern kind and
    can be done automatically on a case by case basis.

    Returns: a `RegexMatch` object holding engine state after first match.
+/
public auto match(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx,R))
{
    alias Result = RegexMatch!(Unqual!(typeof(input)));
    return Result(input, re);
}
967 
///ditto
public auto match(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // the pattern string is compiled with regex() on every call
    alias Result = RegexMatch!(Unqual!(typeof(input)));
    return Result(input, regex(re));
}
974 
/++
    Find the first (leftmost) slice of the `input` that
    matches the pattern `re`. This function picks the most suitable
    regular expression engine depending on the pattern properties.

    The `re` parameter can be one of three types:
    $(UL
      $(LI Plain string(s), in which case it's compiled to bytecode before matching. )
      $(LI Regex!char (wchar/dchar) that contains a pattern in the form of
        compiled bytecode. )
      $(LI StaticRegex!char (wchar/dchar) that contains a pattern in the form of
        compiled native machine code. )
    )

    Returns:
    $(LREF Captures) containing the extent of a match together with all submatches
    if there was a match, otherwise an empty $(LREF Captures) object.
+/
public auto matchFirst(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    return input.matchOnce(re);
}
998 
///ditto
public auto matchFirst(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // compile the single pattern string, then match once
    return input.matchOnce(regex(re));
}
1005 
///ditto
public auto matchFirst(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String)
{
    // compile the list of pattern strings into one regex, then match once
    return input.matchOnce(regex(re));
}
1012 
/++
    Initiate a search for all non-overlapping matches to the pattern `re`
    in the given `input`. The result is a lazy range of matches generated
    as they are encountered in the input going left to right.

    This function picks the most suitable regular expression engine
    depending on the pattern properties.

    The `re` parameter can be one of three types:
    $(UL
      $(LI Plain string(s), in which case it's compiled to bytecode before matching. )
      $(LI Regex!char (wchar/dchar) that contains a pattern in the form of
        compiled bytecode. )
      $(LI StaticRegex!char (wchar/dchar) that contains a pattern in the form of
        compiled native machine code. )
    )

    Returns:
    $(LREF RegexMatch) object that represents matcher state
    after the first match was found or an empty one if not present.
+/
public auto matchAll(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    return input.matchMany(re);
}
1039 
///ditto
public auto matchAll(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // compile the single pattern string, then iterate all matches
    return input.matchMany(regex(re));
}
1046 
///ditto
public auto matchAll(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String)
{
    // compile the list of pattern strings into one regex, then iterate all matches
    return input.matchMany(regex(re));
}
1053 
// Coverage for matchFirst/matchAll over several string types, with string
// patterns, a multi-pattern regex and a compile-time regex.
@system unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    import std.conv : to;

    static foreach (String; AliasSeq!(string, wstring, const(dchar)[]))
    {{
        // plain string pattern
        auto text1 = "blah-bleh".to!String();
        auto pattern1 = "bl[ae]h".to!String();
        auto first1 = matchFirst(text1, pattern1);
        assert(first1.equal(["blah".to!String()]));
        auto all1 = matchAll(text1, pattern1);
        assert(all1.equal!((a,b) => a.equal(b))
            ([["blah".to!String()], ["bleh".to!String()]]));

        // regex compiled from a list of patterns
        auto text2 = "1/03/12 - 3/03/12".to!String();
        auto pattern2 = regex([r"(\d+)/(\d+)/(\d+)".to!String(), "abc".to!String]);
        auto first2 = matchFirst(text2, pattern2);
        assert(first2.equal(["1/03/12", "1", "03", "12"].map!(to!String)()));
        auto all2 = matchAll(text2, pattern2);
        assert(all2.front.equal(first2));
        all2.popFront();
        assert(all2.front.equal(["3/03/12", "3", "03", "12"].map!(to!String)()));
        first2.popFrontN(3);
        assert(first2.equal(["12".to!String()]));

        // compile-time regex with named groups
        auto staticPattern = ctRegex!(`(?P<Quot>\d+)/(?P<Denom>\d+)`.to!String());
        auto text3 = "2 + 34/56 - 6/1".to!String();
        auto first3 = matchFirst(text3, staticPattern);
        assert(first3.equal(["34/56", "34", "56"].map!(to!String)()));
        assert(first3["Quot"] == "34".to!String());
        assert(first3["Denom"] == "56".to!String());

        auto all3 = matchAll(text3, staticPattern);
        assert(all3.front.equal(first3));
        all3.popFront();
        assert(all3.front.equal(["6/1", "6", "1"].map!(to!String)()));
    }}
}
1095 
/++
    Start matching of `input` to regex pattern `re`,
    using traditional $(LINK2 https://en.wikipedia.org/wiki/Backtracking,
    backtracking) matching scheme.

    The use of this function is $(RED discouraged) - use either of
    $(LREF matchAll) or $(LREF matchFirst).

    Delegating the kind of operation
    to the "g" flag is soon to be phased out along with the
    ability to choose the exact matching scheme. The choice of
    matching scheme to use depends highly on the pattern kind and
    can be done automatically on a case by case basis.

    Returns: a `RegexMatch` object holding engine
    state after first match.

+/
public auto bmatch(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    alias Result = RegexMatch!(Unqual!(typeof(input)));
    return Result(input, re);
}
1119 
///ditto
public auto bmatch(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // the pattern string is compiled with regex() on every call
    alias Result = RegexMatch!(Unqual!(typeof(input)));
    return Result(input, regex(re));
}
1126 
// Produces a replacement string from `format`, using `captures` for
// substitution, and writes it piece by piece to `sink`.
//
// Recognised sequences in `format`:
//   $N      - submatch number N (decimal digits)
//   ${name} - named submatch, name made of ASCII letters
//   $&      - the whole match
//   $`      - input preceding the match
//   $'      - input following the match
//   $$      - a literal '$'
// Everything else is copied verbatim.
//
// Params:
//   format        = the replacement format string
//   captures      = the match to take substitutions from
//   sink          = output range receiving the result
//   ignoreBadSubs = when true, an out-of-range $N is silently skipped
//                   instead of being reported as an error
//
// Throws: an exception (via `enforce`) on an out-of-range submatch number
// (unless `ignoreBadSubs`), on a malformed `${...}`, on an unrecognised
// character after '$', and on a trailing lone '$'.
package void replaceFmt(R, Capt, OutR)
    (R format, Capt captures, OutR sink, bool ignoreBadSubs = false)
if (isOutputRange!(OutR, ElementEncodingType!R[]) &&
    isOutputRange!(OutR, ElementEncodingType!(Capt.String)[]))
{
    import std.algorithm.searching : find;
    import std.ascii : isDigit, isAlpha;
    import std.conv : text, parse;
    import std.exception : enforce;
    enum State { Normal, Dollar }
    auto state = State.Normal;
    size_t offset;
L_Replace_Loop:
    while (!format.empty)
        final switch (state)
        {
        case State.Normal:
            // scan code units (no decoding) up to the next '$'
            for (offset = 0; offset < format.length; offset++)
            {
                if (format[offset] == '$')
                {
                    state = State.Dollar;
                    sink.put(format[0 .. offset]);
                    format = format[offset+1 .. $];//skip the '$' itself
                    continue L_Replace_Loop;
                }
            }
            // no '$' left: flush the remainder
            sink.put(format[0 .. offset]);
            format = format[offset .. $];
            break;
        case State.Dollar:
            if (isDigit(format[0]))
            {
                // parse consumes the digits from the front of `format`
                uint digit = parse!uint(format);
                enforce(ignoreBadSubs || digit < captures.length, text("invalid submatch number ", digit));
                if (digit < captures.length)
                    sink.put(captures[digit]);
            }
            else if (format[0] == '{')
            {
                // the name runs up to the first non-letter, which must be '}'
                auto x = find!(a => !isAlpha(a))(format[1..$]);
                enforce(!x.empty && x[0] == '}', "no matching '}' in replacement format");
                auto name = format[1 .. $ - x.length];
                format = x[1..$];
                enforce(!name.empty, "invalid name in ${...} replacement format");
                sink.put(captures[name]);
            }
            else if (format[0] == '&')
            {
                sink.put(captures[0]);
                format = format[1 .. $];
            }
            else if (format[0] == '`')
            {
                sink.put(captures.pre);
                format = format[1 .. $];
            }
            else if (format[0] == '\'')
            {
                sink.put(captures.post);
                format = format[1 .. $];
            }
            else if (format[0] == '$')
            {
                sink.put(format[0 .. 1]);
                format = format[1 .. $];
            }
            else
            {
                // Anything else after '$' is not a valid substitution; report
                // it instead of silently dropping the '$'.
                enforce(false, "invalid format string in regex replace");
            }
            state = State.Normal;
            break;
        }
    // a '$' as the very last character leaves us in the Dollar state
    enforce(state == State.Normal, "invalid format string in regex replace");
}
1200 
/++
    Construct a new string from `input` by replacing the first match with
    a string generated from it according to the `format` specifier.

    To replace all matches use $(LREF replaceAll).

    Params:
    input = string to search
    re = compiled regular expression to use
    format = _format string to generate replacements from,
    see $(S_LINK Replace _format string, the _format string).

    Returns:
    A string of the same type with the first match (if any) replaced.
    If no match is found returns the input string itself.
+/
public R replaceFirst(R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // "output"-style callback: expands `format` for the match into the sink
    return replaceFirstWith!((caps, dest) => replaceFmt(format, caps, dest))(input, re);
}
1222 
///
@system unittest
{
    // only the first "n" is wrapped in brackets
    auto wrapped = replaceFirst("noon", regex("n"), "[$&]");
    assert(wrapped == "[n]oon");
}
1228 
/++
    This is a general replacement tool that constructs a new string by replacing
    matches of pattern `re` in the `input`. Unlike the other overload
    there is no format string; instead captures are passed
    to a user-defined functor `fun` that returns a new string
    to use as replacement.

    This version replaces the first match in `input`,
    see $(LREF replaceAll) to replace all of the matches.

    Returns:
    A new string of the same type as `input` with the first match
    replaced by the return value of `fun`. If no match is found
    returns the `input` itself.
+/
public R replaceFirst(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // adapt the value-returning functor to an "output"-style callback
    return replaceFirstWith!((caps, dest) => dest.put(fun(caps)))(input, re);
}
1249 
///
@system unittest
{
    import std.conv : to;
    // increment only the first number found in the text
    string ranking = "#21 out of 46";
    string bumped = replaceFirst!(cap => to!string(to!int(cap.hit)+1))
        (ranking, regex(`[0-9]+`));
    assert(bumped == "#22 out of 46");
}
1259 
/++
    A variation on $(LREF replaceFirst) that instead of allocating a new string
    on each call outputs the result piece-wise to the `sink`. In particular
    this enables efficient construction of a final output incrementally.

    Like in the $(LREF replaceFirst) family of functions there is an overload
    for the substitution guided by the `format` string
    and one with a user-defined callback.
+/
public @trusted void replaceFirstInto(Sink, R, C, RegEx)
        (ref Sink sink, R input, RegEx re, const(C)[] format)
if (isOutputRange!(Sink, dchar) && isSomeString!R
    && is(C : dchar) && isRegexFor!(RegEx, R))
{
    auto firstHit = matchFirst(input, re);
    replaceCapturesInto!((caps, dest) => replaceFmt(format, caps, dest))
        (sink, input, firstHit);
}
1277 
///ditto
public @trusted void replaceFirstInto(alias fun, Sink, R, RegEx)
    (Sink sink, R input, RegEx re)
if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
{
    // `fun` is handed over as is; the low-level helper decides how to call it
    auto firstHit = matchFirst(input, re);
    replaceCapturesInto!fun(sink, input, firstHit);
}
1285 
///
@system unittest
{
    import std.array;
    string line1 = "first message\n";
    string line2 = "second message\n";
    auto output = appender!string();
    // format-string flavour
    replaceFirstInto(output, line1, regex(`([a-z]+) message`), "$1");
    // equivalent of the above with a user-defined callback
    replaceFirstInto!(cap=>cap[1])(output, line2, regex(`([a-z]+) message`));
    assert(output.data == "first\nsecond\n");
}
1298 
// examples for replaceFirst and replaceFirstInto
@system unittest
{
    import std.conv;
    // functor flavour of replaceFirst
    string ranking = "#21 out of 46";
    string bumped = replaceFirst!(cap => to!string(to!int(cap.hit)+1))
        (ranking, regex(`[0-9]+`));
    assert(bumped == "#22 out of 46");

    import std.array;
    string line1 = "first message\n";
    string line2 = "second message\n";
    auto output = appender!string();
    // format-string flavour
    replaceFirstInto(output, line1, regex(`([a-z]+) message`), "$1");
    // equivalent of the above with a user-defined callback
    replaceFirstInto!(cap=>cap[1])(output, line2, regex(`([a-z]+) message`));
    assert(output.data == "first\nsecond\n");
}
1316 
/++
    Construct a new string from `input` by replacing all of the
    fragments that match a pattern `re` with a string generated
    from the match according to the `format` specifier.

    To replace only the first match use $(LREF replaceFirst).

    Params:
    input = string to search
    re = compiled regular expression to use
    format = _format string to generate replacements from,
    see $(S_LINK Replace _format string, the _format string).

    Returns:
    A string of the same type as `input` with all
    of the matches (if any) replaced.
    If no match is found returns the input string itself.
+/
public @trusted R replaceAll(R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // "output"-style callback: expands `format` for each match into the sink
    return replaceAllWith!((caps, dest) => replaceFmt(format, caps, dest))(input, re);
}
1340 
///
@system unittest
{
    // insert comma as thousands delimiter
    auto thousands = regex(r"(?<=\d)(?=(\d\d\d)+\b)","g");
    auto grouped = replaceAll("12000 + 42100 = 54100", thousands, ",");
    assert(grouped == "12,000 + 42,100 = 54,100");
}
1348 
/++
    This is a general replacement tool that constructs a new string by replacing
    matches of pattern `re` in the `input`. Unlike the other overload
    there is no format string; instead captures are passed
    to a user-defined functor `fun` that returns a new string
    to use as replacement.

    This version replaces all of the matches found in `input`,
    see $(LREF replaceFirst) to replace the first match only.

    Returns:
    A new string of the same type as `input` with all matches
    replaced by return values of `fun`. If no matches are found
    returns the `input` itself.

    Params:
    input = string to search
    re = compiled regular expression
    fun = delegate to use
+/
public @trusted R replaceAll(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // adapt the value-returning functor to an "output"-style callback
    return replaceAllWith!((caps, dest) => dest.put(fun(caps)))(input, re);
}
1374 
///
@system unittest
{
    // Replacement callback: returns the matched text in upper case.
    string baz(Captures!(string) m)
    {
        import std.string : toUpper;
        return toUpper(m.hit);
    }
    // Capitalize the letters 'a' and 'r':
    auto s = replaceAll!(baz)("Strap a rocket engine on a chicken.",
            regex("[ar]"));
    assert(s == "StRAp A Rocket engine on A chicken.");
}
1388 
/++
    A variation on $(LREF replaceAll) that instead of allocating a new string
    on each call outputs the result piece-wise to the `sink`. In particular
    this enables efficient construction of a final output incrementally.

    As with $(LREF replaceAll) there are 2 overloads - one with a format string,
    the other one with a user-defined functor.
+/
public @trusted void replaceAllInto(Sink, R, C, RegEx)
        (Sink sink, R input, RegEx re, const(C)[] format)
if (isOutputRange!(Sink, dchar) && isSomeString!R
    && is(C : dchar) && isRegexFor!(RegEx, R))
{
    auto found = matchAll(input, re);
    replaceMatchesInto!((caps, dest) => replaceFmt(format, caps, dest))
        (sink, input, found);
}
1405 
///ditto
public @trusted void replaceAllInto(alias fun, Sink, R, RegEx)
        (Sink sink, R input, RegEx re)
if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
{
    // `fun` is handed over as is; the low-level helper decides how to call it
    auto found = matchAll(input, re);
    replaceMatchesInto!fun(sink, input, found);
}
1413 
///
@system unittest
{
    // insert comma as thousands delimiter in fifty randomly produced big numbers
    import std.array, std.conv, std.random, std.range;
    static thousands = regex(`(?<=\d)(?=(\d\d\d)+\b)`, "g");
    auto buffer = appender!(char [])();
    enum ulong lowest = 10UL ^^ 10, highest = 10UL ^^ 19;
    foreach (round; 0 .. 50)
    {
        // the same sink is reused for every number
        buffer.clear();
        replaceAllInto(buffer, text(uniform(lowest, highest)), thousands, ",");
        // walking back from the end, every fourth character is a comma
        foreach (idx; iota(buffer.data.length - 4, 0, -4))
            assert(buffer.data[idx] == ',');
    }
}
1430 
// Runs every flavour of the replace API (format string, functor, sink)
// over all six string types.
@system unittest
{
    import std.array : appender;
    import std.conv;
    static foreach (S; AliasSeq!(string, wstring, dstring, char[], wchar[], dchar[]))
    {{
        // inputs
        S in1 = "curt trial".to!S();
        S in2 = "round dome".to!S();
        // expected results: *First = replaceFirst, *All = replaceAll
        S want1First = "court trial".to!S();
        S want2First = "hound dome".to!S();
        S want1All = "court trial".to!S();
        S want2All = "hound home".to!S();
        auto pat1 = regex("curt".to!S());
        auto pat2 = regex("[dr]o".to!S());

        // format-string overloads
        assert(replaceFirst(in1, pat1, "court") == want1First);
        assert(replaceFirst(in2, pat2, "ho") == want2First);
        assert(replaceAll(in1, pat1, "court") == want1All);
        assert(replaceAll(in2, pat2, "ho") == want2All);

        // functor overloads
        auto viaFunFirst = replaceFirst!(cap => cap[0][0]~"o".to!S()~cap[0][1..$])(in1, pat1);
        assert(viaFunFirst == want1First);
        assert(replaceFirst!(cap => "ho".to!S())(in2, pat2) == want2First);
        auto viaFunAll = replaceAll!(cap => cap[0][0]~"o".to!S()~cap[0][1..$])(in1, pat1);
        assert(viaFunAll == want1All);
        assert(replaceAll!(cap => "ho".to!S())(in2, pat2) == want2All);

        // sink overloads accumulate into the same appender
        auto output = appender!S();
        replaceFirstInto(output, in1, pat1, "court");
        assert(output.data == want1First);
        replaceFirstInto(output, in2, pat2, "ho");
        assert(output.data == want1First~want2First);
        replaceAllInto(output, in1, pat1, "court");
        assert(output.data == want1First~want2First~want1All);
        replaceAllInto(output, in2, pat2, "ho");
        assert(output.data == want1First~want2First~want1All~want2All);
    }}
}
1471 
/++
    Old API for replacement, operation depends on flags of pattern `re`.
    With "g" flag it performs the equivalent of $(LREF replaceAll) otherwise it
    works the same as $(LREF replaceFirst).

    The use of this function is $(RED discouraged), please use $(LREF replaceAll)
    or $(LREF replaceFirst) explicitly.

    Params:
    scheme = function used to produce the range of matches, $(LREF match) by default
    input = string to search
    re = compiled regular expression to use
    format = _format string to generate replacements from
+/
public R replace(alias scheme = match, R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // Forward the caller-selected `scheme` rather than hard-coding `match`,
    // so the template parameter actually takes effect.
    return replaceAllWith!((m, sink) => replaceFmt(format, m, sink), scheme)(input, re);
}
1485 
///ditto
public R replace(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // old-API flavour: matches are produced by `match` instead of `matchAll`
    alias viaMatch = replaceAllWith!(fun, match);
    return viaMatch(input, re);
}
1492 
/**
Splits a string `r` using a regular expression `pat` as a separator.

Params:
    keepSeparators = flag to specify if the matches should be in the resulting range
    r = the string to split
    pat = the pattern to split on
Returns:
    A lazy range of strings
*/
public struct Splitter(Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, alias RegEx = Regex)
if (isSomeString!Range && isRegexFor!(RegEx, Range))
{
private:
    Range _input;      // the string being split
    size_t _offset;    // start of the current piece within _input
    alias Rx = typeof(match(Range.init,RegEx.init));
    Rx _match;         // matcher positioned on the next separator

    // true while `front` is a separator rather than a piece between separators
    static if (keepSeparators) bool onMatch = false;

    @trusted this(Range input, RegEx separator)
    {//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted
        _input = input;
        // the separator is always searched for globally
        const re = separator.withFlags(separator.flags | RegexOption.global);
        if (!_input.empty)
        {
            _match = Rx(_input, re);

            // input starts with a separator: make it the first element
            static if (keepSeparators)
                if (_match.pre.empty)
                    popFront();
        }
        else
        {
            //there is nothing to match at all, make _offset > 0
            _offset = 1;
        }
    }

public:
    auto ref opSlice()
    {
        return this.save;
    }

    ///Forward range primitives.
    @property Range front()
    {
        import std.algorithm.comparison : min;

        assert(!empty && _offset <= _match.pre.length
                && _match.pre.length <= _input.length);

        static if (keepSeparators)
        {
            // currently positioned on a separator
            if (onMatch)
                return _match.hit();
        }
        // the piece between _offset and the next separator
        return _input[_offset .. min($, _match.pre.length)];
    }

    ///ditto
    @property bool empty()
    {
        static if (!keepSeparators)
            return _offset > _input.length;
        else
            return _offset >= _input.length;
    }

    ///ditto
    void popFront()
    {
        assert(!empty);
        if (_match.empty)
        {
            //No more separators, work is done here
            _offset = _input.length + 1;
            return;
        }

        static if (keepSeparators)
        {
            if (onMatch)
            {
                // leave the separator behind and look for the next one
                _offset += _match.hit.length;
                _match.popFront();
            }
            else
            {
                // move onto the separator that ends the current piece
                _offset = _match.pre.length;
            }

            onMatch = !onMatch;
        }
        else
        {
            //skip past the separator
            _offset = _match.pre.length + _match.hit.length;
            _match.popFront();
        }
    }

    ///ditto
    @property auto save()
    {
        return this;
    }
}
1610 
/// ditto
public Splitter!(keepSeparators, Range, RegEx) splitter(
    Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, RegEx)(Range r, RegEx pat)
if (
    is(BasicElementOf!Range : dchar) && isRegexFor!(RegEx, Range))
{
    alias Result = Splitter!(keepSeparators, Range, RegEx);
    return Result(r, pat);
}
1619 
///
@system unittest
{
    import std.algorithm.comparison : equal;
    // separators at both ends yield empty first and last pieces
    auto csv = ", abc, de,  fg, hi, ";
    auto pieces = splitter(csv, regex(", *"));
    assert(equal(pieces,
        ["", "abc", "de", "fg", "hi", ""]));
}
1628 
/// Split on a pattern, but keep the matches in the resulting range
@system unittest
{
    import std.algorithm.comparison : equal;
    import std.typecons : Yes;

    auto separator = regex(`([\.,])`);

    // separators appear between the pieces
    auto dateParts = "2003.04.05".splitter!(Yes.keepSeparators)(separator);
    assert(dateParts.equal(["2003", ".", "04", ".", "05"]));

    // a leading separator becomes the first element
    auto listParts = ",1,2,3".splitter!(Yes.keepSeparators)(separator);
    assert(listParts.equal([",", "1", ",", "2", ",", "3"]));
}
1645 
///An eager version of `splitter` that creates an array with split slices of `input`.
public @trusted String[] split(String, RegEx)(String input, RegEx rx)
if (isSomeString!String  && isRegexFor!(RegEx, String))
{
    import std.array : appender;
    auto pieces = appender!(String[])();
    // drain the lazy splitter into the array
    foreach (piece; splitter(input, rx))
        pieces.put(piece);
    return pieces.data;
}
1656 
/// Exception object thrown in case of errors during regex compilation.
public alias RegexException = std.regex.internal.ir.RegexException;
1659 
/++
  A range that lazily produces a string output escaped
  to be used inside of a regular expression.

  Every element of `r` found in the list of escapable characters is
  preceded by a backslash in the output.
+/
auto escaper(Range)(Range r)
{
    import std.algorithm.searching : find;
    static immutable escapables = [Escapables];
    static struct Escaper // template to deduce attributes
    {
        Range r;
        // true when the backslash for r.front has yet to be emitted
        bool escaped;

        @property ElementType!Range front()
        {
            if (!escaped)
                return r.front;
            return '\\';
        }

        @property bool empty()
        {
            return r.empty;
        }

        void popFront()
        {
            if (escaped)
            {
                // the backslash is out; next comes the character itself
                escaped = false;
                return;
            }
            r.popFront();
            // `escaped` is false here, so this only ever raises the flag
            escaped = !r.empty && !escapables.find(r.front).empty;
        }

        @property auto save()
        {
            return Escaper(r.save, escaped);
        }
    }

    // does the very first element need a backslash?
    const startsEscaped = !r.empty && !escapables.find(r.front).empty;
    return Escaper(r, startsEscaped);
}
1698 
///
@system unittest
{
    import std.algorithm.comparison;
    import std.regex;
    // braces and asterisks are regex metacharacters and get a backslash
    string raw = `This is {unfriendly} to *regex*`;
    auto safe = raw.escaper;
    assert(safe.equal(`This is \{unfriendly\} to \*regex\*`));
}
1707 
@system unittest
{
    import std.algorithm.comparison;
    import std.conv;
    // escaper over every string width, plus the empty-input edge case
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
      auto caret = "^".to!S;
      assert(caret.escaper.equal(`\^`));
      auto nothing = "";
      assert(nothing.escaper.equal(""));
    }}
}