1 // Written in the D programming language. 2 3 /** 4 String handling functions. 5 6 $(SCRIPT inhibitQuickIndex = 1;) 7 8 $(DIVC quickindex, 9 $(BOOKTABLE , 10 $(TR $(TH Category) $(TH Functions) ) 11 $(TR $(TDNW Searching) 12 $(TD 13 $(MYREF column) 14 $(MYREF indexOf) 15 $(MYREF indexOfAny) 16 $(MYREF indexOfNeither) 17 $(MYREF lastIndexOf) 18 $(MYREF lastIndexOfAny) 19 $(MYREF lastIndexOfNeither) 20 ) 21 ) 22 $(TR $(TDNW Comparison) 23 $(TD 24 $(MYREF isNumeric) 25 ) 26 ) 27 $(TR $(TDNW Mutation) 28 $(TD 29 $(MYREF capitalize) 30 ) 31 ) 32 $(TR $(TDNW Pruning and Filling) 33 $(TD 34 $(MYREF center) 35 $(MYREF chomp) 36 $(MYREF chompPrefix) 37 $(MYREF chop) 38 $(MYREF detabber) 39 $(MYREF detab) 40 $(MYREF entab) 41 $(MYREF entabber) 42 $(MYREF leftJustify) 43 $(MYREF outdent) 44 $(MYREF rightJustify) 45 $(MYREF strip) 46 $(MYREF stripLeft) 47 $(MYREF stripRight) 48 $(MYREF wrap) 49 ) 50 ) 51 $(TR $(TDNW Substitution) 52 $(TD 53 $(MYREF abbrev) 54 $(MYREF soundex) 55 $(MYREF soundexer) 56 $(MYREF succ) 57 $(MYREF tr) 58 $(MYREF translate) 59 ) 60 ) 61 $(TR $(TDNW Miscellaneous) 62 $(TD 63 $(MYREF assumeUTF) 64 $(MYREF fromStringz) 65 $(MYREF lineSplitter) 66 $(MYREF representation) 67 $(MYREF splitLines) 68 $(MYREF toStringz) 69 ) 70 ))) 71 72 Objects of types `string`, `wstring`, and `dstring` are value types 73 and cannot be mutated element-by-element. For using mutation during building 74 strings, use `char[]`, `wchar[]`, or `dchar[]`. The `xxxstring` 75 types are preferable because they don't exhibit undesired aliasing, thus 76 making code more robust. 77 78 The following functions are publicly imported: 79 80 $(BOOKTABLE , 81 $(TR $(TH Module) $(TH Functions) ) 82 $(LEADINGROW Publicly imported functions) 83 $(TR $(TD std.algorithm) 84 $(TD 85 $(REF_SHORT cmp, std,algorithm,comparison) 86 $(REF_SHORT count, std,algorithm,searching) 87 $(REF_SHORT endsWith, std,algorithm,searching) 88 $(REF_SHORT startsWith, std,algorithm,searching) 89 )) 90 $(TR $(TD std.array) 91 $(TD 92 $(REF_SHORT join, std,array) 93 $(REF_SHORT replace, std,array) 94 $(REF_SHORT replaceInPlace, std,array) 95 $(REF_SHORT split, std,array) 96 $(REF_SHORT empty, std,array) 97 )) 98 $(TR $(TD std.format) 99 $(TD 100 $(REF_SHORT format, std,format) 101 $(REF_SHORT sformat, std,format) 102 )) 103 $(TR $(TD std.uni) 104 $(TD 105 $(REF_SHORT icmp, std,uni) 106 $(REF_SHORT toLower, std,uni) 107 $(REF_SHORT toLowerInPlace, std,uni) 108 $(REF_SHORT toUpper, std,uni) 109 $(REF_SHORT toUpperInPlace, std,uni) 110 )) 111 ) 112 113 There is a rich set of functions for string handling defined in other modules. 114 Functions related to Unicode and ASCII are found in $(MREF std, uni) 115 and $(MREF std, ascii), respectively. Other functions that have a 116 wider generality than just strings can be found in $(MREF std, algorithm) 117 and $(MREF std, range). 118 119 See_Also: 120 $(LIST 121 $(MREF std, algorithm) and 122 $(MREF std, range) 123 for generic range algorithms 124 , 125 $(MREF std, ascii) 126 for functions that work with ASCII strings 127 , 128 $(MREF std, uni) 129 for functions that work with unicode strings 130 ) 131 132 Copyright: Copyright The D Language Foundation 2007-. 133 134 License: $(HTTP boost.org/LICENSE_1_0.txt, Boost License 1.0). 135 136 Authors: $(HTTP digitalmars.com, Walter Bright), 137 $(HTTP erdani.org, Andrei Alexandrescu), 138 $(HTTP jmdavisprog.com, Jonathan M Davis), 139 and David L. 'SpottedTiger' Davis 140 141 Source: $(PHOBOSSRC std/string.d) 142 143 */ 144 module std..string; 145 146 version (StdUnittest) 147 { 148 private: 149 struct TestAliasedString 150 { 151 string get() @safe @nogc pure nothrow { return _s; } 152 alias get this; 153 @disable this(this); 154 string _s; 155 } 156 157 bool testAliasedString(alias func, Args...)(string s, Args args) 158 { 159 import std.algorithm.comparison : equal; 160 auto a = func(TestAliasedString(s), args); 161 auto b = func(s, args); 162 static if (is(typeof(equal(a, b)))) 163 { 164 // For ranges, compare contents instead of object identity. 165 return equal(a, b); 166 } 167 else 168 { 169 return a == b; 170 } 171 } 172 } 173 174 public import std.format : format, sformat; 175 import std.typecons : Flag, Yes, No; 176 public import std.uni : icmp, toLower, toLowerInPlace, toUpper, toUpperInPlace; 177 178 import std.meta : AliasSeq, staticIndexOf; 179 import std.range.primitives : back, ElementEncodingType, ElementType, front, 180 hasLength, hasSlicing, isBidirectionalRange, isForwardRange, isInfinite, 181 isInputRange, isOutputRange, isRandomAccessRange, popBack, popFront, put, 182 save; 183 import std.traits : isConvertibleToString, isNarrowString, isSomeChar, 184 isSomeString, StringTypeOf, Unqual; 185 186 //public imports for backward compatibility 187 public import std.algorithm.comparison : cmp; 188 public import std.algorithm.searching : startsWith, endsWith, count; 189 public import std.array : join, replace, replaceInPlace, split, empty; 190 191 /* ************* Exceptions *************** */ 192 193 /++ 194 Exception thrown on errors in std.string functions. 195 +/ 196 class StringException : Exception 197 { 198 import std.exception : basicExceptionCtors; 199 200 /// 201 mixin basicExceptionCtors; 202 } 203 204 /// 205 @safe pure unittest 206 { 207 import std.exception : assertThrown; 208 auto bad = " a\n\tb\n c"; 209 assertThrown!StringException(bad.outdent); 210 } 211 212 /++ 213 Params: 214 cString = A null-terminated c-style string. 215 216 Returns: A D-style array of `char`, `wchar` or `dchar` referencing the same 217 string. The returned array will retain the same type qualifiers as the input. 218 219 $(RED Important Note:) The returned array is a slice of the original buffer. 220 The original data is not changed and not copied. 221 +/ 222 inout(Char)[] fromStringz(Char)(return scope inout(Char)* cString) @nogc @system pure nothrow 223 if (isSomeChar!Char) 224 { 225 import core.stdc.stddef : wchar_t; 226 227 static if (is(immutable Char == immutable char)) 228 import core.stdc..string : cstrlen = strlen; 229 else static if (is(immutable Char == immutable wchar_t)) 230 import core.stdc.wchar_ : cstrlen = wcslen; 231 else 232 static size_t cstrlen(scope const Char* s) 233 { 234 const(Char)* p = s; 235 while (*p) 236 ++p; 237 return p - s; 238 } 239 240 return cString ? cString[0 .. cstrlen(cString)] : null; 241 } 242 243 /// 244 @system pure unittest 245 { 246 assert(fromStringz("foo\0"c.ptr) == "foo"c); 247 assert(fromStringz("foo\0"w.ptr) == "foo"w); 248 assert(fromStringz("foo\0"d.ptr) == "foo"d); 249 250 assert(fromStringz("福\0"c.ptr) == "福"c); 251 assert(fromStringz("福\0"w.ptr) == "福"w); 252 assert(fromStringz("福\0"d.ptr) == "福"d); 253 } 254 255 @system pure unittest 256 { 257 char* a = null; 258 assert(fromStringz(a) == null); 259 wchar* b = null; 260 assert(fromStringz(b) == null); 261 dchar* c = null; 262 assert(fromStringz(c) == null); 263 264 const char* d = "foo\0"; 265 assert(fromStringz(d) == "foo"); 266 267 immutable char* e = "foo\0"; 268 assert(fromStringz(e) == "foo"); 269 270 const wchar* f = "foo\0"; 271 assert(fromStringz(f) == "foo"); 272 273 immutable wchar* g = "foo\0"; 274 assert(fromStringz(g) == "foo"); 275 276 const dchar* h = "foo\0"; 277 assert(fromStringz(h) == "foo"); 278 279 immutable dchar* i = "foo\0"; 280 assert(fromStringz(i) == "foo"); 281 282 immutable wchar z = 0x0000; 283 // Test some surrogate pairs 284 // high surrogates are in the range 0xD800 .. 0xDC00 285 // low surrogates are in the range 0xDC00 .. 0xE000 286 // since UTF16 doesn't specify endianness we test both. 287 foreach (wchar[] t; [[0xD800, 0xDC00], [0xD800, 0xE000], [0xDC00, 0xDC00], 288 [0xDC00, 0xE000], [0xDA00, 0xDE00]]) 289 { 290 immutable hi = t[0], lo = t[1]; 291 assert(fromStringz([hi, lo, z].ptr) == [hi, lo]); 292 assert(fromStringz([lo, hi, z].ptr) == [lo, hi]); 293 } 294 } 295 296 /++ 297 Params: 298 s = A D-style string. 299 300 Returns: A C-style null-terminated string equivalent to `s`. `s` 301 must not contain embedded `'\0'`'s as any C function will treat the 302 first `'\0'` that it sees as the end of the string. If `s.empty` is 303 `true`, then a string containing only `'\0'` is returned. 304 305 $(RED Important Note:) When passing a `char*` to a C function, and the C 306 function keeps it around for any reason, make sure that you keep a 307 reference to it in your D code. Otherwise, it may become invalid during a 308 garbage collection cycle and cause a nasty bug when the C code tries to use 309 it. 310 +/ 311 immutable(char)* toStringz(scope const(char)[] s) @trusted pure nothrow 312 out (result) 313 { 314 import core.stdc..string : strlen, memcmp; 315 if (result) 316 { 317 auto slen = s.length; 318 while (slen > 0 && s[slen-1] == 0) --slen; 319 assert(strlen(result) == slen, 320 "The result c string is shorter than the in input string"); 321 assert(result[0 .. slen] == s[0 .. slen], 322 "The input and result string are not equal"); 323 } 324 } 325 do 326 { 327 import std.exception : assumeUnique; 328 /+ Unfortunately, this isn't reliable. 329 We could make this work if string literals are put 330 in read-only memory and we test if s[] is pointing into 331 that. 332 333 /* Peek past end of s[], if it's 0, no conversion necessary. 334 * Note that the compiler will put a 0 past the end of static 335 * strings, and the storage allocator will put a 0 past the end 336 * of newly allocated char[]'s. 337 */ 338 char* p = &s[0] + s.length; 339 if (*p == 0) 340 return s; 341 +/ 342 343 // Need to make a copy 344 auto copy = new char[s.length + 1]; 345 copy[0 .. s.length] = s[]; 346 copy[s.length] = 0; 347 348 return &assumeUnique(copy)[0]; 349 } 350 351 /++ Ditto +/ 352 immutable(char)* toStringz(return scope string s) @trusted pure nothrow 353 { 354 if (s.empty) return "".ptr; 355 /* Peek past end of s[], if it's 0, no conversion necessary. 356 * Note that the compiler will put a 0 past the end of static 357 * strings, and the storage allocator will put a 0 past the end 358 * of newly allocated char[]'s. 359 */ 360 immutable p = s.ptr + s.length; 361 // Is p dereferenceable? A simple test: if the p points to an 362 // address multiple of 4, then conservatively assume the pointer 363 // might be pointing to a new block of memory, which might be 364 // unreadable. Otherwise, it's definitely pointing to valid 365 // memory. 366 if ((cast(size_t) p & 3) && *p == 0) 367 return &s[0]; 368 return toStringz(cast(const char[]) s); 369 } 370 371 /// 372 pure nothrow @system unittest 373 { 374 import core.stdc..string : strlen; 375 import std.conv : to; 376 377 auto p = toStringz("foo"); 378 assert(strlen(p) == 3); 379 const(char)[] foo = "abbzxyzzy"; 380 p = toStringz(foo[3 .. 5]); 381 assert(strlen(p) == 2); 382 383 string test = ""; 384 p = toStringz(test); 385 assert(*p == 0); 386 387 test = "\0"; 388 p = toStringz(test); 389 assert(*p == 0); 390 391 test = "foo\0"; 392 p = toStringz(test); 393 assert(p[0] == 'f' && p[1] == 'o' && p[2] == 'o' && p[3] == 0); 394 395 const string test2 = ""; 396 p = toStringz(test2); 397 assert(*p == 0); 398 } 399 400 401 /** 402 Flag indicating whether a search is case-sensitive. 403 */ 404 alias CaseSensitive = Flag!"caseSensitive"; 405 406 /++ 407 Searches for character in range. 408 409 Params: 410 s = string or InputRange of characters to search in correct UTF format 411 c = character to search for 412 startIdx = starting index to a well-formed code point 413 cs = `Yes.caseSensitive` or `No.caseSensitive` 414 415 Returns: 416 the index of the first occurrence of `c` in `s` with 417 respect to the start index `startIdx`. If `c` 418 is not found, then `-1` is returned. 419 If `c` is found the value of the returned index is at least 420 `startIdx`. 421 If the parameters are not valid UTF, the result will still 422 be in the range [-1 .. s.length], but will not be reliable otherwise. 423 424 Throws: 425 If the sequence starting at `startIdx` does not represent a well 426 formed codepoint, then a $(REF UTFException, std,utf) may be thrown. 427 428 See_Also: $(REF countUntil, std,algorithm,searching) 429 +/ 430 ptrdiff_t indexOf(Range)(Range s, dchar c, CaseSensitive cs = Yes.caseSensitive) 431 if (isInputRange!Range && isSomeChar!(ElementType!Range) && !isSomeString!Range) 432 { 433 return _indexOf(s, c, cs); 434 } 435 436 /// Ditto 437 ptrdiff_t indexOf(C)(scope const(C)[] s, dchar c, CaseSensitive cs = Yes.caseSensitive) 438 if (isSomeChar!C) 439 { 440 return _indexOf(s, c, cs); 441 } 442 443 /// Ditto 444 ptrdiff_t indexOf(Range)(Range s, dchar c, size_t startIdx, CaseSensitive cs = Yes.caseSensitive) 445 if (isInputRange!Range && isSomeChar!(ElementType!Range) && !isSomeString!Range) 446 { 447 return _indexOf(s, c, startIdx, cs); 448 } 449 450 /// Ditto 451 ptrdiff_t indexOf(C)(scope const(C)[] s, dchar c, size_t startIdx, CaseSensitive cs = Yes.caseSensitive) 452 if (isSomeChar!C) 453 { 454 return _indexOf(s, c, startIdx, cs); 455 } 456 457 /// 458 @safe pure unittest 459 { 460 import std.typecons : No; 461 462 string s = "Hello World"; 463 assert(indexOf(s, 'W') == 6); 464 assert(indexOf(s, 'Z') == -1); 465 assert(indexOf(s, 'w', No.caseSensitive) == 6); 466 } 467 468 /// 469 @safe pure unittest 470 { 471 import std.typecons : No; 472 473 string s = "Hello World"; 474 assert(indexOf(s, 'W', 4) == 6); 475 assert(indexOf(s, 'Z', 100) == -1); 476 assert(indexOf(s, 'w', 3, No.caseSensitive) == 6); 477 } 478 479 @safe pure unittest 480 { 481 assert(testAliasedString!indexOf("std/string.d", '/')); 482 483 enum S : string { a = "std/string.d" } 484 assert(S.a.indexOf('/') == 3); 485 486 char[S.a.length] sa = S.a[]; 487 assert(sa.indexOf('/') == 3); 488 } 489 490 @safe pure unittest 491 { 492 import std.conv : to; 493 import std.exception : assertCTFEable; 494 import std.traits : EnumMembers; 495 import std.utf : byChar, byWchar, byDchar; 496 497 assertCTFEable!( 498 { 499 static foreach (S; AliasSeq!(string, wstring, dstring)) 500 {{ 501 assert(indexOf(cast(S) null, cast(dchar)'a') == -1); 502 assert(indexOf(to!S("def"), cast(dchar)'a') == -1); 503 assert(indexOf(to!S("abba"), cast(dchar)'a') == 0); 504 assert(indexOf(to!S("def"), cast(dchar)'f') == 2); 505 506 assert(indexOf(to!S("def"), cast(dchar)'a', No.caseSensitive) == -1); 507 assert(indexOf(to!S("def"), cast(dchar)'a', No.caseSensitive) == -1); 508 assert(indexOf(to!S("Abba"), cast(dchar)'a', No.caseSensitive) == 0); 509 assert(indexOf(to!S("def"), cast(dchar)'F', No.caseSensitive) == 2); 510 assert(indexOf(to!S("ödef"), 'ö', No.caseSensitive) == 0); 511 512 S sPlts = "Mars: the fourth Rock (Planet) from the Sun."; 513 assert(indexOf("def", cast(char)'f', No.caseSensitive) == 2); 514 assert(indexOf(sPlts, cast(char)'P', No.caseSensitive) == 23); 515 assert(indexOf(sPlts, cast(char)'R', No.caseSensitive) == 2); 516 }} 517 518 foreach (cs; EnumMembers!CaseSensitive) 519 { 520 assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', cs) == 9); 521 assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', cs) == 7); 522 assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', cs) == 6); 523 524 assert(indexOf("hello\U00010143\u0100\U00010143".byChar, '\u0100', cs) == 9); 525 assert(indexOf("hello\U00010143\u0100\U00010143".byWchar, '\u0100', cs) == 7); 526 assert(indexOf("hello\U00010143\u0100\U00010143".byDchar, '\u0100', cs) == 6); 527 528 assert(indexOf("hello\U000007FF\u0100\U00010143".byChar, 'l', cs) == 2); 529 assert(indexOf("hello\U000007FF\u0100\U00010143".byChar, '\u0100', cs) == 7); 530 assert(indexOf("hello\U0000EFFF\u0100\U00010143".byChar, '\u0100', cs) == 8); 531 532 assert(indexOf("hello\U00010100".byWchar, '\U00010100', cs) == 5); 533 assert(indexOf("hello\U00010100".byWchar, '\U00010101', cs) == -1); 534 } 535 536 char[10] fixedSizeArray = "0123456789"; 537 assert(indexOf(fixedSizeArray, '2') == 2); 538 }); 539 } 540 541 @safe pure unittest 542 { 543 assert(testAliasedString!indexOf("std/string.d", '/', 0)); 544 assert(testAliasedString!indexOf("std/string.d", '/', 1)); 545 assert(testAliasedString!indexOf("std/string.d", '/', 4)); 546 547 enum S : string { a = "std/string.d" } 548 assert(S.a.indexOf('/', 0) == 3); 549 assert(S.a.indexOf('/', 1) == 3); 550 assert(S.a.indexOf('/', 4) == -1); 551 552 char[S.a.length] sa = S.a[]; 553 assert(sa.indexOf('/', 0) == 3); 554 assert(sa.indexOf('/', 1) == 3); 555 assert(sa.indexOf('/', 4) == -1); 556 } 557 558 @safe pure unittest 559 { 560 import std.conv : to; 561 import std.traits : EnumMembers; 562 import std.utf : byCodeUnit, byChar, byWchar; 563 564 assert("hello".byCodeUnit.indexOf(cast(dchar)'l', 1) == 2); 565 assert("hello".byWchar.indexOf(cast(dchar)'l', 1) == 2); 566 assert("hello".byWchar.indexOf(cast(dchar)'l', 6) == -1); 567 568 static foreach (S; AliasSeq!(string, wstring, dstring)) 569 {{ 570 assert(indexOf(cast(S) null, cast(dchar)'a', 1) == -1); 571 assert(indexOf(to!S("def"), cast(dchar)'a', 1) == -1); 572 assert(indexOf(to!S("abba"), cast(dchar)'a', 1) == 3); 573 assert(indexOf(to!S("def"), cast(dchar)'f', 1) == 2); 574 575 assert((to!S("def")).indexOf(cast(dchar)'a', 1, 576 No.caseSensitive) == -1); 577 assert(indexOf(to!S("def"), cast(dchar)'a', 1, 578 No.caseSensitive) == -1); 579 assert(indexOf(to!S("def"), cast(dchar)'a', 12, 580 No.caseSensitive) == -1); 581 assert(indexOf(to!S("AbbA"), cast(dchar)'a', 2, 582 No.caseSensitive) == 3); 583 assert(indexOf(to!S("def"), cast(dchar)'F', 2, No.caseSensitive) == 2); 584 585 S sPlts = "Mars: the fourth Rock (Planet) from the Sun."; 586 assert(indexOf("def", cast(char)'f', cast(uint) 2, 587 No.caseSensitive) == 2); 588 assert(indexOf(sPlts, cast(char)'P', 12, No.caseSensitive) == 23); 589 assert(indexOf(sPlts, cast(char)'R', cast(ulong) 1, 590 No.caseSensitive) == 2); 591 }} 592 593 foreach (cs; EnumMembers!CaseSensitive) 594 { 595 assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', 2, cs) 596 == 9); 597 assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', 3, cs) 598 == 7); 599 assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', 6, cs) 600 == 6); 601 } 602 } 603 604 private ptrdiff_t _indexOf(Range)(Range s, dchar c, CaseSensitive cs = Yes.caseSensitive) 605 if (isInputRange!Range && isSomeChar!(ElementType!Range)) 606 { 607 static import std.ascii; 608 static import std.uni; 609 import std.utf : byDchar, byCodeUnit, UTFException, codeLength; 610 alias Char = Unqual!(ElementEncodingType!Range); 611 612 if (cs == Yes.caseSensitive) 613 { 614 static if (Char.sizeof == 1 && isSomeString!Range) 615 { 616 if (std.ascii.isASCII(c) && !__ctfe) 617 { // Plain old ASCII 618 static ptrdiff_t trustedmemchr(Range s, char c) @trusted 619 { 620 import core.stdc..string : memchr; 621 const p = cast(const(Char)*)memchr(s.ptr, c, s.length); 622 return p ? p - s.ptr : -1; 623 } 624 625 return trustedmemchr(s, cast(char) c); 626 } 627 } 628 629 static if (Char.sizeof == 1) 630 { 631 if (c <= 0x7F) 632 { 633 ptrdiff_t i; 634 foreach (const c2; s) 635 { 636 if (c == c2) 637 return i; 638 ++i; 639 } 640 } 641 else 642 { 643 ptrdiff_t i; 644 foreach (const c2; s.byDchar()) 645 { 646 if (c == c2) 647 return i; 648 i += codeLength!Char(c2); 649 } 650 } 651 } 652 else static if (Char.sizeof == 2) 653 { 654 if (c <= 0xFFFF) 655 { 656 ptrdiff_t i; 657 foreach (const c2; s) 658 { 659 if (c == c2) 660 return i; 661 ++i; 662 } 663 } 664 else if (c <= 0x10FFFF) 665 { 666 // Encode UTF-16 surrogate pair 667 const wchar c1 = cast(wchar)((((c - 0x10000) >> 10) & 0x3FF) + 0xD800); 668 const wchar c2 = cast(wchar)(((c - 0x10000) & 0x3FF) + 0xDC00); 669 ptrdiff_t i; 670 for (auto r = s.byCodeUnit(); !r.empty; r.popFront()) 671 { 672 if (c1 == r.front) 673 { 674 r.popFront(); 675 if (r.empty) // invalid UTF - missing second of pair 676 break; 677 if (c2 == r.front) 678 return i; 679 ++i; 680 } 681 ++i; 682 } 683 } 684 } 685 else static if (Char.sizeof == 4) 686 { 687 ptrdiff_t i; 688 foreach (const c2; s) 689 { 690 if (c == c2) 691 return i; 692 ++i; 693 } 694 } 695 else 696 static assert(0); 697 return -1; 698 } 699 else 700 { 701 if (std.ascii.isASCII(c)) 702 { // Plain old ASCII 703 immutable c1 = cast(char) std.ascii.toLower(c); 704 705 ptrdiff_t i; 706 foreach (const c2; s.byCodeUnit()) 707 { 708 if (c1 == std.ascii.toLower(c2)) 709 return i; 710 ++i; 711 } 712 } 713 else 714 { // c is a universal character 715 immutable c1 = std.uni.toLower(c); 716 717 ptrdiff_t i; 718 foreach (const c2; s.byDchar()) 719 { 720 if (c1 == std.uni.toLower(c2)) 721 return i; 722 i += codeLength!Char(c2); 723 } 724 } 725 } 726 return -1; 727 } 728 729 private ptrdiff_t _indexOf(Range)(Range s, dchar c, size_t startIdx, CaseSensitive cs = Yes.caseSensitive) 730 if (isInputRange!Range && isSomeChar!(ElementType!Range)) 731 { 732 static if (isSomeString!(typeof(s)) || 733 (hasSlicing!(typeof(s)) && hasLength!(typeof(s)))) 734 { 735 if (startIdx < s.length) 736 { 737 ptrdiff_t foundIdx = indexOf(s[startIdx .. $], c, cs); 738 if (foundIdx != -1) 739 { 740 return foundIdx + cast(ptrdiff_t) startIdx; 741 } 742 } 743 } 744 else 745 { 746 foreach (i; 0 .. startIdx) 747 { 748 if (s.empty) 749 return -1; 750 s.popFront(); 751 } 752 ptrdiff_t foundIdx = indexOf(s, c, cs); 753 if (foundIdx != -1) 754 { 755 return foundIdx + cast(ptrdiff_t) startIdx; 756 } 757 } 758 return -1; 759 } 760 761 private template _indexOfStr(CaseSensitive cs) 762 { 763 private ptrdiff_t _indexOfStr(Range, Char)(Range s, const(Char)[] sub) 764 if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) && 765 isSomeChar!Char) 766 { 767 alias Char1 = Unqual!(ElementEncodingType!Range); 768 769 static if (isSomeString!Range) 770 { 771 static if (is(Char1 == Char) && cs == Yes.caseSensitive) 772 { 773 import std.algorithm.searching : countUntil; 774 return s.representation.countUntil(sub.representation); 775 } 776 else 777 { 778 import std.algorithm.searching : find; 779 780 const(Char1)[] balance; 781 static if (cs == Yes.caseSensitive) 782 { 783 balance = find(s, sub); 784 } 785 else 786 { 787 balance = find! 788 ((a, b) => toLower(a) == toLower(b)) 789 (s, sub); 790 } 791 return () @trusted { return balance.empty ? -1 : balance.ptr - s.ptr; } (); 792 } 793 } 794 else 795 { 796 if (s.empty) 797 return -1; 798 if (sub.empty) 799 return 0; // degenerate case 800 801 import std.utf : byDchar, codeLength; 802 auto subr = sub.byDchar; // decode sub[] by dchar's 803 dchar sub0 = subr.front; // cache first character of sub[] 804 subr.popFront(); 805 806 // Special case for single character search 807 if (subr.empty) 808 return indexOf(s, sub0, cs); 809 810 static if (cs == No.caseSensitive) 811 sub0 = toLower(sub0); 812 813 /* Classic double nested loop search algorithm 814 */ 815 ptrdiff_t index = 0; // count code unit index into s 816 for (auto sbydchar = s.byDchar(); !sbydchar.empty; sbydchar.popFront()) 817 { 818 dchar c2 = sbydchar.front; 819 static if (cs == No.caseSensitive) 820 c2 = toLower(c2); 821 if (c2 == sub0) 822 { 823 auto s2 = sbydchar.save; // why s must be a forward range 824 foreach (c; subr.save) 825 { 826 s2.popFront(); 827 if (s2.empty) 828 return -1; 829 static if (cs == Yes.caseSensitive) 830 { 831 if (c != s2.front) 832 goto Lnext; 833 } 834 else 835 { 836 if (toLower(c) != toLower(s2.front)) 837 goto Lnext; 838 } 839 } 840 return index; 841 } 842 Lnext: 843 index += codeLength!Char1(c2); 844 } 845 return -1; 846 } 847 } 848 } 849 850 /++ 851 Searches for substring in `s`. 852 853 Params: 854 s = string or ForwardRange of characters to search in correct UTF format 855 sub = substring to search for 856 startIdx = the index into s to start searching from 857 cs = `Yes.caseSensitive` (default) or `No.caseSensitive` 858 859 Returns: 860 the index of the first occurrence of `sub` in `s` with 861 respect to the start index `startIdx`. If `sub` is not found, 862 then `-1` is returned. 863 If the arguments are not valid UTF, the result will still 864 be in the range [-1 .. s.length], but will not be reliable otherwise. 865 If `sub` is found the value of the returned index is at least 866 `startIdx`. 867 868 Throws: 869 If the sequence starting at `startIdx` does not represent a well 870 formed codepoint, then a $(REF UTFException, std,utf) may be thrown. 871 872 Bugs: 873 Does not work with case insensitive strings where the mapping of 874 tolower and toupper is not 1:1. 875 +/ 876 ptrdiff_t indexOf(Range, Char)(Range s, const(Char)[] sub) 877 if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) && 878 isSomeChar!Char) 879 { 880 return _indexOfStr!(Yes.caseSensitive)(s, sub); 881 } 882 883 /// Ditto 884 ptrdiff_t indexOf(Range, Char)(Range s, const(Char)[] sub, in CaseSensitive cs) 885 if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) && 886 isSomeChar!Char) 887 { 888 if (cs == Yes.caseSensitive) 889 return indexOf(s, sub); 890 else 891 return _indexOfStr!(No.caseSensitive)(s, sub); 892 } 893 894 /// Ditto 895 ptrdiff_t indexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub, 896 in size_t startIdx) 897 @safe 898 if (isSomeChar!Char1 && isSomeChar!Char2) 899 { 900 if (startIdx >= s.length) 901 return -1; 902 ptrdiff_t foundIdx = indexOf(s[startIdx .. $], sub); 903 if (foundIdx == -1) 904 return -1; 905 return foundIdx + cast(ptrdiff_t) startIdx; 906 } 907 908 /// Ditto 909 ptrdiff_t indexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub, 910 in size_t startIdx, in CaseSensitive cs) 911 @safe 912 if (isSomeChar!Char1 && isSomeChar!Char2) 913 { 914 if (startIdx >= s.length) 915 return -1; 916 ptrdiff_t foundIdx = indexOf(s[startIdx .. $], sub, cs); 917 if (foundIdx == -1) 918 return -1; 919 return foundIdx + cast(ptrdiff_t) startIdx; 920 } 921 922 /// 923 @safe pure unittest 924 { 925 import std.typecons : No; 926 927 string s = "Hello World"; 928 assert(indexOf(s, "Wo", 4) == 6); 929 assert(indexOf(s, "Zo", 100) == -1); 930 assert(indexOf(s, "wo", 3, No.caseSensitive) == 6); 931 } 932 933 /// 934 @safe pure unittest 935 { 936 import std.typecons : No; 937 938 string s = "Hello World"; 939 assert(indexOf(s, "Wo") == 6); 940 assert(indexOf(s, "Zo") == -1); 941 assert(indexOf(s, "wO", No.caseSensitive) == 6); 942 } 943 944 @safe pure nothrow @nogc unittest 945 { 946 string s = "Hello World"; 947 assert(indexOf(s, "Wo", 4) == 6); 948 assert(indexOf(s, "Zo", 100) == -1); 949 assert(indexOf(s, "Wo") == 6); 950 assert(indexOf(s, "Zo") == -1); 951 } 952 953 ptrdiff_t indexOf(Range, Char)(auto ref Range s, const(Char)[] sub) 954 if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) && 955 isSomeChar!Char) && 956 is(StringTypeOf!Range)) 957 { 958 return indexOf!(StringTypeOf!Range)(s, sub); 959 } 960 961 ptrdiff_t indexOf(Range, Char)(auto ref Range s, const(Char)[] sub, 962 in CaseSensitive cs) 963 if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) && 964 isSomeChar!Char) && 965 is(StringTypeOf!Range)) 966 { 967 return indexOf!(StringTypeOf!Range)(s, sub, cs); 968 } 969 970 @safe pure nothrow @nogc unittest 971 { 972 assert(testAliasedString!indexOf("std/string.d", "string")); 973 } 974 975 @safe pure unittest 976 { 977 import std.conv : to; 978 import std.exception : assertCTFEable; 979 import std.traits : EnumMembers; 980 981 assertCTFEable!( 982 { 983 static foreach (S; AliasSeq!(string, wstring, dstring)) 984 { 985 static foreach (T; AliasSeq!(string, wstring, dstring)) 986 {{ 987 assert(indexOf(cast(S) null, to!T("a")) == -1); 988 assert(indexOf(to!S("def"), to!T("a")) == -1); 989 assert(indexOf(to!S("abba"), to!T("a")) == 0); 990 assert(indexOf(to!S("def"), to!T("f")) == 2); 991 assert(indexOf(to!S("dfefffg"), to!T("fff")) == 3); 992 assert(indexOf(to!S("dfeffgfff"), to!T("fff")) == 6); 993 994 assert(indexOf(to!S("dfeffgfff"), to!T("a"), No.caseSensitive) == -1); 995 assert(indexOf(to!S("def"), to!T("a"), No.caseSensitive) == -1); 996 assert(indexOf(to!S("abba"), to!T("a"), No.caseSensitive) == 0); 997 assert(indexOf(to!S("def"), to!T("f"), No.caseSensitive) == 2); 998 assert(indexOf(to!S("dfefffg"), to!T("fff"), No.caseSensitive) == 3); 999 assert(indexOf(to!S("dfeffgfff"), to!T("fff"), No.caseSensitive) == 6); 1000 1001 S sPlts = "Mars: the fourth Rock (Planet) from the Sun."; 1002 S sMars = "Who\'s \'My Favorite Maritian?\'"; 1003 1004 assert(indexOf(sMars, to!T("MY fAVe"), No.caseSensitive) == -1); 1005 assert(indexOf(sMars, to!T("mY fAVOriTe"), No.caseSensitive) == 7); 1006 assert(indexOf(sPlts, to!T("mArS:"), No.caseSensitive) == 0); 1007 assert(indexOf(sPlts, to!T("rOcK"), No.caseSensitive) == 17); 1008 assert(indexOf(sPlts, to!T("Un."), No.caseSensitive) == 41); 1009 assert(indexOf(sPlts, to!T(sPlts), No.caseSensitive) == 0); 1010 1011 assert(indexOf("\u0100", to!T("\u0100"), No.caseSensitive) == 0); 1012 1013 // Thanks to Carlos Santander B. and zwang 1014 assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y", 1015 to!T("page-break-before"), No.caseSensitive) == -1); 1016 }} 1017 1018 foreach (cs; EnumMembers!CaseSensitive) 1019 { 1020 assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"), cs) == 9); 1021 assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"), cs) == 7); 1022 assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"), cs) == 6); 1023 } 1024 } 1025 }); 1026 } 1027 1028 @safe pure @nogc nothrow 1029 unittest 1030 { 1031 import std.traits : EnumMembers; 1032 import std.utf : byWchar; 1033 1034 foreach (cs; EnumMembers!CaseSensitive) 1035 { 1036 assert(indexOf("".byWchar, "", cs) == -1); 1037 assert(indexOf("hello".byWchar, "", cs) == 0); 1038 assert(indexOf("hello".byWchar, "l", cs) == 2); 1039 assert(indexOf("heLLo".byWchar, "LL", cs) == 2); 1040 assert(indexOf("hello".byWchar, "lox", cs) == -1); 1041 assert(indexOf("hello".byWchar, "betty", cs) == -1); 1042 assert(indexOf("hello\U00010143\u0100*\U00010143".byWchar, "\u0100*", cs) == 7); 1043 } 1044 } 1045 1046 @safe pure unittest 1047 { 1048 import std.conv : to; 1049 import std.traits : EnumMembers; 1050 1051 static foreach (S; AliasSeq!(string, wstring, dstring)) 1052 { 1053 static foreach (T; AliasSeq!(string, wstring, dstring)) 1054 {{ 1055 assert(indexOf(cast(S) null, to!T("a"), 1337) == -1); 1056 assert(indexOf(to!S("def"), to!T("a"), 0) == -1); 1057 assert(indexOf(to!S("abba"), to!T("a"), 2) == 3); 1058 assert(indexOf(to!S("def"), to!T("f"), 1) == 2); 1059 assert(indexOf(to!S("dfefffg"), to!T("fff"), 1) == 3); 1060 assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 5) == 6); 1061 1062 assert(indexOf(to!S("dfeffgfff"), to!T("a"), 1, No.caseSensitive) == -1); 1063 assert(indexOf(to!S("def"), to!T("a"), 2, No.caseSensitive) == -1); 1064 assert(indexOf(to!S("abba"), to!T("a"), 3, No.caseSensitive) == 3); 1065 assert(indexOf(to!S("def"), to!T("f"), 1, No.caseSensitive) == 2); 1066 assert(indexOf(to!S("dfefffg"), to!T("fff"), 2, No.caseSensitive) == 3); 1067 assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 4, No.caseSensitive) == 6); 1068 assert(indexOf(to!S("dfeffgffföä"), to!T("öä"), 9, No.caseSensitive) == 9, 1069 to!string(indexOf(to!S("dfeffgffföä"), to!T("öä"), 9, No.caseSensitive)) 1070 ~ " " ~ S.stringof ~ " " ~ T.stringof); 1071 1072 S sPlts = "Mars: the fourth Rock (Planet) from the Sun."; 1073 S sMars = "Who\'s \'My Favorite Maritian?\'"; 1074 1075 assert(indexOf(sMars, to!T("MY fAVe"), 10, 1076 No.caseSensitive) == -1); 1077 assert(indexOf(sMars, to!T("mY fAVOriTe"), 4, No.caseSensitive) == 7); 1078 assert(indexOf(sPlts, to!T("mArS:"), 0, No.caseSensitive) == 0); 1079 assert(indexOf(sPlts, to!T("rOcK"), 12, No.caseSensitive) == 17); 1080 assert(indexOf(sPlts, to!T("Un."), 32, No.caseSensitive) == 41); 1081 assert(indexOf(sPlts, to!T(sPlts), 0, No.caseSensitive) == 0); 1082 1083 assert(indexOf("\u0100", to!T("\u0100"), 0, No.caseSensitive) == 0); 1084 1085 // Thanks to Carlos Santander B. and zwang 1086 assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y", 1087 to!T("page-break-before"), 10, No.caseSensitive) == -1); 1088 1089 // In order for indexOf with and without index to be consistent 1090 assert(indexOf(to!S(""), to!T("")) == indexOf(to!S(""), to!T(""), 0)); 1091 }} 1092 1093 foreach (cs; EnumMembers!CaseSensitive) 1094 { 1095 assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"), 1096 3, cs) == 9); 1097 assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"), 1098 3, cs) == 7); 1099 assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"), 1100 3, cs) == 6); 1101 } 1102 } 1103 } 1104 1105 /++ 1106 Params: 1107 s = string to search 1108 c = character to search for 1109 startIdx = the index into s to start searching from 1110 cs = `Yes.caseSensitive` or `No.caseSensitive` 1111 1112 Returns: 1113 The index of the last occurrence of `c` in `s`. If `c` is not 1114 found, then `-1` is returned. The `startIdx` slices `s` in 1115 the following way $(D s[0 .. startIdx]). `startIdx` represents a 1116 codeunit index in `s`. 1117 1118 Throws: 1119 If the sequence ending at `startIdx` does not represent a well 1120 formed codepoint, then a $(REF UTFException, std,utf) may be thrown. 1121 1122 `cs` indicates whether the comparisons are case sensitive. 1123 +/ 1124 ptrdiff_t lastIndexOf(Char)(const(Char)[] s, in dchar c, 1125 in CaseSensitive cs = Yes.caseSensitive) @safe pure 1126 if (isSomeChar!Char) 1127 { 1128 static import std.ascii, std.uni; 1129 import std.utf : canSearchInCodeUnits; 1130 if (cs == Yes.caseSensitive) 1131 { 1132 if (canSearchInCodeUnits!Char(c)) 1133 { 1134 foreach_reverse (i, it; s) 1135 { 1136 if (it == c) 1137 { 1138 return i; 1139 } 1140 } 1141 } 1142 else 1143 { 1144 foreach_reverse (i, dchar it; s) 1145 { 1146 if (it == c) 1147 { 1148 return i; 1149 } 1150 } 1151 } 1152 } 1153 else 1154 { 1155 if (std.ascii.isASCII(c)) 1156 { 1157 immutable c1 = std.ascii.toLower(c); 1158 1159 foreach_reverse (i, it; s) 1160 { 1161 immutable c2 = std.ascii.toLower(it); 1162 if (c1 == c2) 1163 { 1164 return i; 1165 } 1166 } 1167 } 1168 else 1169 { 1170 immutable c1 = std.uni.toLower(c); 1171 1172 foreach_reverse (i, dchar it; s) 1173 { 1174 immutable c2 = std.uni.toLower(it); 1175 if (c1 == c2) 1176 { 1177 return i; 1178 } 1179 } 1180 } 1181 } 1182 1183 return -1; 1184 } 1185 1186 /// Ditto 1187 ptrdiff_t lastIndexOf(Char)(const(Char)[] s, in dchar c, in size_t startIdx, 1188 in CaseSensitive cs = Yes.caseSensitive) @safe pure 1189 if (isSomeChar!Char) 1190 { 1191 if (startIdx <= s.length) 1192 { 1193 return lastIndexOf(s[0u .. startIdx], c, cs); 1194 } 1195 1196 return -1; 1197 } 1198 1199 /// 1200 @safe pure unittest 1201 { 1202 import std.typecons : No; 1203 1204 string s = "Hello World"; 1205 assert(lastIndexOf(s, 'l') == 9); 1206 assert(lastIndexOf(s, 'Z') == -1); 1207 assert(lastIndexOf(s, 'L', No.caseSensitive) == 9); 1208 } 1209 1210 /// 1211 @safe pure unittest 1212 { 1213 import std.typecons : No; 1214 1215 string s = "Hello World"; 1216 assert(lastIndexOf(s, 'l', 4) == 3); 1217 assert(lastIndexOf(s, 'Z', 1337) == -1); 1218 assert(lastIndexOf(s, 'L', 7, No.caseSensitive) == 3); 1219 } 1220 1221 @safe pure unittest 1222 { 1223 import std.conv : to; 1224 import std.exception : assertCTFEable; 1225 import std.traits : EnumMembers; 1226 1227 assertCTFEable!( 1228 { 1229 static foreach (S; AliasSeq!(string, wstring, dstring)) 1230 {{ 1231 assert(lastIndexOf(cast(S) null, 'a') == -1); 1232 assert(lastIndexOf(to!S("def"), 'a') == -1); 1233 assert(lastIndexOf(to!S("abba"), 'a') == 3); 1234 assert(lastIndexOf(to!S("def"), 'f') == 2); 1235 assert(lastIndexOf(to!S("ödef"), 'ö') == 0); 1236 1237 assert(lastIndexOf(cast(S) null, 'a', No.caseSensitive) == -1); 1238 assert(lastIndexOf(to!S("def"), 'a', No.caseSensitive) == -1); 1239 assert(lastIndexOf(to!S("AbbA"), 'a', No.caseSensitive) == 3); 1240 assert(lastIndexOf(to!S("def"), 'F', No.caseSensitive) == 2); 1241 assert(lastIndexOf(to!S("ödef"), 'ö', No.caseSensitive) == 0); 1242 assert(lastIndexOf(to!S("i\u0100def"), to!dchar("\u0100"), 1243 No.caseSensitive) == 1); 1244 1245 S sPlts = "Mars: the fourth Rock (Planet) from the Sun."; 1246 1247 assert(lastIndexOf(to!S("def"), 'f', No.caseSensitive) == 2); 1248 assert(lastIndexOf(sPlts, 'M', No.caseSensitive) == 34); 1249 assert(lastIndexOf(sPlts, 'S', No.caseSensitive) == 40); 1250 }} 1251 1252 foreach (cs; EnumMembers!CaseSensitive) 1253 { 1254 assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4); 1255 assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2); 1256 assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1); 1257 } 1258 }); 1259 } 1260 1261 @safe pure unittest 1262 { 1263 import std.conv : to; 1264 import std.traits : EnumMembers; 1265 1266 static foreach (S; AliasSeq!(string, wstring, dstring)) 1267 {{ 1268 assert(lastIndexOf(cast(S) null, 'a') == -1); 1269 assert(lastIndexOf(to!S("def"), 'a') == -1); 1270 assert(lastIndexOf(to!S("abba"), 'a', 3) == 0); 1271 assert(lastIndexOf(to!S("deff"), 'f', 3) == 2); 1272 1273 assert(lastIndexOf(cast(S) null, 'a', No.caseSensitive) == -1); 1274 assert(lastIndexOf(to!S("def"), 'a', No.caseSensitive) == -1); 1275 assert(lastIndexOf(to!S("AbbAa"), 'a', to!ushort(4), No.caseSensitive) == 3, 1276 to!string(lastIndexOf(to!S("AbbAa"), 'a', 4, No.caseSensitive))); 1277 assert(lastIndexOf(to!S("def"), 'F', 3, No.caseSensitive) == 2); 1278 1279 S sPlts = "Mars: the fourth Rock (Planet) from the Sun."; 1280 1281 assert(lastIndexOf(to!S("def"), 'f', 4, No.caseSensitive) == -1); 1282 assert(lastIndexOf(sPlts, 'M', sPlts.length -2, No.caseSensitive) == 34); 1283 assert(lastIndexOf(sPlts, 'S', sPlts.length -2, No.caseSensitive) == 40); 1284 }} 1285 1286 foreach (cs; EnumMembers!CaseSensitive) 1287 { 1288 assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4); 1289 assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2); 1290 assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1); 1291 } 1292 } 1293 1294 /++ 1295 Params: 1296 s = string to search 1297 sub = substring to search for 1298 startIdx = the index into s to start searching from 1299 cs = `Yes.caseSensitive` or `No.caseSensitive` 1300 1301 Returns: 1302 the index of the last occurrence of `sub` in `s`. If `sub` is 1303 not found, then `-1` is returned. The `startIdx` slices `s` 1304 in the following way $(D s[0 .. startIdx]). `startIdx` represents a 1305 codeunit index in `s`. 1306 1307 Throws: 1308 If the sequence ending at `startIdx` does not represent a well 1309 formed codepoint, then a $(REF UTFException, std,utf) may be thrown. 1310 1311 `cs` indicates whether the comparisons are case sensitive. 1312 +/ 1313 ptrdiff_t lastIndexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub, 1314 in CaseSensitive cs = Yes.caseSensitive) @safe pure 1315 if (isSomeChar!Char1 && isSomeChar!Char2) 1316 { 1317 import std.algorithm.searching : endsWith; 1318 import std.conv : to; 1319 import std.range.primitives : walkLength; 1320 static import std.uni; 1321 import std.utf : strideBack; 1322 if (sub.empty) 1323 return -1; 1324 1325 if (walkLength(sub) == 1) 1326 return lastIndexOf(s, sub.front, cs); 1327 1328 if (cs == Yes.caseSensitive) 1329 { 1330 static if (is(immutable Char1 == immutable Char2)) 1331 { 1332 import core.stdc..string : memcmp; 1333 1334 immutable c = sub[0]; 1335 1336 for (ptrdiff_t i = s.length - sub.length; i >= 0; --i) 1337 { 1338 if (s[i] == c) 1339 { 1340 if (__ctfe) 1341 { 1342 if (s[i + 1 .. i + sub.length] == sub[1 .. $]) 1343 return i; 1344 } 1345 else 1346 { 1347 auto trustedMemcmp(in void* s1, in void* s2, size_t n) @trusted 1348 { 1349 return memcmp(s1, s2, n); 1350 } 1351 if (trustedMemcmp(&s[i + 1], &sub[1], 1352 (sub.length - 1) * Char1.sizeof) == 0) 1353 return i; 1354 } 1355 } 1356 } 1357 } 1358 else 1359 { 1360 for (size_t i = s.length; !s.empty;) 1361 { 1362 if (s.endsWith(sub)) 1363 return cast(ptrdiff_t) i - to!(const(Char1)[])(sub).length; 1364 1365 i -= strideBack(s, i); 1366 s = s[0 .. i]; 1367 } 1368 } 1369 } 1370 else 1371 { 1372 for (size_t i = s.length; !s.empty;) 1373 { 1374 if (endsWith!((a, b) => std.uni.toLower(a) == std.uni.toLower(b)) 1375 (s, sub)) 1376 { 1377 return cast(ptrdiff_t) i - to!(const(Char1)[])(sub).length; 1378 } 1379 1380 i -= strideBack(s, i); 1381 s = s[0 .. i]; 1382 } 1383 } 1384 1385 return -1; 1386 } 1387 1388 /// Ditto 1389 ptrdiff_t lastIndexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub, 1390 in size_t startIdx, in CaseSensitive cs = Yes.caseSensitive) @safe pure 1391 if (isSomeChar!Char1 && isSomeChar!Char2) 1392 { 1393 if (startIdx <= s.length) 1394 { 1395 return lastIndexOf(s[0u .. startIdx], sub, cs); 1396 } 1397 1398 return -1; 1399 } 1400 1401 /// 1402 @safe pure unittest 1403 { 1404 import std.typecons : No; 1405 1406 string s = "Hello World"; 1407 assert(lastIndexOf(s, "ll") == 2); 1408 assert(lastIndexOf(s, "Zo") == -1); 1409 assert(lastIndexOf(s, "lL", No.caseSensitive) == 2); 1410 } 1411 1412 /// 1413 @safe pure unittest 1414 { 1415 import std.typecons : No; 1416 1417 string s = "Hello World"; 1418 assert(lastIndexOf(s, "ll", 4) == 2); 1419 assert(lastIndexOf(s, "Zo", 128) == -1); 1420 assert(lastIndexOf(s, "lL", 3, No.caseSensitive) == -1); 1421 } 1422 1423 @safe pure unittest 1424 { 1425 import std.conv : to; 1426 1427 static foreach (S; AliasSeq!(string, wstring, dstring)) 1428 {{ 1429 auto r = to!S("").lastIndexOf("hello"); 1430 assert(r == -1, to!string(r)); 1431 1432 r = to!S("hello").lastIndexOf(""); 1433 assert(r == -1, to!string(r)); 1434 1435 r = to!S("").lastIndexOf(""); 1436 assert(r == -1, to!string(r)); 1437 }} 1438 } 1439 1440 @safe pure unittest 1441 { 1442 import std.conv : to; 1443 import std.exception : assertCTFEable; 1444 import std.traits : EnumMembers; 1445 1446 assertCTFEable!( 1447 { 1448 static foreach (S; AliasSeq!(string, wstring, dstring)) 1449 { 1450 static foreach (T; AliasSeq!(string, wstring, dstring)) 1451 {{ 1452 enum typeStr = S.stringof ~ " " ~ T.stringof; 1453 1454 assert(lastIndexOf(cast(S) null, to!T("a")) == -1, typeStr); 1455 assert(lastIndexOf(to!S("abcdefcdef"), to!T("c")) == 6, typeStr); 1456 assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd")) == 6, typeStr); 1457 assert(lastIndexOf(to!S("abcdefcdef"), to!T("ef")) == 8, typeStr); 1458 assert(lastIndexOf(to!S("abcdefCdef"), to!T("c")) == 2, typeStr); 1459 assert(lastIndexOf(to!S("abcdefCdef"), to!T("cd")) == 2, typeStr); 1460 assert(lastIndexOf(to!S("abcdefcdef"), to!T("x")) == -1, typeStr); 1461 assert(lastIndexOf(to!S("abcdefcdef"), to!T("xy")) == -1, typeStr); 1462 assert(lastIndexOf(to!S("abcdefcdef"), to!T("")) == -1, typeStr); 1463 assert(lastIndexOf(to!S("öabcdefcdef"), to!T("ö")) == 0, typeStr); 1464 1465 assert(lastIndexOf(cast(S) null, to!T("a"), No.caseSensitive) == -1, typeStr); 1466 assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), No.caseSensitive) == 6, typeStr); 1467 assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), No.caseSensitive) == 6, typeStr); 1468 assert(lastIndexOf(to!S("abcdefcdef"), to!T("x"), No.caseSensitive) == -1, typeStr); 1469 assert(lastIndexOf(to!S("abcdefcdef"), to!T("xy"), No.caseSensitive) == -1, typeStr); 1470 assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), No.caseSensitive) == -1, typeStr); 1471 assert(lastIndexOf(to!S("öabcdefcdef"), to!T("ö"), No.caseSensitive) == 0, typeStr); 1472 1473 assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), No.caseSensitive) == 6, typeStr); 1474 assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), No.caseSensitive) == 6, typeStr); 1475 assert(lastIndexOf(to!S("abcdefcdef"), to!T("def"), No.caseSensitive) == 7, typeStr); 1476 1477 assert(lastIndexOf(to!S("ödfeffgfff"), to!T("ö"), Yes.caseSensitive) == 0); 1478 1479 S sPlts = "Mars: the fourth Rock (Planet) from the Sun."; 1480 S sMars = "Who\'s \'My Favorite Maritian?\'"; 1481 1482 assert(lastIndexOf(sMars, to!T("RiTE maR"), No.caseSensitive) == 14, typeStr); 1483 assert(lastIndexOf(sPlts, to!T("FOuRTh"), No.caseSensitive) == 10, typeStr); 1484 assert(lastIndexOf(sMars, to!T("whO\'s \'MY"), No.caseSensitive) == 0, typeStr); 1485 assert(lastIndexOf(sMars, to!T(sMars), No.caseSensitive) == 0, typeStr); 1486 }} 1487 1488 foreach (cs; EnumMembers!CaseSensitive) 1489 { 1490 enum csString = to!string(cs); 1491 1492 assert(lastIndexOf("\U00010143\u0100\U00010143hello", to!S("\u0100"), cs) == 4, csString); 1493 assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, to!S("\u0100"), cs) == 2, csString); 1494 assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, to!S("\u0100"), cs) == 1, csString); 1495 } 1496 } 1497 }); 1498 } 1499 1500 // https://issues.dlang.org/show_bug.cgi?id=13529 1501 @safe pure unittest 1502 { 1503 import std.conv : to; 1504 static foreach (S; AliasSeq!(string, wstring, dstring)) 1505 { 1506 static foreach (T; AliasSeq!(string, wstring, dstring)) 1507 {{ 1508 enum typeStr = S.stringof ~ " " ~ T.stringof; 1509 auto idx = lastIndexOf(to!T("Hällö Wörldö ö"),to!S("ö ö")); 1510 assert(idx != -1, to!string(idx) ~ " " ~ typeStr); 1511 1512 idx = lastIndexOf(to!T("Hällö Wörldö ö"),to!S("ö öd")); 1513 assert(idx == -1, to!string(idx) ~ " " ~ typeStr); 1514 }} 1515 } 1516 } 1517 1518 @safe pure unittest 1519 { 1520 import std.conv : to; 1521 import std.traits : EnumMembers; 1522 1523 static foreach (S; AliasSeq!(string, wstring, dstring)) 1524 { 1525 static foreach (T; AliasSeq!(string, wstring, dstring)) 1526 {{ 1527 enum typeStr = S.stringof ~ " " ~ T.stringof; 1528 1529 assert(lastIndexOf(cast(S) null, to!T("a")) == -1, typeStr); 1530 assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), 5) == 2, typeStr); 1531 assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), 3) == -1, typeStr); 1532 assert(lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6) == 4, typeStr ~ 1533 format(" %u", lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6))); 1534 assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5) == 2, typeStr); 1535 assert(lastIndexOf(to!S("abcdefCdef"), to!T("cd"), 3) == -1, typeStr); 1536 assert(lastIndexOf(to!S("abcdefcdefx"), to!T("x"), 1) == -1, typeStr); 1537 assert(lastIndexOf(to!S("abcdefcdefxy"), to!T("xy"), 6) == -1, typeStr); 1538 assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), 8) == -1, typeStr); 1539 assert(lastIndexOf(to!S("öafö"), to!T("ö"), 3) == 0, typeStr ~ 1540 to!string(lastIndexOf(to!S("öafö"), to!T("ö"), 3))); //BUG 10472 1541 1542 assert(lastIndexOf(cast(S) null, to!T("a"), 1, No.caseSensitive) == -1, typeStr); 1543 assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5, No.caseSensitive) == 2, typeStr); 1544 assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), 4, No.caseSensitive) == 2, typeStr ~ 1545 " " ~ to!string(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), 3, No.caseSensitive))); 1546 assert(lastIndexOf(to!S("abcdefcdef"), to!T("x"),3 , No.caseSensitive) == -1, typeStr); 1547 assert(lastIndexOf(to!S("abcdefcdefXY"), to!T("xy"), 4, No.caseSensitive) == -1, typeStr); 1548 assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), 7, No.caseSensitive) == -1, typeStr); 1549 1550 assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), 4, No.caseSensitive) == 2, typeStr); 1551 assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), 4, No.caseSensitive) == 2, typeStr); 1552 assert(lastIndexOf(to!S("abcdefcdef"), to!T("def"), 6, No.caseSensitive) == 3, typeStr); 1553 assert(lastIndexOf(to!S(""), to!T(""), 0) == lastIndexOf(to!S(""), to!T("")), typeStr); 1554 }} 1555 1556 foreach (cs; EnumMembers!CaseSensitive) 1557 { 1558 enum csString = to!string(cs); 1559 1560 assert(lastIndexOf("\U00010143\u0100\U00010143hello", to!S("\u0100"), 6, cs) == 4, csString); 1561 assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, to!S("\u0100"), 6, cs) == 2, csString); 1562 assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, to!S("\u0100"), 3, cs) == 1, csString); 1563 } 1564 } 1565 } 1566 1567 // https://issues.dlang.org/show_bug.cgi?id=20783 1568 @safe pure @nogc unittest 1569 { 1570 enum lastIndex = "aa".lastIndexOf("ab"); 1571 assert(lastIndex == -1); 1572 } 1573 1574 @safe pure @nogc unittest 1575 { 1576 enum lastIndex = "hello hello hell h".lastIndexOf("hello"); 1577 assert(lastIndex == 6); 1578 } 1579 1580 private ptrdiff_t indexOfAnyNeitherImpl(bool forward, bool any, Char, Char2)( 1581 const(Char)[] haystack, const(Char2)[] needles, 1582 in CaseSensitive cs = Yes.caseSensitive) @safe pure 1583 if (isSomeChar!Char && isSomeChar!Char2) 1584 { 1585 import std.algorithm.searching : canFind, findAmong; 1586 if (cs == Yes.caseSensitive) 1587 { 1588 static if (forward) 1589 { 1590 static if (any) 1591 { 1592 size_t n = haystack.findAmong(needles).length; 1593 return n ? haystack.length - n : -1; 1594 } 1595 else 1596 { 1597 foreach (idx, dchar hay; haystack) 1598 { 1599 if (!canFind(needles, hay)) 1600 { 1601 return idx; 1602 } 1603 } 1604 } 1605 } 1606 else 1607 { 1608 static if (any) 1609 { 1610 import std.range : retro; 1611 import std.utf : strideBack; 1612 size_t n = haystack.retro.findAmong(needles).source.length; 1613 if (n) 1614 { 1615 return n - haystack.strideBack(n); 1616 } 1617 } 1618 else 1619 { 1620 foreach_reverse (idx, dchar hay; haystack) 1621 { 1622 if (!canFind(needles, hay)) 1623 { 1624 return idx; 1625 } 1626 } 1627 } 1628 } 1629 } 1630 else 1631 { 1632 import std.range.primitives : walkLength; 1633 if (needles.length <= 16 && needles.walkLength(17)) 1634 { 1635 size_t si = 0; 1636 dchar[16] scratch = void; 1637 foreach ( dchar c; needles) 1638 { 1639 scratch[si++] = toLower(c); 1640 } 1641 1642 static if (forward) 1643 { 1644 foreach (i, dchar c; haystack) 1645 { 1646 if (canFind(scratch[0 .. si], toLower(c)) == any) 1647 { 1648 return i; 1649 } 1650 } 1651 } 1652 else 1653 { 1654 foreach_reverse (i, dchar c; haystack) 1655 { 1656 if (canFind(scratch[0 .. si], toLower(c)) == any) 1657 { 1658 return i; 1659 } 1660 } 1661 } 1662 } 1663 else 1664 { 1665 static bool f(dchar a, dchar b) 1666 { 1667 return toLower(a) == b; 1668 } 1669 1670 static if (forward) 1671 { 1672 foreach (i, dchar c; haystack) 1673 { 1674 if (canFind!f(needles, toLower(c)) == any) 1675 { 1676 return i; 1677 } 1678 } 1679 } 1680 else 1681 { 1682 foreach_reverse (i, dchar c; haystack) 1683 { 1684 if (canFind!f(needles, toLower(c)) == any) 1685 { 1686 return i; 1687 } 1688 } 1689 } 1690 } 1691 } 1692 1693 return -1; 1694 } 1695 1696 /** 1697 Returns the index of the first occurrence of any of the elements in $(D 1698 needles) in `haystack`. If no element of `needles` is found, 1699 then `-1` is returned. The `startIdx` slices `haystack` in the 1700 following way $(D haystack[startIdx .. $]). `startIdx` represents a 1701 codeunit index in `haystack`. If the sequence ending at `startIdx` 1702 does not represent a well formed codepoint, then a $(REF UTFException, std,utf) 1703 may be thrown. 1704 1705 Params: 1706 haystack = String to search for needles in. 1707 needles = Strings to search for in haystack. 1708 startIdx = slices haystack like this $(D haystack[startIdx .. $]). If 1709 the startIdx is greater equal the length of haystack the functions 1710 returns `-1`. 1711 cs = Indicates whether the comparisons are case sensitive. 1712 */ 1713 ptrdiff_t indexOfAny(Char,Char2)(const(Char)[] haystack, const(Char2)[] needles, 1714 in CaseSensitive cs = Yes.caseSensitive) @safe pure 1715 if (isSomeChar!Char && isSomeChar!Char2) 1716 { 1717 return indexOfAnyNeitherImpl!(true, true)(haystack, needles, cs); 1718 } 1719 1720 /// Ditto 1721 ptrdiff_t indexOfAny(Char,Char2)(const(Char)[] haystack, const(Char2)[] needles, 1722 in size_t startIdx, in CaseSensitive cs = Yes.caseSensitive) @safe pure 1723 if (isSomeChar!Char && isSomeChar!Char2) 1724 { 1725 if (startIdx < haystack.length) 1726 { 1727 ptrdiff_t foundIdx = indexOfAny(haystack[startIdx .. $], needles, cs); 1728 if (foundIdx != -1) 1729 { 1730 return foundIdx + cast(ptrdiff_t) startIdx; 1731 } 1732 } 1733 1734 return -1; 1735 } 1736 1737 /// 1738 @safe pure unittest 1739 { 1740 import std.conv : to; 1741 1742 ptrdiff_t i = "helloWorld".indexOfAny("Wr"); 1743 assert(i == 5); 1744 i = "öällo world".indexOfAny("lo "); 1745 assert(i == 4, to!string(i)); 1746 } 1747 1748 /// 1749 @safe pure unittest 1750 { 1751 import std.conv : to; 1752 1753 ptrdiff_t i = "helloWorld".indexOfAny("Wr", 4); 1754 assert(i == 5); 1755 1756 i = "Foo öällo world".indexOfAny("lh", 3); 1757 assert(i == 8, to!string(i)); 1758 } 1759 1760 @safe pure unittest 1761 { 1762 import std.conv : to; 1763 1764 static foreach (S; AliasSeq!(string, wstring, dstring)) 1765 {{ 1766 auto r = to!S("").indexOfAny("hello"); 1767 assert(r == -1, to!string(r)); 1768 1769 r = to!S("hello").indexOfAny(""); 1770 assert(r == -1, to!string(r)); 1771 1772 r = to!S("").indexOfAny(""); 1773 assert(r == -1, to!string(r)); 1774 }} 1775 } 1776 1777 @safe pure unittest 1778 { 1779 import std.conv : to; 1780 import std.exception : assertCTFEable; 1781 1782 assertCTFEable!( 1783 { 1784 static foreach (S; AliasSeq!(string, wstring, dstring)) 1785 { 1786 static foreach (T; AliasSeq!(string, wstring, dstring)) 1787 { 1788 assert(indexOfAny(cast(S) null, to!T("a")) == -1); 1789 assert(indexOfAny(to!S("def"), to!T("rsa")) == -1); 1790 assert(indexOfAny(to!S("abba"), to!T("a")) == 0); 1791 assert(indexOfAny(to!S("def"), to!T("f")) == 2); 1792 assert(indexOfAny(to!S("dfefffg"), to!T("fgh")) == 1); 1793 assert(indexOfAny(to!S("dfeffgfff"), to!T("feg")) == 1); 1794 1795 assert(indexOfAny(to!S("zfeffgfff"), to!T("ACDC"), 1796 No.caseSensitive) == -1); 1797 assert(indexOfAny(to!S("def"), to!T("MI6"), 1798 No.caseSensitive) == -1); 1799 assert(indexOfAny(to!S("abba"), to!T("DEA"), 1800 No.caseSensitive) == 0); 1801 assert(indexOfAny(to!S("def"), to!T("FBI"), No.caseSensitive) == 2); 1802 assert(indexOfAny(to!S("dfefffg"), to!T("NSA"), No.caseSensitive) 1803 == -1); 1804 assert(indexOfAny(to!S("dfeffgfff"), to!T("BND"), 1805 No.caseSensitive) == 0); 1806 assert(indexOfAny(to!S("dfeffgfff"), to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"), 1807 No.caseSensitive) == 0); 1808 1809 assert(indexOfAny("\u0100", to!T("\u0100"), No.caseSensitive) == 0); 1810 } 1811 } 1812 } 1813 ); 1814 } 1815 1816 @safe pure unittest 1817 { 1818 import std.conv : to; 1819 import std.traits : EnumMembers; 1820 1821 static foreach (S; AliasSeq!(string, wstring, dstring)) 1822 { 1823 static foreach (T; AliasSeq!(string, wstring, dstring)) 1824 { 1825 assert(indexOfAny(cast(S) null, to!T("a"), 1337) == -1); 1826 assert(indexOfAny(to!S("def"), to!T("AaF"), 0) == -1); 1827 assert(indexOfAny(to!S("abba"), to!T("NSa"), 2) == 3); 1828 assert(indexOfAny(to!S("def"), to!T("fbi"), 1) == 2); 1829 assert(indexOfAny(to!S("dfefffg"), to!T("foo"), 2) == 3); 1830 assert(indexOfAny(to!S("dfeffgfff"), to!T("fsb"), 5) == 6); 1831 1832 assert(indexOfAny(to!S("dfeffgfff"), to!T("NDS"), 1, 1833 No.caseSensitive) == -1); 1834 assert(indexOfAny(to!S("def"), to!T("DRS"), 2, 1835 No.caseSensitive) == -1); 1836 assert(indexOfAny(to!S("abba"), to!T("SI"), 3, 1837 No.caseSensitive) == -1); 1838 assert(indexOfAny(to!S("deO"), to!T("ASIO"), 1, 1839 No.caseSensitive) == 2); 1840 assert(indexOfAny(to!S("dfefffg"), to!T("fbh"), 2, 1841 No.caseSensitive) == 3); 1842 assert(indexOfAny(to!S("dfeffgfff"), to!T("fEe"), 4, 1843 No.caseSensitive) == 4); 1844 assert(indexOfAny(to!S("dfeffgffföä"), to!T("föä"), 9, 1845 No.caseSensitive) == 9); 1846 1847 assert(indexOfAny("\u0100", to!T("\u0100"), 0, 1848 No.caseSensitive) == 0); 1849 } 1850 1851 foreach (cs; EnumMembers!CaseSensitive) 1852 { 1853 assert(indexOfAny("hello\U00010143\u0100\U00010143", 1854 to!S("e\u0100"), 3, cs) == 9); 1855 assert(indexOfAny("hello\U00010143\u0100\U00010143"w, 1856 to!S("h\u0100"), 3, cs) == 7); 1857 assert(indexOfAny("hello\U00010143\u0100\U00010143"d, 1858 to!S("l\u0100"), 5, cs) == 6); 1859 } 1860 } 1861 } 1862 1863 /** 1864 Returns the index of the last occurrence of any of the elements in $(D 1865 needles) in `haystack`. If no element of `needles` is found, 1866 then `-1` is returned. The `stopIdx` slices `haystack` in the 1867 following way $(D s[0 .. stopIdx]). `stopIdx` represents a codeunit 1868 index in `haystack`. If the sequence ending at `startIdx` does not 1869 represent a well formed codepoint, then a $(REF UTFException, std,utf) may be 1870 thrown. 1871 1872 Params: 1873 haystack = String to search for needles in. 1874 needles = Strings to search for in haystack. 1875 stopIdx = slices haystack like this $(D haystack[0 .. stopIdx]). If 1876 the stopIdx is greater equal the length of haystack the functions 1877 returns `-1`. 1878 cs = Indicates whether the comparisons are case sensitive. 1879 */ 1880 ptrdiff_t lastIndexOfAny(Char,Char2)(const(Char)[] haystack, 1881 const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive) 1882 @safe pure 1883 if (isSomeChar!Char && isSomeChar!Char2) 1884 { 1885 return indexOfAnyNeitherImpl!(false, true)(haystack, needles, cs); 1886 } 1887 1888 /// Ditto 1889 ptrdiff_t lastIndexOfAny(Char,Char2)(const(Char)[] haystack, 1890 const(Char2)[] needles, in size_t stopIdx, 1891 in CaseSensitive cs = Yes.caseSensitive) @safe pure 1892 if (isSomeChar!Char && isSomeChar!Char2) 1893 { 1894 if (stopIdx <= haystack.length) 1895 { 1896 return lastIndexOfAny(haystack[0u .. stopIdx], needles, cs); 1897 } 1898 1899 return -1; 1900 } 1901 1902 /// 1903 @safe pure unittest 1904 { 1905 ptrdiff_t i = "helloWorld".lastIndexOfAny("Wlo"); 1906 assert(i == 8); 1907 1908 i = "Foo öäöllo world".lastIndexOfAny("öF"); 1909 assert(i == 8); 1910 } 1911 1912 /// 1913 @safe pure unittest 1914 { 1915 import std.conv : to; 1916 1917 ptrdiff_t i = "helloWorld".lastIndexOfAny("Wlo", 4); 1918 assert(i == 3); 1919 1920 i = "Foo öäöllo world".lastIndexOfAny("öF", 3); 1921 assert(i == 0); 1922 } 1923 1924 @safe pure unittest 1925 { 1926 import std.conv : to; 1927 1928 static foreach (S; AliasSeq!(string, wstring, dstring)) 1929 {{ 1930 auto r = to!S("").lastIndexOfAny("hello"); 1931 assert(r == -1, to!string(r)); 1932 1933 r = to!S("hello").lastIndexOfAny(""); 1934 assert(r == -1, to!string(r)); 1935 1936 r = to!S("").lastIndexOfAny(""); 1937 assert(r == -1, to!string(r)); 1938 }} 1939 } 1940 1941 @safe pure unittest 1942 { 1943 import std.conv : to; 1944 import std.exception : assertCTFEable; 1945 1946 assertCTFEable!( 1947 { 1948 static foreach (S; AliasSeq!(string, wstring, dstring)) 1949 { 1950 static foreach (T; AliasSeq!(string, wstring, dstring)) 1951 {{ 1952 assert(lastIndexOfAny(cast(S) null, to!T("a")) == -1); 1953 assert(lastIndexOfAny(to!S("def"), to!T("rsa")) == -1); 1954 assert(lastIndexOfAny(to!S("abba"), to!T("a")) == 3); 1955 assert(lastIndexOfAny(to!S("def"), to!T("f")) == 2); 1956 assert(lastIndexOfAny(to!S("dfefffg"), to!T("fgh")) == 6); 1957 1958 ptrdiff_t oeIdx = 9; 1959 if (is(S == wstring) || is(S == dstring)) 1960 { 1961 oeIdx = 8; 1962 } 1963 1964 auto foundOeIdx = lastIndexOfAny(to!S("dfeffgföf"), to!T("feg")); 1965 assert(foundOeIdx == oeIdx, to!string(foundOeIdx)); 1966 1967 assert(lastIndexOfAny(to!S("zfeffgfff"), to!T("ACDC"), 1968 No.caseSensitive) == -1); 1969 assert(lastIndexOfAny(to!S("def"), to!T("MI6"), 1970 No.caseSensitive) == -1); 1971 assert(lastIndexOfAny(to!S("abba"), to!T("DEA"), 1972 No.caseSensitive) == 3); 1973 assert(lastIndexOfAny(to!S("def"), to!T("FBI"), 1974 No.caseSensitive) == 2); 1975 assert(lastIndexOfAny(to!S("dfefffg"), to!T("NSA"), 1976 No.caseSensitive) == -1); 1977 1978 oeIdx = 2; 1979 if (is(S == wstring) || is(S == dstring)) 1980 { 1981 oeIdx = 1; 1982 } 1983 assert(lastIndexOfAny(to!S("ödfeffgfff"), to!T("BND"), 1984 No.caseSensitive) == oeIdx); 1985 1986 assert(lastIndexOfAny("\u0100", to!T("\u0100"), 1987 No.caseSensitive) == 0); 1988 }} 1989 } 1990 } 1991 ); 1992 } 1993 1994 @safe pure unittest 1995 { 1996 import std.conv : to; 1997 import std.exception : assertCTFEable; 1998 1999 assertCTFEable!( 2000 { 2001 static foreach (S; AliasSeq!(string, wstring, dstring)) 2002 { 2003 static foreach (T; AliasSeq!(string, wstring, dstring)) 2004 {{ 2005 enum typeStr = S.stringof ~ " " ~ T.stringof; 2006 2007 assert(lastIndexOfAny(cast(S) null, to!T("a"), 1337) == -1, 2008 typeStr); 2009 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("c"), 7) == 6, 2010 typeStr); 2011 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("cd"), 5) == 3, 2012 typeStr); 2013 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("ef"), 6) == 5, 2014 typeStr); 2015 assert(lastIndexOfAny(to!S("abcdefCdef"), to!T("c"), 8) == 2, 2016 typeStr); 2017 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("x"), 7) == -1, 2018 typeStr); 2019 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("xy"), 4) == -1, 2020 typeStr); 2021 assert(lastIndexOfAny(to!S("öabcdefcdef"), to!T("ö"), 2) == 0, 2022 typeStr); 2023 2024 assert(lastIndexOfAny(cast(S) null, to!T("a"), 1337, 2025 No.caseSensitive) == -1, typeStr); 2026 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("C"), 7, 2027 No.caseSensitive) == 6, typeStr); 2028 assert(lastIndexOfAny(to!S("ABCDEFCDEF"), to!T("cd"), 5, 2029 No.caseSensitive) == 3, typeStr); 2030 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("EF"), 6, 2031 No.caseSensitive) == 5, typeStr); 2032 assert(lastIndexOfAny(to!S("ABCDEFcDEF"), to!T("C"), 8, 2033 No.caseSensitive) == 6, typeStr); 2034 assert(lastIndexOfAny(to!S("ABCDEFCDEF"), to!T("x"), 7, 2035 No.caseSensitive) == -1, typeStr); 2036 assert(lastIndexOfAny(to!S("abCdefcdef"), to!T("XY"), 4, 2037 No.caseSensitive) == -1, typeStr); 2038 assert(lastIndexOfAny(to!S("ÖABCDEFCDEF"), to!T("ö"), 2, 2039 No.caseSensitive) == 0, typeStr); 2040 }} 2041 } 2042 } 2043 ); 2044 } 2045 2046 /** 2047 Returns the index of the first occurrence of any character not an elements 2048 in `needles` in `haystack`. If all element of `haystack` are 2049 element of `needles` `-1` is returned. 2050 2051 Params: 2052 haystack = String to search for needles in. 2053 needles = Strings to search for in haystack. 2054 startIdx = slices haystack like this $(D haystack[startIdx .. $]). If 2055 the startIdx is greater equal the length of haystack the functions 2056 returns `-1`. 2057 cs = Indicates whether the comparisons are case sensitive. 2058 */ 2059 ptrdiff_t indexOfNeither(Char,Char2)(const(Char)[] haystack, 2060 const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive) 2061 @safe pure 2062 if (isSomeChar!Char && isSomeChar!Char2) 2063 { 2064 return indexOfAnyNeitherImpl!(true, false)(haystack, needles, cs); 2065 } 2066 2067 /// Ditto 2068 ptrdiff_t indexOfNeither(Char,Char2)(const(Char)[] haystack, 2069 const(Char2)[] needles, in size_t startIdx, 2070 in CaseSensitive cs = Yes.caseSensitive) 2071 @safe pure 2072 if (isSomeChar!Char && isSomeChar!Char2) 2073 { 2074 if (startIdx < haystack.length) 2075 { 2076 ptrdiff_t foundIdx = indexOfAnyNeitherImpl!(true, false)( 2077 haystack[startIdx .. $], needles, cs); 2078 if (foundIdx != -1) 2079 { 2080 return foundIdx + cast(ptrdiff_t) startIdx; 2081 } 2082 } 2083 return -1; 2084 } 2085 2086 /// 2087 @safe pure unittest 2088 { 2089 assert(indexOfNeither("abba", "a", 2) == 2); 2090 assert(indexOfNeither("def", "de", 1) == 2); 2091 assert(indexOfNeither("dfefffg", "dfe", 4) == 6); 2092 } 2093 2094 /// 2095 @safe pure unittest 2096 { 2097 assert(indexOfNeither("def", "a") == 0); 2098 assert(indexOfNeither("def", "de") == 2); 2099 assert(indexOfNeither("dfefffg", "dfe") == 6); 2100 } 2101 2102 @safe pure unittest 2103 { 2104 import std.conv : to; 2105 2106 static foreach (S; AliasSeq!(string, wstring, dstring)) 2107 {{ 2108 auto r = to!S("").indexOfNeither("hello"); 2109 assert(r == -1, to!string(r)); 2110 2111 r = to!S("hello").indexOfNeither(""); 2112 assert(r == 0, to!string(r)); 2113 2114 r = to!S("").indexOfNeither(""); 2115 assert(r == -1, to!string(r)); 2116 }} 2117 } 2118 2119 @safe pure unittest 2120 { 2121 import std.conv : to; 2122 import std.exception : assertCTFEable; 2123 2124 assertCTFEable!( 2125 { 2126 static foreach (S; AliasSeq!(string, wstring, dstring)) 2127 { 2128 static foreach (T; AliasSeq!(string, wstring, dstring)) 2129 { 2130 assert(indexOfNeither(cast(S) null, to!T("a")) == -1); 2131 assert(indexOfNeither("abba", "a") == 1); 2132 2133 assert(indexOfNeither(to!S("dfeffgfff"), to!T("a"), 2134 No.caseSensitive) == 0); 2135 assert(indexOfNeither(to!S("def"), to!T("D"), 2136 No.caseSensitive) == 1); 2137 assert(indexOfNeither(to!S("ABca"), to!T("a"), 2138 No.caseSensitive) == 1); 2139 assert(indexOfNeither(to!S("def"), to!T("f"), 2140 No.caseSensitive) == 0); 2141 assert(indexOfNeither(to!S("DfEfffg"), to!T("dFe"), 2142 No.caseSensitive) == 6); 2143 if (is(S == string)) 2144 { 2145 assert(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"), 2146 No.caseSensitive) == 8, 2147 to!string(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"), 2148 No.caseSensitive))); 2149 } 2150 else 2151 { 2152 assert(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"), 2153 No.caseSensitive) == 7, 2154 to!string(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"), 2155 No.caseSensitive))); 2156 } 2157 } 2158 } 2159 } 2160 ); 2161 } 2162 2163 @safe pure unittest 2164 { 2165 import std.conv : to; 2166 import std.exception : assertCTFEable; 2167 2168 assertCTFEable!( 2169 { 2170 static foreach (S; AliasSeq!(string, wstring, dstring)) 2171 { 2172 static foreach (T; AliasSeq!(string, wstring, dstring)) 2173 { 2174 assert(indexOfNeither(cast(S) null, to!T("a"), 1) == -1); 2175 assert(indexOfNeither(to!S("def"), to!T("a"), 1) == 1, 2176 to!string(indexOfNeither(to!S("def"), to!T("a"), 1))); 2177 2178 assert(indexOfNeither(to!S("dfeffgfff"), to!T("a"), 4, 2179 No.caseSensitive) == 4); 2180 assert(indexOfNeither(to!S("def"), to!T("D"), 2, 2181 No.caseSensitive) == 2); 2182 assert(indexOfNeither(to!S("ABca"), to!T("a"), 3, 2183 No.caseSensitive) == -1); 2184 assert(indexOfNeither(to!S("def"), to!T("tzf"), 2, 2185 No.caseSensitive) == -1); 2186 assert(indexOfNeither(to!S("DfEfffg"), to!T("dFe"), 5, 2187 No.caseSensitive) == 6); 2188 if (is(S == string)) 2189 { 2190 assert(indexOfNeither(to!S("öDfEfffg"), to!T("äDi"), 2, 2191 No.caseSensitive) == 3, to!string(indexOfNeither( 2192 to!S("öDfEfffg"), to!T("äDi"), 2, No.caseSensitive))); 2193 } 2194 else 2195 { 2196 assert(indexOfNeither(to!S("öDfEfffg"), to!T("äDi"), 2, 2197 No.caseSensitive) == 2, to!string(indexOfNeither( 2198 to!S("öDfEfffg"), to!T("äDi"), 2, No.caseSensitive))); 2199 } 2200 } 2201 } 2202 } 2203 ); 2204 } 2205 2206 /** 2207 Returns the last index of the first occurence of any character that is not 2208 an elements in `needles` in `haystack`. If all element of 2209 `haystack` are element of `needles` `-1` is returned. 2210 2211 Params: 2212 haystack = String to search for needles in. 2213 needles = Strings to search for in haystack. 2214 stopIdx = slices haystack like this $(D haystack[0 .. stopIdx]) If 2215 the stopIdx is greater equal the length of haystack the functions 2216 returns `-1`. 2217 cs = Indicates whether the comparisons are case sensitive. 2218 */ 2219 ptrdiff_t lastIndexOfNeither(Char,Char2)(const(Char)[] haystack, 2220 const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive) 2221 @safe pure 2222 if (isSomeChar!Char && isSomeChar!Char2) 2223 { 2224 return indexOfAnyNeitherImpl!(false, false)(haystack, needles, cs); 2225 } 2226 2227 /// Ditto 2228 ptrdiff_t lastIndexOfNeither(Char,Char2)(const(Char)[] haystack, 2229 const(Char2)[] needles, in size_t stopIdx, 2230 in CaseSensitive cs = Yes.caseSensitive) 2231 @safe pure 2232 if (isSomeChar!Char && isSomeChar!Char2) 2233 { 2234 if (stopIdx < haystack.length) 2235 { 2236 return indexOfAnyNeitherImpl!(false, false)(haystack[0 .. stopIdx], 2237 needles, cs); 2238 } 2239 return -1; 2240 } 2241 2242 /// 2243 @safe pure unittest 2244 { 2245 assert(lastIndexOfNeither("abba", "a") == 2); 2246 assert(lastIndexOfNeither("def", "f") == 1); 2247 } 2248 2249 /// 2250 @safe pure unittest 2251 { 2252 assert(lastIndexOfNeither("def", "rsa", 3) == -1); 2253 assert(lastIndexOfNeither("abba", "a", 2) == 1); 2254 } 2255 2256 @safe pure unittest 2257 { 2258 import std.conv : to; 2259 2260 static foreach (S; AliasSeq!(string, wstring, dstring)) 2261 {{ 2262 auto r = to!S("").lastIndexOfNeither("hello"); 2263 assert(r == -1, to!string(r)); 2264 2265 r = to!S("hello").lastIndexOfNeither(""); 2266 assert(r == 4, to!string(r)); 2267 2268 r = to!S("").lastIndexOfNeither(""); 2269 assert(r == -1, to!string(r)); 2270 }} 2271 } 2272 2273 @safe pure unittest 2274 { 2275 import std.conv : to; 2276 import std.exception : assertCTFEable; 2277 2278 assertCTFEable!( 2279 { 2280 static foreach (S; AliasSeq!(string, wstring, dstring)) 2281 { 2282 static foreach (T; AliasSeq!(string, wstring, dstring)) 2283 {{ 2284 assert(lastIndexOfNeither(cast(S) null, to!T("a")) == -1); 2285 assert(lastIndexOfNeither(to!S("def"), to!T("rsa")) == 2); 2286 assert(lastIndexOfNeither(to!S("dfefffg"), to!T("fgh")) == 2); 2287 2288 ptrdiff_t oeIdx = 8; 2289 if (is(S == string)) 2290 { 2291 oeIdx = 9; 2292 } 2293 2294 auto foundOeIdx = lastIndexOfNeither(to!S("ödfefegff"), to!T("zeg")); 2295 assert(foundOeIdx == oeIdx, to!string(foundOeIdx)); 2296 2297 assert(lastIndexOfNeither(to!S("zfeffgfsb"), to!T("FSB"), 2298 No.caseSensitive) == 5); 2299 assert(lastIndexOfNeither(to!S("def"), to!T("MI6"), 2300 No.caseSensitive) == 2, to!string(lastIndexOfNeither(to!S("def"), 2301 to!T("MI6"), No.caseSensitive))); 2302 assert(lastIndexOfNeither(to!S("abbadeafsb"), to!T("fSb"), 2303 No.caseSensitive) == 6, to!string(lastIndexOfNeither( 2304 to!S("abbadeafsb"), to!T("fSb"), No.caseSensitive))); 2305 assert(lastIndexOfNeither(to!S("defbi"), to!T("FBI"), 2306 No.caseSensitive) == 1); 2307 assert(lastIndexOfNeither(to!S("dfefffg"), to!T("NSA"), 2308 No.caseSensitive) == 6); 2309 assert(lastIndexOfNeither(to!S("dfeffgfffö"), to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"), 2310 No.caseSensitive) == 8, to!string(lastIndexOfNeither(to!S("dfeffgfffö"), 2311 to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"), No.caseSensitive))); 2312 }} 2313 } 2314 } 2315 ); 2316 } 2317 2318 @safe pure unittest 2319 { 2320 import std.conv : to; 2321 import std.exception : assertCTFEable; 2322 2323 assertCTFEable!( 2324 { 2325 static foreach (S; AliasSeq!(string, wstring, dstring)) 2326 { 2327 static foreach (T; AliasSeq!(string, wstring, dstring)) 2328 {{ 2329 assert(lastIndexOfNeither(cast(S) null, to!T("a"), 1337) == -1); 2330 assert(lastIndexOfNeither(to!S("def"), to!T("f")) == 1); 2331 assert(lastIndexOfNeither(to!S("dfefffg"), to!T("fgh")) == 2); 2332 2333 ptrdiff_t oeIdx = 4; 2334 if (is(S == string)) 2335 { 2336 oeIdx = 5; 2337 } 2338 2339 auto foundOeIdx = lastIndexOfNeither(to!S("ödfefegff"), to!T("zeg"), 2340 7); 2341 assert(foundOeIdx == oeIdx, to!string(foundOeIdx)); 2342 2343 assert(lastIndexOfNeither(to!S("zfeffgfsb"), to!T("FSB"), 6, 2344 No.caseSensitive) == 5); 2345 assert(lastIndexOfNeither(to!S("def"), to!T("MI6"), 2, 2346 No.caseSensitive) == 1, to!string(lastIndexOfNeither(to!S("def"), 2347 to!T("MI6"), 2, No.caseSensitive))); 2348 assert(lastIndexOfNeither(to!S("abbadeafsb"), to!T("fSb"), 6, 2349 No.caseSensitive) == 5, to!string(lastIndexOfNeither( 2350 to!S("abbadeafsb"), to!T("fSb"), 6, No.caseSensitive))); 2351 assert(lastIndexOfNeither(to!S("defbi"), to!T("FBI"), 3, 2352 No.caseSensitive) == 1); 2353 assert(lastIndexOfNeither(to!S("dfefffg"), to!T("NSA"), 2, 2354 No.caseSensitive) == 1, to!string(lastIndexOfNeither( 2355 to!S("dfefffg"), to!T("NSA"), 2, No.caseSensitive))); 2356 }} 2357 } 2358 } 2359 ); 2360 } 2361 2362 /** 2363 * Returns the _representation of a string, which has the same type 2364 * as the string except the character type is replaced by `ubyte`, 2365 * `ushort`, or `uint` depending on the character width. 2366 * 2367 * Params: 2368 * s = The string to return the _representation of. 2369 * 2370 * Returns: 2371 * The _representation of the passed string. 2372 */ 2373 auto representation(Char)(Char[] s) @safe pure nothrow @nogc 2374 if (isSomeChar!Char) 2375 { 2376 import std.traits : ModifyTypePreservingTQ; 2377 alias ToRepType(T) = AliasSeq!(ubyte, ushort, uint)[T.sizeof / 2]; 2378 return cast(ModifyTypePreservingTQ!(ToRepType, Char)[])s; 2379 } 2380 2381 /// 2382 @safe pure unittest 2383 { 2384 string s = "hello"; 2385 static assert(is(typeof(representation(s)) == immutable(ubyte)[])); 2386 assert(representation(s) is cast(immutable(ubyte)[]) s); 2387 assert(representation(s) == [0x68, 0x65, 0x6c, 0x6c, 0x6f]); 2388 } 2389 2390 @system pure unittest 2391 { 2392 import std.exception : assertCTFEable; 2393 import std.traits : Fields; 2394 import std.typecons : Tuple; 2395 2396 assertCTFEable!( 2397 { 2398 void test(Char, T)(Char[] str) 2399 { 2400 static assert(is(typeof(representation(str)) == T[])); 2401 assert(representation(str) is cast(T[]) str); 2402 } 2403 2404 static foreach (Type; AliasSeq!(Tuple!(char , ubyte ), 2405 Tuple!(wchar, ushort), 2406 Tuple!(dchar, uint ))) 2407 {{ 2408 alias Char = Fields!Type[0]; 2409 alias Int = Fields!Type[1]; 2410 enum immutable(Char)[] hello = "hello"; 2411 2412 test!( immutable Char, immutable Int)(hello); 2413 test!( const Char, const Int)(hello); 2414 test!( Char, Int)(hello.dup); 2415 test!( shared Char, shared Int)(cast(shared) hello.dup); 2416 test!(const shared Char, const shared Int)(hello); 2417 }} 2418 }); 2419 } 2420 2421 2422 /** 2423 * Capitalize the first character of `s` and convert the rest of `s` to 2424 * lowercase. 2425 * 2426 * Params: 2427 * input = The string to _capitalize. 2428 * 2429 * Returns: 2430 * The capitalized string. 2431 * 2432 * See_Also: 2433 * $(REF asCapitalized, std,uni) for a lazy range version that doesn't allocate memory 2434 */ 2435 S capitalize(S)(S input) @trusted pure 2436 if (isSomeString!S) 2437 { 2438 import std.array : array; 2439 import std.uni : asCapitalized; 2440 import std.utf : byUTF; 2441 2442 return input.asCapitalized.byUTF!(ElementEncodingType!(S)).array; 2443 } 2444 2445 /// 2446 pure @safe unittest 2447 { 2448 assert(capitalize("hello") == "Hello"); 2449 assert(capitalize("World") == "World"); 2450 } 2451 2452 auto capitalize(S)(auto ref S s) 2453 if (!isSomeString!S && is(StringTypeOf!S)) 2454 { 2455 return capitalize!(StringTypeOf!S)(s); 2456 } 2457 2458 @safe pure unittest 2459 { 2460 assert(testAliasedString!capitalize("hello")); 2461 } 2462 2463 @safe pure unittest 2464 { 2465 import std.algorithm.comparison : cmp; 2466 import std.conv : to; 2467 import std.exception : assertCTFEable; 2468 2469 assertCTFEable!( 2470 { 2471 static foreach (S; AliasSeq!(string, wstring, dstring, char[], wchar[], dchar[])) 2472 {{ 2473 S s1 = to!S("FoL"); 2474 S s2; 2475 2476 s2 = capitalize(s1); 2477 assert(cmp(s2, "Fol") == 0); 2478 assert(s2 !is s1); 2479 2480 s2 = capitalize(s1[0 .. 2]); 2481 assert(cmp(s2, "Fo") == 0); 2482 2483 s1 = to!S("fOl"); 2484 s2 = capitalize(s1); 2485 assert(cmp(s2, "Fol") == 0); 2486 assert(s2 !is s1); 2487 s1 = to!S("\u0131 \u0130"); 2488 s2 = capitalize(s1); 2489 assert(cmp(s2, "\u0049 i\u0307") == 0); 2490 assert(s2 !is s1); 2491 2492 s1 = to!S("\u017F \u0049"); 2493 s2 = capitalize(s1); 2494 assert(cmp(s2, "\u0053 \u0069") == 0); 2495 assert(s2 !is s1); 2496 }} 2497 }); 2498 } 2499 2500 /++ 2501 Split `s` into an array of lines according to the unicode standard using 2502 `'\r'`, `'\n'`, `"\r\n"`, $(REF lineSep, std,uni), 2503 $(REF paraSep, std,uni), `U+0085` (NEL), `'\v'` and `'\f'` 2504 as delimiters. If `keepTerm` is set to `KeepTerminator.yes`, then the 2505 delimiter is included in the strings returned. 2506 2507 Does not throw on invalid UTF; such is simply passed unchanged 2508 to the output. 2509 2510 Allocates memory; use $(LREF lineSplitter) for an alternative that 2511 does not. 2512 2513 Adheres to $(HTTP www.unicode.org/versions/Unicode7.0.0/ch05.pdf, Unicode 7.0). 2514 2515 Params: 2516 s = a string of `chars`, `wchars`, or `dchars`, or any custom 2517 type that casts to a `string` type 2518 keepTerm = whether delimiter is included or not in the results 2519 Returns: 2520 array of strings, each element is a line that is a slice of `s` 2521 See_Also: 2522 $(LREF lineSplitter) 2523 $(REF splitter, std,algorithm) 2524 $(REF splitter, std,regex) 2525 +/ 2526 alias KeepTerminator = Flag!"keepTerminator"; 2527 2528 /// ditto 2529 C[][] splitLines(C)(C[] s, KeepTerminator keepTerm = No.keepTerminator) @safe pure 2530 if (isSomeChar!C) 2531 { 2532 import std.array : appender; 2533 import std.uni : lineSep, paraSep; 2534 2535 size_t iStart = 0; 2536 auto retval = appender!(C[][])(); 2537 2538 for (size_t i; i < s.length; ++i) 2539 { 2540 switch (s[i]) 2541 { 2542 case '\v', '\f', '\n': 2543 retval.put(s[iStart .. i + (keepTerm == Yes.keepTerminator)]); 2544 iStart = i + 1; 2545 break; 2546 2547 case '\r': 2548 if (i + 1 < s.length && s[i + 1] == '\n') 2549 { 2550 retval.put(s[iStart .. i + (keepTerm == Yes.keepTerminator) * 2]); 2551 iStart = i + 2; 2552 ++i; 2553 } 2554 else 2555 { 2556 goto case '\n'; 2557 } 2558 break; 2559 2560 static if (s[i].sizeof == 1) 2561 { 2562 /* Manually decode: 2563 * lineSep is E2 80 A8 2564 * paraSep is E2 80 A9 2565 */ 2566 case 0xE2: 2567 if (i + 2 < s.length && 2568 s[i + 1] == 0x80 && 2569 (s[i + 2] == 0xA8 || s[i + 2] == 0xA9) 2570 ) 2571 { 2572 retval.put(s[iStart .. i + (keepTerm == Yes.keepTerminator) * 3]); 2573 iStart = i + 3; 2574 i += 2; 2575 } 2576 else 2577 goto default; 2578 break; 2579 /* Manually decode: 2580 * NEL is C2 85 2581 */ 2582 case 0xC2: 2583 if (i + 1 < s.length && s[i + 1] == 0x85) 2584 { 2585 retval.put(s[iStart .. i + (keepTerm == Yes.keepTerminator) * 2]); 2586 iStart = i + 2; 2587 i += 1; 2588 } 2589 else 2590 goto default; 2591 break; 2592 } 2593 else 2594 { 2595 case lineSep: 2596 case paraSep: 2597 case '\u0085': 2598 goto case '\n'; 2599 } 2600 2601 default: 2602 break; 2603 } 2604 } 2605 2606 if (iStart != s.length) 2607 retval.put(s[iStart .. $]); 2608 2609 return retval.data; 2610 } 2611 2612 /// 2613 @safe pure nothrow unittest 2614 { 2615 string s = "Hello\nmy\rname\nis"; 2616 assert(splitLines(s) == ["Hello", "my", "name", "is"]); 2617 } 2618 2619 @safe pure nothrow unittest 2620 { 2621 string s = "a\xC2\x86b"; 2622 assert(splitLines(s) == [s]); 2623 } 2624 2625 @safe pure nothrow unittest 2626 { 2627 assert(testAliasedString!splitLines("hello\nworld")); 2628 2629 enum S : string { a = "hello\nworld" } 2630 assert(S.a.splitLines() == ["hello", "world"]); 2631 2632 char[S.a.length] sa = S.a[]; 2633 assert(sa.splitLines() == ["hello", "world"]); 2634 } 2635 2636 @safe pure unittest 2637 { 2638 import std.conv : to; 2639 import std.exception : assertCTFEable; 2640 2641 assertCTFEable!( 2642 { 2643 static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 2644 {{ 2645 auto s = to!S( 2646 "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\nsunday\n" ~ 2647 "mon\u2030day\nschadenfreude\vkindergarten\f\vcookies\u0085" 2648 ); 2649 auto lines = splitLines(s); 2650 assert(lines.length == 14); 2651 assert(lines[0] == ""); 2652 assert(lines[1] == "peter"); 2653 assert(lines[2] == ""); 2654 assert(lines[3] == "paul"); 2655 assert(lines[4] == "jerry"); 2656 assert(lines[5] == "ice"); 2657 assert(lines[6] == "cream"); 2658 assert(lines[7] == ""); 2659 assert(lines[8] == "sunday"); 2660 assert(lines[9] == "mon\u2030day"); 2661 assert(lines[10] == "schadenfreude"); 2662 assert(lines[11] == "kindergarten"); 2663 assert(lines[12] == ""); 2664 assert(lines[13] == "cookies"); 2665 2666 2667 ubyte[] u = ['a', 0xFF, 0x12, 'b']; // invalid UTF 2668 auto ulines = splitLines(cast(char[]) u); 2669 assert(cast(ubyte[])(ulines[0]) == u); 2670 2671 lines = splitLines(s, Yes.keepTerminator); 2672 assert(lines.length == 14); 2673 assert(lines[0] == "\r"); 2674 assert(lines[1] == "peter\n"); 2675 assert(lines[2] == "\r"); 2676 assert(lines[3] == "paul\r\n"); 2677 assert(lines[4] == "jerry\u2028"); 2678 assert(lines[5] == "ice\u2029"); 2679 assert(lines[6] == "cream\n"); 2680 assert(lines[7] == "\n"); 2681 assert(lines[8] == "sunday\n"); 2682 assert(lines[9] == "mon\u2030day\n"); 2683 assert(lines[10] == "schadenfreude\v"); 2684 assert(lines[11] == "kindergarten\f"); 2685 assert(lines[12] == "\v"); 2686 assert(lines[13] == "cookies\u0085"); 2687 2688 s.popBack(); // Lop-off trailing \n 2689 lines = splitLines(s); 2690 assert(lines.length == 14); 2691 assert(lines[9] == "mon\u2030day"); 2692 2693 lines = splitLines(s, Yes.keepTerminator); 2694 assert(lines.length == 14); 2695 assert(lines[13] == "cookies"); 2696 }} 2697 }); 2698 } 2699 2700 private struct LineSplitter(KeepTerminator keepTerm = No.keepTerminator, Range) 2701 { 2702 import std.conv : unsigned; 2703 import std.uni : lineSep, paraSep; 2704 private: 2705 Range _input; 2706 2707 alias IndexType = typeof(unsigned(_input.length)); 2708 enum IndexType _unComputed = IndexType.max; 2709 IndexType iStart = _unComputed; 2710 IndexType iEnd = 0; 2711 IndexType iNext = 0; 2712 2713 public: 2714 this(Range input) 2715 { 2716 _input = input; 2717 } 2718 2719 static if (isInfinite!Range) 2720 { 2721 enum bool empty = false; 2722 } 2723 else 2724 { 2725 @property bool empty() 2726 { 2727 return iStart == _unComputed && iNext == _input.length; 2728 } 2729 } 2730 2731 @property typeof(_input) front() 2732 { 2733 if (iStart == _unComputed) 2734 { 2735 iStart = iNext; 2736 Loop: 2737 for (IndexType i = iNext; ; ++i) 2738 { 2739 if (i == _input.length) 2740 { 2741 iEnd = i; 2742 iNext = i; 2743 break Loop; 2744 } 2745 switch (_input[i]) 2746 { 2747 case '\v', '\f', '\n': 2748 iEnd = i + (keepTerm == Yes.keepTerminator); 2749 iNext = i + 1; 2750 break Loop; 2751 2752 case '\r': 2753 if (i + 1 < _input.length && _input[i + 1] == '\n') 2754 { 2755 iEnd = i + (keepTerm == Yes.keepTerminator) * 2; 2756 iNext = i + 2; 2757 break Loop; 2758 } 2759 else 2760 { 2761 goto case '\n'; 2762 } 2763 2764 static if (_input[i].sizeof == 1) 2765 { 2766 /* Manually decode: 2767 * lineSep is E2 80 A8 2768 * paraSep is E2 80 A9 2769 */ 2770 case 0xE2: 2771 if (i + 2 < _input.length && 2772 _input[i + 1] == 0x80 && 2773 (_input[i + 2] == 0xA8 || _input[i + 2] == 0xA9) 2774 ) 2775 { 2776 iEnd = i + (keepTerm == Yes.keepTerminator) * 3; 2777 iNext = i + 3; 2778 break Loop; 2779 } 2780 else 2781 goto default; 2782 /* Manually decode: 2783 * NEL is C2 85 2784 */ 2785 case 0xC2: 2786 if (i + 1 < _input.length && _input[i + 1] == 0x85) 2787 { 2788 iEnd = i + (keepTerm == Yes.keepTerminator) * 2; 2789 iNext = i + 2; 2790 break Loop; 2791 } 2792 else 2793 goto default; 2794 } 2795 else 2796 { 2797 case '\u0085': 2798 case lineSep: 2799 case paraSep: 2800 goto case '\n'; 2801 } 2802 2803 default: 2804 break; 2805 } 2806 } 2807 } 2808 return _input[iStart .. iEnd]; 2809 } 2810 2811 void popFront() 2812 { 2813 if (iStart == _unComputed) 2814 { 2815 assert(!empty, "Can not popFront an empty range"); 2816 front; 2817 } 2818 iStart = _unComputed; 2819 } 2820 2821 static if (isForwardRange!Range) 2822 { 2823 @property typeof(this) save() 2824 { 2825 auto ret = this; 2826 ret._input = _input.save; 2827 return ret; 2828 } 2829 } 2830 } 2831 2832 /*********************************** 2833 * Split an array or slicable range of characters into a range of lines 2834 using `'\r'`, `'\n'`, `'\v'`, `'\f'`, `"\r\n"`, 2835 $(REF lineSep, std,uni), $(REF paraSep, std,uni) and `'\u0085'` (NEL) 2836 as delimiters. If `keepTerm` is set to `Yes.keepTerminator`, then the 2837 delimiter is included in the slices returned. 2838 2839 Does not throw on invalid UTF; such is simply passed unchanged 2840 to the output. 2841 2842 Adheres to $(HTTP www.unicode.org/versions/Unicode7.0.0/ch05.pdf, Unicode 7.0). 2843 2844 Does not allocate memory. 2845 2846 Params: 2847 r = array of `chars`, `wchars`, or `dchars` or a slicable range 2848 keepTerm = whether delimiter is included or not in the results 2849 Returns: 2850 range of slices of the input range `r` 2851 2852 See_Also: 2853 $(LREF splitLines) 2854 $(REF splitter, std,algorithm) 2855 $(REF splitter, std,regex) 2856 */ 2857 auto lineSplitter(KeepTerminator keepTerm = No.keepTerminator, Range)(Range r) 2858 if (hasSlicing!Range && hasLength!Range && isSomeChar!(ElementType!Range) && !isSomeString!Range) 2859 { 2860 return LineSplitter!(keepTerm, Range)(r); 2861 } 2862 2863 /// Ditto 2864 auto lineSplitter(KeepTerminator keepTerm = No.keepTerminator, C)(C[] r) 2865 if (isSomeChar!C) 2866 { 2867 return LineSplitter!(keepTerm, C[])(r); 2868 } 2869 2870 /// 2871 @safe pure unittest 2872 { 2873 import std.array : array; 2874 2875 string s = "Hello\nmy\rname\nis"; 2876 2877 /* notice the call to 'array' to turn the lazy range created by 2878 lineSplitter comparable to the string[] created by splitLines. 2879 */ 2880 assert(lineSplitter(s).array == splitLines(s)); 2881 } 2882 2883 @safe pure unittest 2884 { 2885 import std.array : array; 2886 import std.conv : to; 2887 import std.exception : assertCTFEable; 2888 2889 assertCTFEable!( 2890 { 2891 static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 2892 {{ 2893 auto s = to!S( 2894 "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\n" ~ 2895 "sunday\nmon\u2030day\nschadenfreude\vkindergarten\f\vcookies\u0085" 2896 ); 2897 2898 auto lines = lineSplitter(s).array; 2899 assert(lines.length == 14); 2900 assert(lines[0] == ""); 2901 assert(lines[1] == "peter"); 2902 assert(lines[2] == ""); 2903 assert(lines[3] == "paul"); 2904 assert(lines[4] == "jerry"); 2905 assert(lines[5] == "ice"); 2906 assert(lines[6] == "cream"); 2907 assert(lines[7] == ""); 2908 assert(lines[8] == "sunday"); 2909 assert(lines[9] == "mon\u2030day"); 2910 assert(lines[10] == "schadenfreude"); 2911 assert(lines[11] == "kindergarten"); 2912 assert(lines[12] == ""); 2913 assert(lines[13] == "cookies"); 2914 2915 2916 ubyte[] u = ['a', 0xFF, 0x12, 'b']; // invalid UTF 2917 auto ulines = lineSplitter(cast(char[]) u).array; 2918 assert(cast(ubyte[])(ulines[0]) == u); 2919 2920 lines = lineSplitter!(Yes.keepTerminator)(s).array; 2921 assert(lines.length == 14); 2922 assert(lines[0] == "\r"); 2923 assert(lines[1] == "peter\n"); 2924 assert(lines[2] == "\r"); 2925 assert(lines[3] == "paul\r\n"); 2926 assert(lines[4] == "jerry\u2028"); 2927 assert(lines[5] == "ice\u2029"); 2928 assert(lines[6] == "cream\n"); 2929 assert(lines[7] == "\n"); 2930 assert(lines[8] == "sunday\n"); 2931 assert(lines[9] == "mon\u2030day\n"); 2932 assert(lines[10] == "schadenfreude\v"); 2933 assert(lines[11] == "kindergarten\f"); 2934 assert(lines[12] == "\v"); 2935 assert(lines[13] == "cookies\u0085"); 2936 2937 s.popBack(); // Lop-off trailing \n 2938 lines = lineSplitter(s).array; 2939 assert(lines.length == 14); 2940 assert(lines[9] == "mon\u2030day"); 2941 2942 lines = lineSplitter!(Yes.keepTerminator)(s).array; 2943 assert(lines.length == 14); 2944 assert(lines[13] == "cookies"); 2945 }} 2946 }); 2947 } 2948 2949 /// 2950 @nogc @safe pure unittest 2951 { 2952 auto s = "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\nsunday\nmon\u2030day\n"; 2953 auto lines = s.lineSplitter(); 2954 static immutable witness = ["", "peter", "", "paul", "jerry", "ice", "cream", "", "sunday", "mon\u2030day"]; 2955 uint i; 2956 foreach (line; lines) 2957 { 2958 assert(line == witness[i++]); 2959 } 2960 assert(i == witness.length); 2961 } 2962 2963 @nogc @safe pure unittest 2964 { 2965 import std.algorithm.comparison : equal; 2966 import std.range : only; 2967 2968 auto s = "std/string.d"; 2969 auto as = TestAliasedString(s); 2970 assert(equal(s.lineSplitter(), as.lineSplitter())); 2971 2972 enum S : string { a = "hello\nworld" } 2973 assert(equal(S.a.lineSplitter(), only("hello", "world"))); 2974 2975 char[S.a.length] sa = S.a[]; 2976 assert(equal(sa.lineSplitter(), only("hello", "world"))); 2977 } 2978 2979 @safe pure unittest 2980 { 2981 auto s = "line1\nline2"; 2982 auto spl0 = s.lineSplitter!(Yes.keepTerminator); 2983 auto spl1 = spl0.save; 2984 spl0.popFront; 2985 assert(spl1.front ~ spl0.front == s); 2986 string r = "a\xC2\x86b"; 2987 assert(r.lineSplitter.front == r); 2988 } 2989 2990 /++ 2991 Strips leading whitespace (as defined by $(REF isWhite, std,uni)) or 2992 as specified in the second argument. 2993 2994 Params: 2995 input = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives) 2996 of characters 2997 chars = string of characters to be stripped 2998 2999 Returns: `input` stripped of leading whitespace or characters 3000 specified in the second argument. 3001 3002 Postconditions: `input` and the returned value 3003 will share the same tail (see $(REF sameTail, std,array)). 3004 3005 See_Also: 3006 Generic stripping on ranges: $(REF _stripLeft, std, algorithm, mutation) 3007 +/ 3008 auto stripLeft(Range)(Range input) 3009 if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) && 3010 !isInfinite!Range && !isConvertibleToString!Range) 3011 { 3012 import std.traits : isDynamicArray; 3013 static import std.ascii; 3014 static import std.uni; 3015 3016 static if (is(immutable ElementEncodingType!Range == immutable dchar) 3017 || is(immutable ElementEncodingType!Range == immutable wchar)) 3018 { 3019 // Decoding is never needed for dchar. It happens not to be needed 3020 // here for wchar because no whitepace is outside the basic 3021 // multilingual plane meaning every whitespace character is encoded 3022 // with a single wchar and due to the design of UTF-16 those wchars 3023 // will not occur as part of the encoding of multi-wchar codepoints. 3024 static if (isDynamicArray!Range) 3025 { 3026 foreach (i; 0 .. input.length) 3027 { 3028 if (!std.uni.isWhite(input[i])) 3029 return input[i .. $]; 3030 } 3031 return input[$ .. $]; 3032 } 3033 else 3034 { 3035 while (!input.empty) 3036 { 3037 if (!std.uni.isWhite(input.front)) 3038 break; 3039 input.popFront(); 3040 } 3041 return input; 3042 } 3043 } 3044 else 3045 { 3046 static if (isDynamicArray!Range) 3047 { 3048 // ASCII optimization for dynamic arrays. 3049 size_t i = 0; 3050 for (const size_t end = input.length; i < end; ++i) 3051 { 3052 auto c = input[i]; 3053 if (c >= 0x80) goto NonAsciiPath; 3054 if (!std.ascii.isWhite(c)) break; 3055 } 3056 input = input[i .. $]; 3057 return input; 3058 3059 NonAsciiPath: 3060 input = input[i .. $]; 3061 // Fall through to standard case. 3062 } 3063 3064 import std.utf : decode, decodeFront, UseReplacementDchar; 3065 3066 static if (isNarrowString!Range) 3067 { 3068 for (size_t index = 0; index < input.length;) 3069 { 3070 const saveIndex = index; 3071 if (!std.uni.isWhite(decode!(UseReplacementDchar.yes)(input, index))) 3072 return input[saveIndex .. $]; 3073 } 3074 return input[$ .. $]; 3075 } 3076 else 3077 { 3078 while (!input.empty) 3079 { 3080 auto c = input.front; 3081 if (std.ascii.isASCII(c)) 3082 { 3083 if (!std.ascii.isWhite(c)) 3084 break; 3085 input.popFront(); 3086 } 3087 else 3088 { 3089 auto save = input.save; 3090 auto dc = decodeFront!(UseReplacementDchar.yes)(input); 3091 if (!std.uni.isWhite(dc)) 3092 return save; 3093 } 3094 } 3095 return input; 3096 } 3097 } 3098 } 3099 3100 /// 3101 nothrow @safe pure unittest 3102 { 3103 import std.uni : lineSep, paraSep; 3104 assert(stripLeft(" hello world ") == 3105 "hello world "); 3106 assert(stripLeft("\n\t\v\rhello world\n\t\v\r") == 3107 "hello world\n\t\v\r"); 3108 assert(stripLeft(" \u2028hello world") == 3109 "hello world"); 3110 assert(stripLeft("hello world") == 3111 "hello world"); 3112 assert(stripLeft([lineSep] ~ "hello world" ~ lineSep) == 3113 "hello world" ~ [lineSep]); 3114 assert(stripLeft([paraSep] ~ "hello world" ~ paraSep) == 3115 "hello world" ~ [paraSep]); 3116 3117 import std.array : array; 3118 import std.utf : byChar; 3119 assert(stripLeft(" hello world "w.byChar).array == 3120 "hello world "); 3121 assert(stripLeft(" \u2022hello world ".byChar).array == 3122 "\u2022hello world "); 3123 } 3124 3125 auto stripLeft(Range)(auto ref Range str) 3126 if (isConvertibleToString!Range) 3127 { 3128 return stripLeft!(StringTypeOf!Range)(str); 3129 } 3130 3131 @nogc nothrow @safe pure unittest 3132 { 3133 assert(testAliasedString!stripLeft(" hello")); 3134 } 3135 3136 /// Ditto 3137 auto stripLeft(Range, Char)(Range input, const(Char)[] chars) 3138 if (((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range)) || 3139 isConvertibleToString!Range) && isSomeChar!Char) 3140 { 3141 static if (isConvertibleToString!Range) 3142 return stripLeft!(StringTypeOf!Range)(input, chars); 3143 else 3144 { 3145 for (; !input.empty; input.popFront) 3146 { 3147 if (chars.indexOf(input.front) == -1) 3148 break; 3149 } 3150 return input; 3151 } 3152 } 3153 3154 /// 3155 @safe pure unittest 3156 { 3157 assert(stripLeft(" hello world ", " ") == 3158 "hello world "); 3159 assert(stripLeft("xxxxxhello world ", "x") == 3160 "hello world "); 3161 assert(stripLeft("xxxyy hello world ", "xy ") == 3162 "hello world "); 3163 } 3164 3165 /// 3166 @safe pure unittest 3167 { 3168 import std.array : array; 3169 import std.utf : byChar, byWchar, byDchar; 3170 3171 assert(stripLeft(" xxxyy hello world "w.byChar, "xy ").array == 3172 "hello world "); 3173 3174 assert(stripLeft("\u2028\u2020hello world\u2028"w.byWchar, 3175 "\u2028").array == "\u2020hello world\u2028"); 3176 assert(stripLeft("\U00010001hello world"w.byWchar, " ").array == 3177 "\U00010001hello world"w); 3178 assert(stripLeft("\U00010001 xyhello world"d.byDchar, 3179 "\U00010001 xy").array == "hello world"d); 3180 3181 assert(stripLeft("\u2020hello"w, "\u2020"w) == "hello"w); 3182 assert(stripLeft("\U00010001hello"d, "\U00010001"d) == "hello"d); 3183 assert(stripLeft(" hello ", "") == " hello "); 3184 } 3185 3186 @safe pure unittest 3187 { 3188 assert(testAliasedString!stripLeft(" xyz hello", "xyz ")); 3189 } 3190 3191 /++ 3192 Strips trailing whitespace (as defined by $(REF isWhite, std,uni)) or 3193 as specified in the second argument. 3194 3195 Params: 3196 str = string or random access range of characters 3197 chars = string of characters to be stripped 3198 3199 Returns: 3200 slice of `str` stripped of trailing whitespace or characters 3201 specified in the second argument. 3202 3203 See_Also: 3204 Generic stripping on ranges: $(REF _stripRight, std, algorithm, mutation) 3205 +/ 3206 auto stripRight(Range)(Range str) 3207 if (isSomeString!Range || 3208 isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range && 3209 !isConvertibleToString!Range && 3210 isSomeChar!(ElementEncodingType!Range)) 3211 { 3212 import std.traits : isDynamicArray; 3213 import std.uni : isWhite; 3214 alias C = Unqual!(ElementEncodingType!(typeof(str))); 3215 3216 static if (isSomeString!(typeof(str)) && C.sizeof >= 2) 3217 { 3218 // No whitespace takes multiple wchars to encode and due to 3219 // the design of UTF-16 those wchars will not occur as part 3220 // of the encoding of multi-wchar codepoints. 3221 foreach_reverse (i, C c; str) 3222 { 3223 if (!isWhite(c)) 3224 return str[0 .. i + 1]; 3225 } 3226 return str[0 .. 0]; 3227 } 3228 else 3229 { 3230 // ASCII optimization for dynamic arrays. 3231 static if (isDynamicArray!(typeof(str))) 3232 { 3233 static import std.ascii; 3234 foreach_reverse (i, C c; str) 3235 { 3236 if (c >= 0x80) 3237 { 3238 str = str[0 .. i + 1]; 3239 goto NonAsciiPath; 3240 } 3241 if (!std.ascii.isWhite(c)) 3242 { 3243 return str[0 .. i + 1]; 3244 } 3245 } 3246 return str[0 .. 0]; 3247 } 3248 3249 NonAsciiPath: 3250 3251 size_t i = str.length; 3252 while (i--) 3253 { 3254 static if (C.sizeof >= 2) 3255 { 3256 // No whitespace takes multiple wchars to encode and due to 3257 // the design of UTF-16 those wchars will not occur as part 3258 // of the encoding of multi-wchar codepoints. 3259 if (isWhite(str[i])) 3260 continue; 3261 break; 3262 } 3263 else static if (C.sizeof == 1) 3264 { 3265 const cx = str[i]; 3266 if (cx <= 0x7F) 3267 { 3268 if (isWhite(cx)) 3269 continue; 3270 break; 3271 } 3272 else 3273 { 3274 if (i == 0 || (0b1100_0000 & cx) != 0b1000_0000) 3275 break; 3276 const uint d = 0b0011_1111 & cx; 3277 const c2 = str[i - 1]; 3278 if ((c2 & 0b1110_0000) == 0b1100_0000) // 2 byte encoding. 3279 { 3280 if (isWhite(d + (uint(c2 & 0b0001_1111) << 6))) 3281 { 3282 i--; 3283 continue; 3284 } 3285 break; 3286 } 3287 if (i == 1 || (c2 & 0b1100_0000) != 0b1000_0000) 3288 break; 3289 const c3 = str[i - 2]; 3290 // In UTF-8 all whitespace is encoded in 3 bytes or fewer. 3291 if ((c3 & 0b1111_0000) == 0b1110_0000 && 3292 isWhite(d + (uint(c2 & 0b0011_1111) << 6) + (uint(c3 & 0b0000_1111) << 12))) 3293 { 3294 i -= 2; 3295 continue; 3296 } 3297 break; 3298 } 3299 } 3300 else 3301 static assert(0); 3302 } 3303 3304 return str[0 .. i + 1]; 3305 } 3306 } 3307 3308 /// 3309 nothrow @safe pure 3310 unittest 3311 { 3312 import std.uni : lineSep, paraSep; 3313 assert(stripRight(" hello world ") == 3314 " hello world"); 3315 assert(stripRight("\n\t\v\rhello world\n\t\v\r") == 3316 "\n\t\v\rhello world"); 3317 assert(stripRight("hello world") == 3318 "hello world"); 3319 assert(stripRight([lineSep] ~ "hello world" ~ lineSep) == 3320 [lineSep] ~ "hello world"); 3321 assert(stripRight([paraSep] ~ "hello world" ~ paraSep) == 3322 [paraSep] ~ "hello world"); 3323 } 3324 3325 auto stripRight(Range)(auto ref Range str) 3326 if (isConvertibleToString!Range) 3327 { 3328 return stripRight!(StringTypeOf!Range)(str); 3329 } 3330 3331 @nogc nothrow @safe pure unittest 3332 { 3333 assert(testAliasedString!stripRight("hello ")); 3334 } 3335 3336 @safe pure unittest 3337 { 3338 import std.array : array; 3339 import std.uni : lineSep, paraSep; 3340 import std.utf : byChar, byDchar, byUTF, byWchar, invalidUTFstrings; 3341 assert(stripRight(" hello world ".byChar).array == " hello world"); 3342 assert(stripRight("\n\t\v\rhello world\n\t\v\r"w.byWchar).array == "\n\t\v\rhello world"w); 3343 assert(stripRight("hello world"d.byDchar).array == "hello world"d); 3344 assert(stripRight("\u2028hello world\u2020\u2028".byChar).array == "\u2028hello world\u2020"); 3345 assert(stripRight("hello world\U00010001"w.byWchar).array == "hello world\U00010001"w); 3346 3347 static foreach (C; AliasSeq!(char, wchar, dchar)) 3348 { 3349 foreach (s; invalidUTFstrings!C()) 3350 { 3351 cast(void) stripRight(s.byUTF!C).array; 3352 } 3353 } 3354 3355 cast(void) stripRight("a\x80".byUTF!char).array; 3356 wstring ws = ['a', cast(wchar) 0xDC00]; 3357 cast(void) stripRight(ws.byUTF!wchar).array; 3358 } 3359 3360 /// Ditto 3361 auto stripRight(Range, Char)(Range str, const(Char)[] chars) 3362 if (((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range)) || 3363 isConvertibleToString!Range) && isSomeChar!Char) 3364 { 3365 static if (isConvertibleToString!Range) 3366 return stripRight!(StringTypeOf!Range)(str, chars); 3367 else 3368 { 3369 for (; !str.empty; str.popBack) 3370 { 3371 if (chars.indexOf(str.back) == -1) 3372 break; 3373 } 3374 return str; 3375 } 3376 } 3377 3378 /// 3379 @safe pure 3380 unittest 3381 { 3382 assert(stripRight(" hello world ", "x") == 3383 " hello world "); 3384 assert(stripRight(" hello world ", " ") == 3385 " hello world"); 3386 assert(stripRight(" hello worldxy ", "xy ") == 3387 " hello world"); 3388 } 3389 3390 @safe pure unittest 3391 { 3392 assert(testAliasedString!stripRight("hello xyz ", "xyz ")); 3393 } 3394 3395 @safe pure unittest 3396 { 3397 import std.array : array; 3398 import std.utf : byChar, byDchar, byUTF, byWchar; 3399 3400 assert(stripRight(" hello world xyz ".byChar, 3401 "xyz ").array == " hello world"); 3402 assert(stripRight("\u2028hello world\u2020\u2028"w.byWchar, 3403 "\u2028").array == "\u2028hello world\u2020"); 3404 assert(stripRight("hello world\U00010001"w.byWchar, 3405 " ").array == "hello world\U00010001"w); 3406 assert(stripRight("hello world\U00010001 xy"d.byDchar, 3407 "\U00010001 xy").array == "hello world"d); 3408 assert(stripRight("hello\u2020"w, "\u2020"w) == "hello"w); 3409 assert(stripRight("hello\U00010001"d, "\U00010001"d) == "hello"d); 3410 assert(stripRight(" hello ", "") == " hello "); 3411 } 3412 3413 3414 /++ 3415 Strips both leading and trailing whitespace (as defined by 3416 $(REF isWhite, std,uni)) or as specified in the second argument. 3417 3418 Params: 3419 str = string or random access range of characters 3420 chars = string of characters to be stripped 3421 leftChars = string of leading characters to be stripped 3422 rightChars = string of trailing characters to be stripped 3423 3424 Returns: 3425 slice of `str` stripped of leading and trailing whitespace 3426 or characters as specified in the second argument. 3427 3428 See_Also: 3429 Generic stripping on ranges: $(REF _strip, std, algorithm, mutation) 3430 +/ 3431 auto strip(Range)(Range str) 3432 if (isSomeString!Range || 3433 isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range && 3434 !isConvertibleToString!Range && 3435 isSomeChar!(ElementEncodingType!Range)) 3436 { 3437 return stripRight(stripLeft(str)); 3438 } 3439 3440 /// 3441 @safe pure unittest 3442 { 3443 import std.uni : lineSep, paraSep; 3444 assert(strip(" hello world ") == 3445 "hello world"); 3446 assert(strip("\n\t\v\rhello world\n\t\v\r") == 3447 "hello world"); 3448 assert(strip("hello world") == 3449 "hello world"); 3450 assert(strip([lineSep] ~ "hello world" ~ [lineSep]) == 3451 "hello world"); 3452 assert(strip([paraSep] ~ "hello world" ~ [paraSep]) == 3453 "hello world"); 3454 } 3455 3456 auto strip(Range)(auto ref Range str) 3457 if (isConvertibleToString!Range) 3458 { 3459 return strip!(StringTypeOf!Range)(str); 3460 } 3461 3462 @safe pure unittest 3463 { 3464 assert(testAliasedString!strip(" hello world ")); 3465 } 3466 3467 @safe pure unittest 3468 { 3469 import std.algorithm.comparison : equal; 3470 import std.conv : to; 3471 import std.exception : assertCTFEable; 3472 3473 assertCTFEable!( 3474 { 3475 static foreach (S; AliasSeq!( char[], const char[], string, 3476 wchar[], const wchar[], wstring, 3477 dchar[], const dchar[], dstring)) 3478 { 3479 assert(equal(stripLeft(to!S(" foo\t ")), "foo\t ")); 3480 assert(equal(stripLeft(to!S("\u2008 foo\t \u2007")), "foo\t \u2007")); 3481 assert(equal(stripLeft(to!S("\u0085 μ \u0085 \u00BB \r")), "μ \u0085 \u00BB \r")); 3482 assert(equal(stripLeft(to!S("1")), "1")); 3483 assert(equal(stripLeft(to!S("\U0010FFFE")), "\U0010FFFE")); 3484 assert(equal(stripLeft(to!S("")), "")); 3485 3486 assert(equal(stripRight(to!S(" foo\t ")), " foo")); 3487 assert(equal(stripRight(to!S("\u2008 foo\t \u2007")), "\u2008 foo")); 3488 assert(equal(stripRight(to!S("\u0085 μ \u0085 \u00BB \r")), "\u0085 μ \u0085 \u00BB")); 3489 assert(equal(stripRight(to!S("1")), "1")); 3490 assert(equal(stripRight(to!S("\U0010FFFE")), "\U0010FFFE")); 3491 assert(equal(stripRight(to!S("")), "")); 3492 3493 assert(equal(strip(to!S(" foo\t ")), "foo")); 3494 assert(equal(strip(to!S("\u2008 foo\t \u2007")), "foo")); 3495 assert(equal(strip(to!S("\u0085 μ \u0085 \u00BB \r")), "μ \u0085 \u00BB")); 3496 assert(equal(strip(to!S("\U0010FFFE")), "\U0010FFFE")); 3497 assert(equal(strip(to!S("")), "")); 3498 } 3499 }); 3500 } 3501 3502 @safe pure unittest 3503 { 3504 import std.array : sameHead, sameTail; 3505 import std.exception : assertCTFEable; 3506 assertCTFEable!( 3507 { 3508 wstring s = " "; 3509 assert(s.sameTail(s.stripLeft())); 3510 assert(s.sameHead(s.stripRight())); 3511 }); 3512 } 3513 3514 /// Ditto 3515 auto strip(Range, Char)(Range str, const(Char)[] chars) 3516 if (((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range)) || 3517 isConvertibleToString!Range) && isSomeChar!Char) 3518 { 3519 static if (isConvertibleToString!Range) 3520 return strip!(StringTypeOf!Range)(str, chars); 3521 else 3522 return stripRight(stripLeft(str, chars), chars); 3523 } 3524 3525 /// 3526 @safe pure unittest 3527 { 3528 assert(strip(" hello world ", "x") == 3529 " hello world "); 3530 assert(strip(" hello world ", " ") == 3531 "hello world"); 3532 assert(strip(" xyxyhello worldxyxy ", "xy ") == 3533 "hello world"); 3534 assert(strip("\u2020hello\u2020"w, "\u2020"w) == "hello"w); 3535 assert(strip("\U00010001hello\U00010001"d, "\U00010001"d) == "hello"d); 3536 assert(strip(" hello ", "") == " hello "); 3537 } 3538 3539 @safe pure unittest 3540 { 3541 assert(testAliasedString!strip(" xyz hello world xyz ", "xyz ")); 3542 } 3543 3544 /// Ditto 3545 auto strip(Range, Char)(Range str, const(Char)[] leftChars, const(Char)[] rightChars) 3546 if (((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range)) || 3547 isConvertibleToString!Range) && isSomeChar!Char) 3548 { 3549 static if (isConvertibleToString!Range) 3550 return strip!(StringTypeOf!Range)(str, leftChars, rightChars); 3551 else 3552 return stripRight(stripLeft(str, leftChars), rightChars); 3553 } 3554 3555 /// 3556 @safe pure unittest 3557 { 3558 assert(strip("xxhelloyy", "x", "y") == "hello"); 3559 assert(strip(" xyxyhello worldxyxyzz ", "xy ", "xyz ") == 3560 "hello world"); 3561 assert(strip("\u2020hello\u2028"w, "\u2020"w, "\u2028"w) == "hello"w); 3562 assert(strip("\U00010001hello\U00010002"d, "\U00010001"d, "\U00010002"d) == 3563 "hello"d); 3564 assert(strip(" hello ", "", "") == " hello "); 3565 } 3566 3567 @safe pure unittest 3568 { 3569 assert(testAliasedString!strip(" xy hello world pq ", "xy ", "pq ")); 3570 } 3571 3572 @safe pure unittest 3573 { 3574 import std.algorithm.comparison : equal; 3575 import std.conv : to; 3576 import std.exception : assertCTFEable; 3577 3578 assertCTFEable!( 3579 { 3580 static foreach (S; AliasSeq!( char[], const char[], string, 3581 wchar[], const wchar[], wstring, 3582 dchar[], const dchar[], dstring)) 3583 { 3584 assert(equal(stripLeft(to!S(" \tfoo\t "), "\t "), "foo\t ")); 3585 assert(equal(stripLeft(to!S("\u2008 foo\t \u2007"), "\u2008 "), 3586 "foo\t \u2007")); 3587 assert(equal(stripLeft(to!S("\u0085 μ \u0085 \u00BB \r"), "\u0085 "), 3588 "μ \u0085 \u00BB \r")); 3589 assert(equal(stripLeft(to!S("1"), " "), "1")); 3590 assert(equal(stripLeft(to!S("\U0010FFFE"), " "), "\U0010FFFE")); 3591 assert(equal(stripLeft(to!S(""), " "), "")); 3592 3593 assert(equal(stripRight(to!S(" foo\t "), "\t "), " foo")); 3594 assert(equal(stripRight(to!S("\u2008 foo\t \u2007"), "\u2007\t "), 3595 "\u2008 foo")); 3596 assert(equal(stripRight(to!S("\u0085 μ \u0085 \u00BB \r"), "\r "), 3597 "\u0085 μ \u0085 \u00BB")); 3598 assert(equal(stripRight(to!S("1"), " "), "1")); 3599 assert(equal(stripRight(to!S("\U0010FFFE"), " "), "\U0010FFFE")); 3600 assert(equal(stripRight(to!S(""), " "), "")); 3601 3602 assert(equal(strip(to!S(" foo\t "), "\t "), "foo")); 3603 assert(equal(strip(to!S("\u2008 foo\t \u2007"), "\u2008\u2007\t "), 3604 "foo")); 3605 assert(equal(strip(to!S("\u0085 μ \u0085 \u00BB \r"), "\u0085\r "), 3606 "μ \u0085 \u00BB")); 3607 assert(equal(strip(to!S("\U0010FFFE"), " "), "\U0010FFFE")); 3608 assert(equal(strip(to!S(""), " "), "")); 3609 3610 assert(equal(strip(to!S(" \nfoo\t "), "\n ", "\t "), "foo")); 3611 assert(equal(strip(to!S("\u2008\n foo\t \u2007"), 3612 "\u2008\n ", "\u2007\t "), "foo")); 3613 assert(equal(strip(to!S("\u0085 μ \u0085 \u00BB μ \u00BB\r"), 3614 "\u0085 ", "\u00BB\r "), "μ \u0085 \u00BB μ")); 3615 assert(equal(strip(to!S("\U0010FFFE"), " ", " "), "\U0010FFFE")); 3616 assert(equal(strip(to!S(""), " ", " "), "")); 3617 } 3618 }); 3619 } 3620 3621 @safe pure unittest 3622 { 3623 import std.array : sameHead, sameTail; 3624 import std.exception : assertCTFEable; 3625 assertCTFEable!( 3626 { 3627 wstring s = " xyz "; 3628 assert(s.sameTail(s.stripLeft(" "))); 3629 assert(s.sameHead(s.stripRight(" "))); 3630 }); 3631 } 3632 3633 3634 /++ 3635 If `str` ends with `delimiter`, then `str` is returned without 3636 `delimiter` on its end. If it `str` does $(I not) end with 3637 `delimiter`, then it is returned unchanged. 3638 3639 If no `delimiter` is given, then one trailing `'\r'`, `'\n'`, 3640 `"\r\n"`, `'\f'`, `'\v'`, $(REF lineSep, std,uni), $(REF paraSep, std,uni), or $(REF nelSep, std,uni) 3641 is removed from the end of `str`. If `str` does not end with any of those characters, 3642 then it is returned unchanged. 3643 3644 Params: 3645 str = string or indexable range of characters 3646 delimiter = string of characters to be sliced off end of str[] 3647 3648 Returns: 3649 slice of str 3650 +/ 3651 Range chomp(Range)(Range str) 3652 if ((isRandomAccessRange!Range && isSomeChar!(ElementEncodingType!Range) || 3653 isNarrowString!Range) && 3654 !isConvertibleToString!Range) 3655 { 3656 import std.uni : lineSep, paraSep, nelSep; 3657 if (str.empty) 3658 return str; 3659 3660 alias C = ElementEncodingType!Range; 3661 3662 switch (str[$ - 1]) 3663 { 3664 case '\n': 3665 { 3666 if (str.length > 1 && str[$ - 2] == '\r') 3667 return str[0 .. $ - 2]; 3668 goto case; 3669 } 3670 case '\r', '\v', '\f': 3671 return str[0 .. $ - 1]; 3672 3673 // Pop off the last character if lineSep, paraSep, or nelSep 3674 static if (is(C : const char)) 3675 { 3676 /* Manually decode: 3677 * lineSep is E2 80 A8 3678 * paraSep is E2 80 A9 3679 */ 3680 case 0xA8: // Last byte of lineSep 3681 case 0xA9: // Last byte of paraSep 3682 if (str.length > 2 && str[$ - 2] == 0x80 && str[$ - 3] == 0xE2) 3683 return str [0 .. $ - 3]; 3684 goto default; 3685 3686 /* Manually decode: 3687 * NEL is C2 85 3688 */ 3689 case 0x85: 3690 if (str.length > 1 && str[$ - 2] == 0xC2) 3691 return str [0 .. $ - 2]; 3692 goto default; 3693 } 3694 else 3695 { 3696 case lineSep: 3697 case paraSep: 3698 case nelSep: 3699 return str[0 .. $ - 1]; 3700 } 3701 default: 3702 return str; 3703 } 3704 } 3705 3706 /// Ditto 3707 Range chomp(Range, C2)(Range str, const(C2)[] delimiter) 3708 if ((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range) || 3709 isNarrowString!Range) && 3710 !isConvertibleToString!Range && 3711 isSomeChar!C2) 3712 { 3713 if (delimiter.empty) 3714 return chomp(str); 3715 3716 alias C1 = ElementEncodingType!Range; 3717 3718 static if (is(immutable C1 == immutable C2) && (isSomeString!Range || (hasSlicing!Range && C2.sizeof == 4))) 3719 { 3720 import std.algorithm.searching : endsWith; 3721 if (str.endsWith(delimiter)) 3722 return str[0 .. $ - delimiter.length]; 3723 return str; 3724 } 3725 else 3726 { 3727 auto orig = str.save; 3728 3729 static if (isSomeString!Range) 3730 alias C = dchar; // because strings auto-decode 3731 else 3732 alias C = C1; // and ranges do not 3733 3734 foreach_reverse (C c; delimiter) 3735 { 3736 if (str.empty || str.back != c) 3737 return orig; 3738 3739 str.popBack(); 3740 } 3741 3742 return str; 3743 } 3744 } 3745 3746 /// 3747 @safe pure 3748 unittest 3749 { 3750 import std.uni : lineSep, paraSep, nelSep; 3751 import std.utf : decode; 3752 assert(chomp(" hello world \n\r") == " hello world \n"); 3753 assert(chomp(" hello world \r\n") == " hello world "); 3754 assert(chomp(" hello world \f") == " hello world "); 3755 assert(chomp(" hello world \v") == " hello world "); 3756 assert(chomp(" hello world \n\n") == " hello world \n"); 3757 assert(chomp(" hello world \n\n ") == " hello world \n\n "); 3758 assert(chomp(" hello world \n\n" ~ [lineSep]) == " hello world \n\n"); 3759 assert(chomp(" hello world \n\n" ~ [paraSep]) == " hello world \n\n"); 3760 assert(chomp(" hello world \n\n" ~ [ nelSep]) == " hello world \n\n"); 3761 assert(chomp(" hello world") == " hello world"); 3762 assert(chomp("") == ""); 3763 3764 assert(chomp(" hello world", "orld") == " hello w"); 3765 assert(chomp(" hello world", " he") == " hello world"); 3766 assert(chomp("", "hello") == ""); 3767 3768 // Don't decode pointlessly 3769 assert(chomp("hello\xFE", "\r") == "hello\xFE"); 3770 } 3771 3772 StringTypeOf!Range chomp(Range)(auto ref Range str) 3773 if (isConvertibleToString!Range) 3774 { 3775 return chomp!(StringTypeOf!Range)(str); 3776 } 3777 3778 StringTypeOf!Range chomp(Range, C2)(auto ref Range str, const(C2)[] delimiter) 3779 if (isConvertibleToString!Range) 3780 { 3781 return chomp!(StringTypeOf!Range, C2)(str, delimiter); 3782 } 3783 3784 @safe pure unittest 3785 { 3786 assert(testAliasedString!chomp(" hello world \n\r")); 3787 assert(testAliasedString!chomp(" hello world", "orld")); 3788 } 3789 3790 @safe pure unittest 3791 { 3792 import std.conv : to; 3793 import std.exception : assertCTFEable; 3794 3795 assertCTFEable!( 3796 { 3797 static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 3798 { 3799 // @@@ BUG IN COMPILER, MUST INSERT CAST 3800 assert(chomp(cast(S) null) is null); 3801 assert(chomp(to!S("hello")) == "hello"); 3802 assert(chomp(to!S("hello\n")) == "hello"); 3803 assert(chomp(to!S("hello\r")) == "hello"); 3804 assert(chomp(to!S("hello\r\n")) == "hello"); 3805 assert(chomp(to!S("hello\n\r")) == "hello\n"); 3806 assert(chomp(to!S("hello\n\n")) == "hello\n"); 3807 assert(chomp(to!S("hello\r\r")) == "hello\r"); 3808 assert(chomp(to!S("hello\nxxx\n")) == "hello\nxxx"); 3809 assert(chomp(to!S("hello\u2028")) == "hello"); 3810 assert(chomp(to!S("hello\u2029")) == "hello"); 3811 assert(chomp(to!S("hello\u0085")) == "hello"); 3812 assert(chomp(to!S("hello\u2028\u2028")) == "hello\u2028"); 3813 assert(chomp(to!S("hello\u2029\u2029")) == "hello\u2029"); 3814 assert(chomp(to!S("hello\u2029\u2129")) == "hello\u2029\u2129"); 3815 assert(chomp(to!S("hello\u2029\u0185")) == "hello\u2029\u0185"); 3816 3817 static foreach (T; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 3818 { 3819 // @@@ BUG IN COMPILER, MUST INSERT CAST 3820 assert(chomp(cast(S) null, cast(T) null) is null); 3821 assert(chomp(to!S("hello\n"), cast(T) null) == "hello"); 3822 assert(chomp(to!S("hello"), to!T("o")) == "hell"); 3823 assert(chomp(to!S("hello"), to!T("p")) == "hello"); 3824 // @@@ BUG IN COMPILER, MUST INSERT CAST 3825 assert(chomp(to!S("hello"), cast(T) null) == "hello"); 3826 assert(chomp(to!S("hello"), to!T("llo")) == "he"); 3827 assert(chomp(to!S("\uFF28ello"), to!T("llo")) == "\uFF28e"); 3828 assert(chomp(to!S("\uFF28el\uFF4co"), to!T("l\uFF4co")) == "\uFF28e"); 3829 } 3830 } 3831 }); 3832 3833 // Ranges 3834 import std.array : array; 3835 import std.utf : byChar, byWchar, byDchar; 3836 assert(chomp("hello world\r\n" .byChar ).array == "hello world"); 3837 assert(chomp("hello world\r\n"w.byWchar).array == "hello world"w); 3838 assert(chomp("hello world\r\n"d.byDchar).array == "hello world"d); 3839 3840 assert(chomp("hello world"d.byDchar, "ld").array == "hello wor"d); 3841 3842 assert(chomp("hello\u2020" .byChar , "\u2020").array == "hello"); 3843 assert(chomp("hello\u2020"d.byDchar, "\u2020"d).array == "hello"d); 3844 } 3845 3846 3847 /++ 3848 If `str` starts with `delimiter`, then the part of `str` following 3849 `delimiter` is returned. If `str` does $(I not) start with 3850 3851 `delimiter`, then it is returned unchanged. 3852 3853 Params: 3854 str = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives) 3855 of characters 3856 delimiter = string of characters to be sliced off front of str[] 3857 3858 Returns: 3859 slice of str 3860 +/ 3861 Range chompPrefix(Range, C2)(Range str, const(C2)[] delimiter) 3862 if ((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) || 3863 isNarrowString!Range) && 3864 !isConvertibleToString!Range && 3865 isSomeChar!C2) 3866 { 3867 alias C1 = ElementEncodingType!Range; 3868 3869 static if (is(immutable C1 == immutable C2) && (isSomeString!Range || (hasSlicing!Range && C2.sizeof == 4))) 3870 { 3871 import std.algorithm.searching : startsWith; 3872 if (str.startsWith(delimiter)) 3873 return str[delimiter.length .. $]; 3874 return str; 3875 } 3876 else 3877 { 3878 auto orig = str.save; 3879 3880 static if (isSomeString!Range) 3881 alias C = dchar; // because strings auto-decode 3882 else 3883 alias C = C1; // and ranges do not 3884 3885 foreach (C c; delimiter) 3886 { 3887 if (str.empty || str.front != c) 3888 return orig; 3889 3890 str.popFront(); 3891 } 3892 3893 return str; 3894 } 3895 } 3896 3897 /// 3898 @safe pure unittest 3899 { 3900 assert(chompPrefix("hello world", "he") == "llo world"); 3901 assert(chompPrefix("hello world", "hello w") == "orld"); 3902 assert(chompPrefix("hello world", " world") == "hello world"); 3903 assert(chompPrefix("", "hello") == ""); 3904 } 3905 3906 StringTypeOf!Range chompPrefix(Range, C2)(auto ref Range str, const(C2)[] delimiter) 3907 if (isConvertibleToString!Range) 3908 { 3909 return chompPrefix!(StringTypeOf!Range, C2)(str, delimiter); 3910 } 3911 3912 @safe pure 3913 unittest 3914 { 3915 import std.algorithm.comparison : equal; 3916 import std.conv : to; 3917 import std.exception : assertCTFEable; 3918 assertCTFEable!( 3919 { 3920 static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 3921 { 3922 static foreach (T; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 3923 { 3924 assert(equal(chompPrefix(to!S("abcdefgh"), to!T("abcde")), "fgh")); 3925 assert(equal(chompPrefix(to!S("abcde"), to!T("abcdefgh")), "abcde")); 3926 assert(equal(chompPrefix(to!S("\uFF28el\uFF4co"), to!T("\uFF28el\uFF4co")), "")); 3927 assert(equal(chompPrefix(to!S("\uFF28el\uFF4co"), to!T("\uFF28el")), "\uFF4co")); 3928 assert(equal(chompPrefix(to!S("\uFF28el"), to!T("\uFF28el\uFF4co")), "\uFF28el")); 3929 } 3930 } 3931 }); 3932 3933 // Ranges 3934 import std.array : array; 3935 import std.utf : byChar, byWchar, byDchar; 3936 assert(chompPrefix("hello world" .byChar , "hello"d).array == " world"); 3937 assert(chompPrefix("hello world"w.byWchar, "hello" ).array == " world"w); 3938 assert(chompPrefix("hello world"d.byDchar, "hello"w).array == " world"d); 3939 assert(chompPrefix("hello world"c.byDchar, "hello"w).array == " world"d); 3940 3941 assert(chompPrefix("hello world"d.byDchar, "lx").array == "hello world"d); 3942 assert(chompPrefix("hello world"d.byDchar, "hello world xx").array == "hello world"d); 3943 3944 assert(chompPrefix("\u2020world" .byChar , "\u2020").array == "world"); 3945 assert(chompPrefix("\u2020world"d.byDchar, "\u2020"d).array == "world"d); 3946 } 3947 3948 @safe pure unittest 3949 { 3950 assert(testAliasedString!chompPrefix("hello world", "hello")); 3951 } 3952 3953 /++ 3954 Returns `str` without its last character, if there is one. If `str` 3955 ends with `"\r\n"`, then both are removed. If `str` is empty, then 3956 it is returned unchanged. 3957 3958 Params: 3959 str = string (must be valid UTF) 3960 Returns: 3961 slice of str 3962 +/ 3963 3964 Range chop(Range)(Range str) 3965 if ((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range) || 3966 isNarrowString!Range) && 3967 !isConvertibleToString!Range) 3968 { 3969 if (str.empty) 3970 return str; 3971 3972 static if (isSomeString!Range) 3973 { 3974 if (str.length >= 2 && str[$ - 1] == '\n' && str[$ - 2] == '\r') 3975 return str[0 .. $ - 2]; 3976 str.popBack(); 3977 return str; 3978 } 3979 else 3980 { 3981 alias C = Unqual!(ElementEncodingType!Range); 3982 C c = str.back; 3983 str.popBack(); 3984 if (c == '\n') 3985 { 3986 if (!str.empty && str.back == '\r') 3987 str.popBack(); 3988 return str; 3989 } 3990 // Pop back a dchar, not just a code unit 3991 static if (C.sizeof == 1) 3992 { 3993 int cnt = 1; 3994 while ((c & 0xC0) == 0x80) 3995 { 3996 if (str.empty) 3997 break; 3998 c = str.back; 3999 str.popBack(); 4000 if (++cnt > 4) 4001 break; 4002 } 4003 } 4004 else static if (C.sizeof == 2) 4005 { 4006 if (c >= 0xD800 && c <= 0xDBFF) 4007 { 4008 if (!str.empty) 4009 str.popBack(); 4010 } 4011 } 4012 else static if (C.sizeof == 4) 4013 { 4014 } 4015 else 4016 static assert(0); 4017 return str; 4018 } 4019 } 4020 4021 /// 4022 @safe pure unittest 4023 { 4024 assert(chop("hello world") == "hello worl"); 4025 assert(chop("hello world\n") == "hello world"); 4026 assert(chop("hello world\r") == "hello world"); 4027 assert(chop("hello world\n\r") == "hello world\n"); 4028 assert(chop("hello world\r\n") == "hello world"); 4029 assert(chop("Walter Bright") == "Walter Brigh"); 4030 assert(chop("") == ""); 4031 } 4032 4033 StringTypeOf!Range chop(Range)(auto ref Range str) 4034 if (isConvertibleToString!Range) 4035 { 4036 return chop!(StringTypeOf!Range)(str); 4037 } 4038 4039 @safe pure unittest 4040 { 4041 assert(testAliasedString!chop("hello world")); 4042 } 4043 4044 @safe pure unittest 4045 { 4046 import std.array : array; 4047 import std.utf : byChar, byWchar, byDchar, byCodeUnit, invalidUTFstrings; 4048 4049 assert(chop("hello world".byChar).array == "hello worl"); 4050 assert(chop("hello world\n"w.byWchar).array == "hello world"w); 4051 assert(chop("hello world\r"d.byDchar).array == "hello world"d); 4052 assert(chop("hello world\n\r".byChar).array == "hello world\n"); 4053 assert(chop("hello world\r\n"w.byWchar).array == "hello world"w); 4054 assert(chop("Walter Bright"d.byDchar).array == "Walter Brigh"d); 4055 assert(chop("".byChar).array == ""); 4056 4057 assert(chop(`ミツバチと科学者` .byCodeUnit).array == "ミツバチと科学"); 4058 assert(chop(`ミツバチと科学者`w.byCodeUnit).array == "ミツバチと科学"w); 4059 assert(chop(`ミツバチと科学者`d.byCodeUnit).array == "ミツバチと科学"d); 4060 4061 auto ca = invalidUTFstrings!char(); 4062 foreach (s; ca) 4063 { 4064 foreach (c; chop(s.byCodeUnit)) 4065 { 4066 } 4067 } 4068 4069 auto wa = invalidUTFstrings!wchar(); 4070 foreach (s; wa) 4071 { 4072 foreach (c; chop(s.byCodeUnit)) 4073 { 4074 } 4075 } 4076 } 4077 4078 @safe pure unittest 4079 { 4080 import std.algorithm.comparison : equal; 4081 import std.conv : to; 4082 import std.exception : assertCTFEable; 4083 4084 assertCTFEable!( 4085 { 4086 static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 4087 { 4088 assert(chop(cast(S) null) is null); 4089 assert(equal(chop(to!S("hello")), "hell")); 4090 assert(equal(chop(to!S("hello\r\n")), "hello")); 4091 assert(equal(chop(to!S("hello\n\r")), "hello\n")); 4092 assert(equal(chop(to!S("Verité")), "Verit")); 4093 assert(equal(chop(to!S(`さいごの果実`)), "さいごの果")); 4094 assert(equal(chop(to!S(`ミツバチと科学者`)), "ミツバチと科学")); 4095 } 4096 }); 4097 } 4098 4099 4100 /++ 4101 Left justify `s` in a field `width` characters wide. `fillChar` 4102 is the character that will be used to fill up the space in the field that 4103 `s` doesn't fill. 4104 4105 Params: 4106 s = string 4107 width = minimum field width 4108 fillChar = used to pad end up to `width` characters 4109 4110 Returns: 4111 GC allocated string 4112 4113 See_Also: 4114 $(LREF leftJustifier), which does not allocate 4115 +/ 4116 S leftJustify(S)(S s, size_t width, dchar fillChar = ' ') 4117 if (isSomeString!S) 4118 { 4119 import std.array : array; 4120 return leftJustifier(s, width, fillChar).array; 4121 } 4122 4123 /// 4124 @safe pure unittest 4125 { 4126 assert(leftJustify("hello", 7, 'X') == "helloXX"); 4127 assert(leftJustify("hello", 2, 'X') == "hello"); 4128 assert(leftJustify("hello", 9, 'X') == "helloXXXX"); 4129 } 4130 4131 /++ 4132 Left justify `s` in a field `width` characters wide. `fillChar` 4133 is the character that will be used to fill up the space in the field that 4134 `s` doesn't fill. 4135 4136 Params: 4137 r = string or range of characters 4138 width = minimum field width 4139 fillChar = used to pad end up to `width` characters 4140 4141 Returns: 4142 a lazy range of the left justified result 4143 4144 See_Also: 4145 $(LREF rightJustifier) 4146 +/ 4147 4148 auto leftJustifier(Range)(Range r, size_t width, dchar fillChar = ' ') 4149 if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) && 4150 !isConvertibleToString!Range) 4151 { 4152 alias C = Unqual!(ElementEncodingType!Range); 4153 4154 static if (C.sizeof == 1) 4155 { 4156 import std.utf : byDchar, byChar; 4157 return leftJustifier(r.byDchar, width, fillChar).byChar; 4158 } 4159 else static if (C.sizeof == 2) 4160 { 4161 import std.utf : byDchar, byWchar; 4162 return leftJustifier(r.byDchar, width, fillChar).byWchar; 4163 } 4164 else static if (C.sizeof == 4) 4165 { 4166 static struct Result 4167 { 4168 private: 4169 Range _input; 4170 size_t _width; 4171 dchar _fillChar; 4172 size_t len; 4173 4174 public: 4175 4176 @property bool empty() 4177 { 4178 return len >= _width && _input.empty; 4179 } 4180 4181 @property C front() 4182 { 4183 return _input.empty ? _fillChar : _input.front; 4184 } 4185 4186 void popFront() 4187 { 4188 ++len; 4189 if (!_input.empty) 4190 _input.popFront(); 4191 } 4192 4193 static if (isForwardRange!Range) 4194 { 4195 @property typeof(this) save() return scope 4196 { 4197 auto ret = this; 4198 ret._input = _input.save; 4199 return ret; 4200 } 4201 } 4202 } 4203 4204 return Result(r, width, fillChar); 4205 } 4206 else 4207 static assert(0); 4208 } 4209 4210 /// 4211 @safe pure @nogc nothrow 4212 unittest 4213 { 4214 import std.algorithm.comparison : equal; 4215 import std.utf : byChar; 4216 assert(leftJustifier("hello", 2).equal("hello".byChar)); 4217 assert(leftJustifier("hello", 7).equal("hello ".byChar)); 4218 assert(leftJustifier("hello", 7, 'x').equal("helloxx".byChar)); 4219 } 4220 4221 auto leftJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ') 4222 if (isConvertibleToString!Range) 4223 { 4224 return leftJustifier!(StringTypeOf!Range)(r, width, fillChar); 4225 } 4226 4227 @safe pure unittest 4228 { 4229 auto r = "hello".leftJustifier(8); 4230 r.popFront(); 4231 auto save = r.save; 4232 r.popFront(); 4233 assert(r.front == 'l'); 4234 assert(save.front == 'e'); 4235 } 4236 4237 @safe pure unittest 4238 { 4239 assert(testAliasedString!leftJustifier("hello", 2)); 4240 } 4241 4242 /++ 4243 Right justify `s` in a field `width` characters wide. `fillChar` 4244 is the character that will be used to fill up the space in the field that 4245 `s` doesn't fill. 4246 4247 Params: 4248 s = string 4249 width = minimum field width 4250 fillChar = used to pad end up to `width` characters 4251 4252 Returns: 4253 GC allocated string 4254 4255 See_Also: 4256 $(LREF rightJustifier), which does not allocate 4257 +/ 4258 S rightJustify(S)(S s, size_t width, dchar fillChar = ' ') 4259 if (isSomeString!S) 4260 { 4261 import std.array : array; 4262 return rightJustifier(s, width, fillChar).array; 4263 } 4264 4265 /// 4266 @safe pure unittest 4267 { 4268 assert(rightJustify("hello", 7, 'X') == "XXhello"); 4269 assert(rightJustify("hello", 2, 'X') == "hello"); 4270 assert(rightJustify("hello", 9, 'X') == "XXXXhello"); 4271 } 4272 4273 /++ 4274 Right justify `s` in a field `width` characters wide. `fillChar` 4275 is the character that will be used to fill up the space in the field that 4276 `s` doesn't fill. 4277 4278 Params: 4279 r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives) 4280 of characters 4281 width = minimum field width 4282 fillChar = used to pad end up to `width` characters 4283 4284 Returns: 4285 a lazy range of the right justified result 4286 4287 See_Also: 4288 $(LREF leftJustifier) 4289 +/ 4290 4291 auto rightJustifier(Range)(Range r, size_t width, dchar fillChar = ' ') 4292 if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) && 4293 !isConvertibleToString!Range) 4294 { 4295 alias C = Unqual!(ElementEncodingType!Range); 4296 4297 static if (C.sizeof == 1) 4298 { 4299 import std.utf : byDchar, byChar; 4300 return rightJustifier(r.byDchar, width, fillChar).byChar; 4301 } 4302 else static if (C.sizeof == 2) 4303 { 4304 import std.utf : byDchar, byWchar; 4305 return rightJustifier(r.byDchar, width, fillChar).byWchar; 4306 } 4307 else static if (C.sizeof == 4) 4308 { 4309 static struct Result 4310 { 4311 private: 4312 Range _input; 4313 size_t _width; 4314 alias nfill = _width; // number of fill characters to prepend 4315 dchar _fillChar; 4316 bool inited; 4317 4318 // Lazy initialization so constructor is trivial and cannot fail 4319 void initialize() 4320 { 4321 // Replace _width with nfill 4322 // (use alias instead of union because CTFE cannot deal with unions) 4323 assert(_width, "width of 0 not allowed"); 4324 static if (hasLength!Range) 4325 { 4326 immutable len = _input.length; 4327 nfill = (_width > len) ? _width - len : 0; 4328 } 4329 else 4330 { 4331 // Lookahead to see now many fill characters are needed 4332 import std.range : take; 4333 import std.range.primitives : walkLength; 4334 nfill = _width - walkLength(_input.save.take(_width), _width); 4335 } 4336 inited = true; 4337 } 4338 4339 public: 4340 this(Range input, size_t width, dchar fillChar) pure nothrow 4341 { 4342 _input = input; 4343 _fillChar = fillChar; 4344 _width = width; 4345 } 4346 4347 @property bool empty() 4348 { 4349 return !nfill && _input.empty; 4350 } 4351 4352 @property C front() 4353 { 4354 if (!nfill) 4355 return _input.front; // fast path 4356 if (!inited) 4357 initialize(); 4358 return nfill ? _fillChar : _input.front; 4359 } 4360 4361 void popFront() 4362 { 4363 if (!nfill) 4364 _input.popFront(); // fast path 4365 else 4366 { 4367 if (!inited) 4368 initialize(); 4369 if (nfill) 4370 --nfill; 4371 else 4372 _input.popFront(); 4373 } 4374 } 4375 4376 @property typeof(this) save() 4377 { 4378 auto ret = this; 4379 ret._input = _input.save; 4380 return ret; 4381 } 4382 } 4383 4384 return Result(r, width, fillChar); 4385 } 4386 else 4387 static assert(0, "Invalid character type of " ~ C.stringof); 4388 } 4389 4390 /// 4391 @safe pure @nogc nothrow 4392 unittest 4393 { 4394 import std.algorithm.comparison : equal; 4395 import std.utf : byChar; 4396 assert(rightJustifier("hello", 2).equal("hello".byChar)); 4397 assert(rightJustifier("hello", 7).equal(" hello".byChar)); 4398 assert(rightJustifier("hello", 7, 'x').equal("xxhello".byChar)); 4399 } 4400 4401 auto rightJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ') 4402 if (isConvertibleToString!Range) 4403 { 4404 return rightJustifier!(StringTypeOf!Range)(r, width, fillChar); 4405 } 4406 4407 @safe pure unittest 4408 { 4409 assert(testAliasedString!rightJustifier("hello", 2)); 4410 } 4411 4412 @safe pure unittest 4413 { 4414 auto r = "hello"d.rightJustifier(6); 4415 r.popFront(); 4416 auto save = r.save; 4417 r.popFront(); 4418 assert(r.front == 'e'); 4419 assert(save.front == 'h'); 4420 4421 auto t = "hello".rightJustifier(7); 4422 t.popFront(); 4423 assert(t.front == ' '); 4424 t.popFront(); 4425 assert(t.front == 'h'); 4426 4427 auto u = "hello"d.rightJustifier(5); 4428 u.popFront(); 4429 u.popFront(); 4430 u.popFront(); 4431 } 4432 4433 /++ 4434 Center `s` in a field `width` characters wide. `fillChar` 4435 is the character that will be used to fill up the space in the field that 4436 `s` doesn't fill. 4437 4438 Params: 4439 s = The string to center 4440 width = Width of the field to center `s` in 4441 fillChar = The character to use for filling excess space in the field 4442 4443 Returns: 4444 The resulting _center-justified string. The returned string is 4445 GC-allocated. To avoid GC allocation, use $(LREF centerJustifier) 4446 instead. 4447 +/ 4448 S center(S)(S s, size_t width, dchar fillChar = ' ') 4449 if (isSomeString!S) 4450 { 4451 import std.array : array; 4452 return centerJustifier(s, width, fillChar).array; 4453 } 4454 4455 /// 4456 @safe pure unittest 4457 { 4458 assert(center("hello", 7, 'X') == "XhelloX"); 4459 assert(center("hello", 2, 'X') == "hello"); 4460 assert(center("hello", 9, 'X') == "XXhelloXX"); 4461 } 4462 4463 @safe pure 4464 unittest 4465 { 4466 import std.conv : to; 4467 import std.exception : assertCTFEable; 4468 4469 assertCTFEable!( 4470 { 4471 static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 4472 {{ 4473 S s = to!S("hello"); 4474 4475 assert(leftJustify(s, 2) == "hello"); 4476 assert(rightJustify(s, 2) == "hello"); 4477 assert(center(s, 2) == "hello"); 4478 4479 assert(leftJustify(s, 7) == "hello "); 4480 assert(rightJustify(s, 7) == " hello"); 4481 assert(center(s, 7) == " hello "); 4482 4483 assert(leftJustify(s, 8) == "hello "); 4484 assert(rightJustify(s, 8) == " hello"); 4485 assert(center(s, 8) == " hello "); 4486 4487 assert(leftJustify(s, 8, '\u0100') == "hello\u0100\u0100\u0100"); 4488 assert(rightJustify(s, 8, '\u0100') == "\u0100\u0100\u0100hello"); 4489 assert(center(s, 8, '\u0100') == "\u0100hello\u0100\u0100"); 4490 4491 assert(leftJustify(s, 8, 'ö') == "helloööö"); 4492 assert(rightJustify(s, 8, 'ö') == "öööhello"); 4493 assert(center(s, 8, 'ö') == "öhelloöö"); 4494 }} 4495 }); 4496 } 4497 4498 /++ 4499 Center justify `r` in a field `width` characters wide. `fillChar` 4500 is the character that will be used to fill up the space in the field that 4501 `r` doesn't fill. 4502 4503 Params: 4504 r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives) 4505 of characters 4506 width = minimum field width 4507 fillChar = used to pad end up to `width` characters 4508 4509 Returns: 4510 a lazy range of the center justified result 4511 4512 See_Also: 4513 $(LREF leftJustifier) 4514 $(LREF rightJustifier) 4515 +/ 4516 4517 auto centerJustifier(Range)(Range r, size_t width, dchar fillChar = ' ') 4518 if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) && 4519 !isConvertibleToString!Range) 4520 { 4521 alias C = Unqual!(ElementEncodingType!Range); 4522 4523 static if (C.sizeof == 1) 4524 { 4525 import std.utf : byDchar, byChar; 4526 return centerJustifier(r.byDchar, width, fillChar).byChar; 4527 } 4528 else static if (C.sizeof == 2) 4529 { 4530 import std.utf : byDchar, byWchar; 4531 return centerJustifier(r.byDchar, width, fillChar).byWchar; 4532 } 4533 else static if (C.sizeof == 4) 4534 { 4535 import std.range : chain, repeat; 4536 import std.range.primitives : walkLength; 4537 4538 auto len = walkLength(r.save, width); 4539 if (len > width) 4540 len = width; 4541 const nleft = (width - len) / 2; 4542 const nright = width - len - nleft; 4543 return chain(repeat(fillChar, nleft), r, repeat(fillChar, nright)); 4544 } 4545 else 4546 static assert(0); 4547 } 4548 4549 /// 4550 @safe pure @nogc nothrow 4551 unittest 4552 { 4553 import std.algorithm.comparison : equal; 4554 import std.utf : byChar; 4555 assert(centerJustifier("hello", 2).equal("hello".byChar)); 4556 assert(centerJustifier("hello", 8).equal(" hello ".byChar)); 4557 assert(centerJustifier("hello", 7, 'x').equal("xhellox".byChar)); 4558 } 4559 4560 auto centerJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ') 4561 if (isConvertibleToString!Range) 4562 { 4563 return centerJustifier!(StringTypeOf!Range)(r, width, fillChar); 4564 } 4565 4566 @safe pure unittest 4567 { 4568 assert(testAliasedString!centerJustifier("hello", 8)); 4569 } 4570 4571 @safe unittest 4572 { 4573 static auto byFwdRange(dstring s) 4574 { 4575 static struct FRange 4576 { 4577 @safe: 4578 dstring str; 4579 this(dstring s) { str = s; } 4580 @property bool empty() { return str.length == 0; } 4581 @property dchar front() { return str[0]; } 4582 void popFront() { str = str[1 .. $]; } 4583 @property FRange save() { return this; } 4584 } 4585 return FRange(s); 4586 } 4587 4588 auto r = centerJustifier(byFwdRange("hello"d), 6); 4589 r.popFront(); 4590 auto save = r.save; 4591 r.popFront(); 4592 assert(r.front == 'l'); 4593 assert(save.front == 'e'); 4594 4595 auto t = "hello".centerJustifier(7); 4596 t.popFront(); 4597 assert(t.front == 'h'); 4598 t.popFront(); 4599 assert(t.front == 'e'); 4600 4601 auto u = byFwdRange("hello"d).centerJustifier(6); 4602 u.popFront(); 4603 u.popFront(); 4604 u.popFront(); 4605 u.popFront(); 4606 u.popFront(); 4607 u.popFront(); 4608 } 4609 4610 4611 /++ 4612 Replace each tab character in `s` with the number of spaces necessary 4613 to align the following character at the next tab stop. 4614 4615 Params: 4616 s = string 4617 tabSize = distance between tab stops 4618 4619 Returns: 4620 GC allocated string with tabs replaced with spaces 4621 +/ 4622 auto detab(Range)(auto ref Range s, size_t tabSize = 8) pure 4623 if ((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range)) 4624 || __traits(compiles, StringTypeOf!Range)) 4625 { 4626 import std.array : array; 4627 return detabber(s, tabSize).array; 4628 } 4629 4630 /// 4631 @safe pure unittest 4632 { 4633 assert(detab(" \n\tx", 9) == " \n x"); 4634 } 4635 4636 @safe pure unittest 4637 { 4638 static struct TestStruct 4639 { 4640 string s; 4641 alias s this; 4642 } 4643 4644 static struct TestStruct2 4645 { 4646 string s; 4647 alias s this; 4648 @disable this(this); 4649 } 4650 4651 string s = " \n\tx"; 4652 string cmp = " \n x"; 4653 auto t = TestStruct(s); 4654 assert(detab(t, 9) == cmp); 4655 assert(detab(TestStruct(s), 9) == cmp); 4656 assert(detab(TestStruct(s), 9) == detab(TestStruct(s), 9)); 4657 assert(detab(TestStruct2(s), 9) == detab(TestStruct2(s), 9)); 4658 assert(detab(TestStruct2(s), 9) == cmp); 4659 } 4660 4661 /++ 4662 Replace each tab character in `r` with the number of spaces 4663 necessary to align the following character at the next tab stop. 4664 4665 Params: 4666 r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives) 4667 tabSize = distance between tab stops 4668 4669 Returns: 4670 lazy forward range with tabs replaced with spaces 4671 +/ 4672 auto detabber(Range)(Range r, size_t tabSize = 8) 4673 if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) && 4674 !isConvertibleToString!Range) 4675 { 4676 import std.uni : lineSep, paraSep, nelSep; 4677 import std.utf : codeUnitLimit, decodeFront; 4678 4679 assert(tabSize > 0); 4680 4681 alias C = Unqual!(ElementEncodingType!(Range)); 4682 4683 static struct Result 4684 { 4685 private: 4686 Range _input; 4687 size_t _tabSize; 4688 size_t nspaces; 4689 int column; 4690 size_t index; 4691 4692 public: 4693 4694 this(Range input, size_t tabSize) 4695 { 4696 _input = input; 4697 _tabSize = tabSize; 4698 } 4699 4700 static if (isInfinite!(Range)) 4701 { 4702 enum bool empty = false; 4703 } 4704 else 4705 { 4706 @property bool empty() 4707 { 4708 return _input.empty && nspaces == 0; 4709 } 4710 } 4711 4712 @property C front() 4713 { 4714 if (nspaces) 4715 return ' '; 4716 static if (isSomeString!(Range)) 4717 C c = _input[0]; 4718 else 4719 C c = _input.front; 4720 if (index) 4721 return c; 4722 dchar dc; 4723 if (c < codeUnitLimit!(immutable(C)[])) 4724 { 4725 dc = c; 4726 index = 1; 4727 } 4728 else 4729 { 4730 auto r = _input.save; 4731 dc = decodeFront(r, index); // lookahead to decode 4732 } 4733 switch (dc) 4734 { 4735 case '\r': 4736 case '\n': 4737 case paraSep: 4738 case lineSep: 4739 case nelSep: 4740 column = 0; 4741 break; 4742 4743 case '\t': 4744 nspaces = _tabSize - (column % _tabSize); 4745 column += nspaces; 4746 c = ' '; 4747 break; 4748 4749 default: 4750 ++column; 4751 break; 4752 } 4753 return c; 4754 } 4755 4756 void popFront() 4757 { 4758 if (!index) 4759 front; 4760 if (nspaces) 4761 --nspaces; 4762 if (!nspaces) 4763 { 4764 static if (isSomeString!(Range)) 4765 _input = _input[1 .. $]; 4766 else 4767 _input.popFront(); 4768 --index; 4769 } 4770 } 4771 4772 @property typeof(this) save() 4773 { 4774 auto ret = this; 4775 ret._input = _input.save; 4776 return ret; 4777 } 4778 } 4779 4780 return Result(r, tabSize); 4781 } 4782 4783 /// 4784 @safe pure unittest 4785 { 4786 import std.array : array; 4787 4788 assert(detabber(" \n\tx", 9).array == " \n x"); 4789 } 4790 4791 auto detabber(Range)(auto ref Range r, size_t tabSize = 8) 4792 if (isConvertibleToString!Range) 4793 { 4794 return detabber!(StringTypeOf!Range)(r, tabSize); 4795 } 4796 4797 @safe pure unittest 4798 { 4799 assert(testAliasedString!detabber( " ab\t asdf ", 8)); 4800 } 4801 4802 @safe pure unittest 4803 { 4804 import std.algorithm.comparison : cmp; 4805 import std.conv : to; 4806 import std.exception : assertCTFEable; 4807 4808 assertCTFEable!( 4809 { 4810 static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 4811 {{ 4812 S s = to!S("This \tis\t a fofof\tof list"); 4813 assert(cmp(detab(s), "This is a fofof of list") == 0); 4814 4815 assert(detab(cast(S) null) is null); 4816 assert(detab("").empty); 4817 assert(detab("a") == "a"); 4818 assert(detab("\t") == " "); 4819 assert(detab("\t", 3) == " "); 4820 assert(detab("\t", 9) == " "); 4821 assert(detab( " ab\t asdf ") == " ab asdf "); 4822 assert(detab( " \U00010000b\tasdf ") == " \U00010000b asdf "); 4823 assert(detab("\r\t", 9) == "\r "); 4824 assert(detab("\n\t", 9) == "\n "); 4825 assert(detab("\u0085\t", 9) == "\u0085 "); 4826 assert(detab("\u2028\t", 9) == "\u2028 "); 4827 assert(detab(" \u2029\t", 9) == " \u2029 "); 4828 }} 4829 }); 4830 } 4831 4832 /// 4833 @safe pure unittest 4834 { 4835 import std.array : array; 4836 import std.utf : byChar, byWchar; 4837 4838 assert(detabber(" \u2029\t".byChar, 9).array == " \u2029 "); 4839 auto r = "hel\tx".byWchar.detabber(); 4840 assert(r.front == 'h'); 4841 auto s = r.save; 4842 r.popFront(); 4843 r.popFront(); 4844 assert(r.front == 'l'); 4845 assert(s.front == 'h'); 4846 } 4847 4848 /++ 4849 Replaces spaces in `s` with the optimal number of tabs. 4850 All spaces and tabs at the end of a line are removed. 4851 4852 Params: 4853 s = String to convert. 4854 tabSize = Tab columns are `tabSize` spaces apart. 4855 4856 Returns: 4857 GC allocated string with spaces replaced with tabs; 4858 use $(LREF entabber) to not allocate. 4859 4860 See_Also: 4861 $(LREF entabber) 4862 +/ 4863 auto entab(Range)(Range s, size_t tabSize = 8) 4864 if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range)) 4865 { 4866 import std.array : array; 4867 return entabber(s, tabSize).array; 4868 } 4869 4870 /// 4871 @safe pure unittest 4872 { 4873 assert(entab(" x \n") == "\tx\n"); 4874 } 4875 4876 auto entab(Range)(auto ref Range s, size_t tabSize = 8) 4877 if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range)) && 4878 is(StringTypeOf!Range)) 4879 { 4880 return entab!(StringTypeOf!Range)(s, tabSize); 4881 } 4882 4883 @safe pure unittest 4884 { 4885 assert(testAliasedString!entab(" x \n")); 4886 } 4887 4888 /++ 4889 Replaces spaces in range `r` with the optimal number of tabs. 4890 All spaces and tabs at the end of a line are removed. 4891 4892 Params: 4893 r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives) 4894 tabSize = distance between tab stops 4895 4896 Returns: 4897 lazy forward range with spaces replaced with tabs 4898 4899 See_Also: 4900 $(LREF entab) 4901 +/ 4902 auto entabber(Range)(Range r, size_t tabSize = 8) 4903 if (isForwardRange!Range && !isConvertibleToString!Range) 4904 { 4905 import std.uni : lineSep, paraSep, nelSep; 4906 import std.utf : codeUnitLimit, decodeFront; 4907 4908 assert(tabSize > 0, "tabSize must be greater than 0"); 4909 alias C = Unqual!(ElementEncodingType!Range); 4910 4911 static struct Result 4912 { 4913 private: 4914 Range _input; 4915 size_t _tabSize; 4916 size_t nspaces; 4917 size_t ntabs; 4918 int column; 4919 size_t index; 4920 4921 @property C getFront() 4922 { 4923 static if (isSomeString!Range) 4924 return _input[0]; // avoid autodecode 4925 else 4926 return _input.front; 4927 } 4928 4929 public: 4930 4931 this(Range input, size_t tabSize) 4932 { 4933 _input = input; 4934 _tabSize = tabSize; 4935 } 4936 4937 @property bool empty() 4938 { 4939 if (ntabs || nspaces) 4940 return false; 4941 4942 /* Since trailing spaces are removed, 4943 * look ahead for anything that is not a trailing space 4944 */ 4945 static if (isSomeString!Range) 4946 { 4947 foreach (c; _input) 4948 { 4949 if (c != ' ' && c != '\t') 4950 return false; 4951 } 4952 return true; 4953 } 4954 else 4955 { 4956 if (_input.empty) 4957 return true; 4958 immutable c = _input.front; 4959 if (c != ' ' && c != '\t') 4960 return false; 4961 auto t = _input.save; 4962 t.popFront(); 4963 foreach (c2; t) 4964 { 4965 if (c2 != ' ' && c2 != '\t') 4966 return false; 4967 } 4968 return true; 4969 } 4970 } 4971 4972 @property C front() 4973 { 4974 //writefln(" front(): ntabs = %s nspaces = %s index = %s front = '%s'", ntabs, nspaces, index, getFront); 4975 if (ntabs) 4976 return '\t'; 4977 if (nspaces) 4978 return ' '; 4979 C c = getFront; 4980 if (index) 4981 return c; 4982 dchar dc; 4983 if (c < codeUnitLimit!(immutable(C)[])) 4984 { 4985 index = 1; 4986 dc = c; 4987 if (c == ' ' || c == '\t') 4988 { 4989 // Consume input until a non-blank is encountered 4990 immutable startcol = column; 4991 C cx; 4992 static if (isSomeString!Range) 4993 { 4994 while (1) 4995 { 4996 assert(_input.length, "input did not contain non " 4997 ~ "whitespace character"); 4998 cx = _input[0]; 4999 if (cx == ' ') 5000 ++column; 5001 else if (cx == '\t') 5002 column += _tabSize - (column % _tabSize); 5003 else 5004 break; 5005 _input = _input[1 .. $]; 5006 } 5007 } 5008 else 5009 { 5010 while (1) 5011 { 5012 assert(_input.length, "input did not contain non " 5013 ~ "whitespace character"); 5014 cx = _input.front; 5015 if (cx == ' ') 5016 ++column; 5017 else if (cx == '\t') 5018 column += _tabSize - (column % _tabSize); 5019 else 5020 break; 5021 _input.popFront(); 5022 } 5023 } 5024 // Compute ntabs+nspaces to get from startcol to column 5025 immutable n = column - startcol; 5026 if (n == 1) 5027 { 5028 nspaces = 1; 5029 } 5030 else 5031 { 5032 ntabs = column / _tabSize - startcol / _tabSize; 5033 if (ntabs == 0) 5034 nspaces = column - startcol; 5035 else 5036 nspaces = column % _tabSize; 5037 } 5038 //writefln("\tstartcol = %s, column = %s, _tabSize = %s", startcol, column, _tabSize); 5039 //writefln("\tntabs = %s, nspaces = %s", ntabs, nspaces); 5040 if (cx < codeUnitLimit!(immutable(C)[])) 5041 { 5042 dc = cx; 5043 index = 1; 5044 } 5045 else 5046 { 5047 auto r = _input.save; 5048 dc = decodeFront(r, index); // lookahead to decode 5049 } 5050 switch (dc) 5051 { 5052 case '\r': 5053 case '\n': 5054 case paraSep: 5055 case lineSep: 5056 case nelSep: 5057 column = 0; 5058 // Spaces followed by newline are ignored 5059 ntabs = 0; 5060 nspaces = 0; 5061 return cx; 5062 5063 default: 5064 ++column; 5065 break; 5066 } 5067 return ntabs ? '\t' : ' '; 5068 } 5069 } 5070 else 5071 { 5072 auto r = _input.save; 5073 dc = decodeFront(r, index); // lookahead to decode 5074 } 5075 //writefln("dc = x%x", dc); 5076 switch (dc) 5077 { 5078 case '\r': 5079 case '\n': 5080 case paraSep: 5081 case lineSep: 5082 case nelSep: 5083 column = 0; 5084 break; 5085 5086 default: 5087 ++column; 5088 break; 5089 } 5090 return c; 5091 } 5092 5093 void popFront() 5094 { 5095 //writefln("popFront(): ntabs = %s nspaces = %s index = %s front = '%s'", ntabs, nspaces, index, getFront); 5096 if (!index) 5097 front; 5098 if (ntabs) 5099 --ntabs; 5100 else if (nspaces) 5101 --nspaces; 5102 else if (!ntabs && !nspaces) 5103 { 5104 static if (isSomeString!Range) 5105 _input = _input[1 .. $]; 5106 else 5107 _input.popFront(); 5108 --index; 5109 } 5110 } 5111 5112 @property typeof(this) save() 5113 { 5114 auto ret = this; 5115 ret._input = _input.save; 5116 return ret; 5117 } 5118 } 5119 5120 return Result(r, tabSize); 5121 } 5122 5123 /// 5124 @safe pure unittest 5125 { 5126 import std.array : array; 5127 assert(entabber(" x \n").array == "\tx\n"); 5128 } 5129 5130 auto entabber(Range)(auto ref Range r, size_t tabSize = 8) 5131 if (isConvertibleToString!Range) 5132 { 5133 return entabber!(StringTypeOf!Range)(r, tabSize); 5134 } 5135 5136 @safe pure unittest 5137 { 5138 assert(testAliasedString!entabber(" ab asdf ", 8)); 5139 } 5140 5141 @safe pure 5142 unittest 5143 { 5144 import std.conv : to; 5145 import std.exception : assertCTFEable; 5146 5147 assertCTFEable!( 5148 { 5149 assert(entab(cast(string) null) is null); 5150 assert(entab("").empty); 5151 assert(entab("a") == "a"); 5152 assert(entab(" ") == ""); 5153 assert(entab(" x") == "\tx"); 5154 assert(entab(" ab asdf ") == " ab\tasdf"); 5155 assert(entab(" ab asdf ") == " ab\t asdf"); 5156 assert(entab(" ab \t asdf ") == " ab\t asdf"); 5157 assert(entab("1234567 \ta") == "1234567\t\ta"); 5158 assert(entab("1234567 \ta") == "1234567\t\ta"); 5159 assert(entab("1234567 \ta") == "1234567\t\ta"); 5160 assert(entab("1234567 \ta") == "1234567\t\ta"); 5161 assert(entab("1234567 \ta") == "1234567\t\ta"); 5162 assert(entab("1234567 \ta") == "1234567\t\ta"); 5163 assert(entab("1234567 \ta") == "1234567\t\ta"); 5164 assert(entab("1234567 \ta") == "1234567\t\ta"); 5165 assert(entab("1234567 \ta") == "1234567\t\t\ta"); 5166 5167 assert(entab("a ") == "a"); 5168 assert(entab("a\v") == "a\v"); 5169 assert(entab("a\f") == "a\f"); 5170 assert(entab("a\n") == "a\n"); 5171 assert(entab("a\n\r") == "a\n\r"); 5172 assert(entab("a\r\n") == "a\r\n"); 5173 assert(entab("a\u2028") == "a\u2028"); 5174 assert(entab("a\u2029") == "a\u2029"); 5175 assert(entab("a\u0085") == "a\u0085"); 5176 assert(entab("a ") == "a"); 5177 assert(entab("a\t") == "a"); 5178 assert(entab("\uFF28\uFF45\uFF4C\uFF4C567 \t\uFF4F \t") == 5179 "\uFF28\uFF45\uFF4C\uFF4C567\t\t\uFF4F"); 5180 assert(entab(" \naa") == "\naa"); 5181 assert(entab(" \r aa") == "\r aa"); 5182 assert(entab(" \u2028 aa") == "\u2028 aa"); 5183 assert(entab(" \u2029 aa") == "\u2029 aa"); 5184 assert(entab(" \u0085 aa") == "\u0085 aa"); 5185 }); 5186 } 5187 5188 @safe pure 5189 unittest 5190 { 5191 import std.array : array; 5192 import std.utf : byChar; 5193 assert(entabber(" \u0085 aa".byChar).array == "\u0085 aa"); 5194 assert(entabber(" \u2028\t aa \t".byChar).array == "\u2028\t aa"); 5195 5196 auto r = entabber("1234", 4); 5197 r.popFront(); 5198 auto rsave = r.save; 5199 r.popFront(); 5200 assert(r.front == '3'); 5201 assert(rsave.front == '2'); 5202 } 5203 5204 5205 /++ 5206 Replaces the characters in `str` which are keys in `transTable` with 5207 their corresponding values in `transTable`. `transTable` is an AA 5208 where its keys are `dchar` and its values are either `dchar` or some 5209 type of string. Also, if `toRemove` is given, the characters in it are 5210 removed from `str` prior to translation. `str` itself is unaltered. 5211 A copy with the changes is returned. 5212 5213 See_Also: 5214 $(LREF tr), 5215 $(REF replace, std,array), 5216 $(REF substitute, std,algorithm,iteration) 5217 5218 Params: 5219 str = The original string. 5220 transTable = The AA indicating which characters to replace and what to 5221 replace them with. 5222 toRemove = The characters to remove from the string. 5223 +/ 5224 C1[] translate(C1, C2 = immutable char)(C1[] str, 5225 in dchar[dchar] transTable, 5226 const(C2)[] toRemove = null) @safe pure 5227 if (isSomeChar!C1 && isSomeChar!C2) 5228 { 5229 import std.array : appender; 5230 auto buffer = appender!(C1[])(); 5231 translateImpl(str, transTable, toRemove, buffer); 5232 return buffer.data; 5233 } 5234 5235 /// 5236 @safe pure unittest 5237 { 5238 dchar[dchar] transTable1 = ['e' : '5', 'o' : '7', '5': 'q']; 5239 assert(translate("hello world", transTable1) == "h5ll7 w7rld"); 5240 5241 assert(translate("hello world", transTable1, "low") == "h5 rd"); 5242 5243 string[dchar] transTable2 = ['e' : "5", 'o' : "orange"]; 5244 assert(translate("hello world", transTable2) == "h5llorange worangerld"); 5245 } 5246 5247 // https://issues.dlang.org/show_bug.cgi?id=13018 5248 @safe pure unittest 5249 { 5250 immutable dchar[dchar] transTable1 = ['e' : '5', 'o' : '7', '5': 'q']; 5251 assert(translate("hello world", transTable1) == "h5ll7 w7rld"); 5252 5253 assert(translate("hello world", transTable1, "low") == "h5 rd"); 5254 5255 immutable string[dchar] transTable2 = ['e' : "5", 'o' : "orange"]; 5256 assert(translate("hello world", transTable2) == "h5llorange worangerld"); 5257 } 5258 5259 @system pure unittest 5260 { 5261 import std.conv : to; 5262 import std.exception : assertCTFEable; 5263 5264 assertCTFEable!( 5265 { 5266 static foreach (S; AliasSeq!( char[], const( char)[], immutable( char)[], 5267 wchar[], const(wchar)[], immutable(wchar)[], 5268 dchar[], const(dchar)[], immutable(dchar)[])) 5269 {(){ // workaround slow optimizations for large functions 5270 // https://issues.dlang.org/show_bug.cgi?id=2396 5271 assert(translate(to!S("hello world"), cast(dchar[dchar])['h' : 'q', 'l' : '5']) == 5272 to!S("qe55o wor5d")); 5273 assert(translate(to!S("hello world"), cast(dchar[dchar])['o' : 'l', 'l' : '\U00010143']) == 5274 to!S("he\U00010143\U00010143l wlr\U00010143d")); 5275 assert(translate(to!S("hello \U00010143 world"), cast(dchar[dchar])['h' : 'q', 'l': '5']) == 5276 to!S("qe55o \U00010143 wor5d")); 5277 assert(translate(to!S("hello \U00010143 world"), cast(dchar[dchar])['o' : '0', '\U00010143' : 'o']) == 5278 to!S("hell0 o w0rld")); 5279 assert(translate(to!S("hello world"), cast(dchar[dchar]) null) == to!S("hello world")); 5280 5281 static foreach (T; AliasSeq!( char[], const( char)[], immutable( char)[], 5282 wchar[], const(wchar)[], immutable(wchar)[], 5283 dchar[], const(dchar)[], immutable(dchar)[])) 5284 (){ // workaround slow optimizations for large functions 5285 // https://issues.dlang.org/show_bug.cgi?id=2396 5286 static foreach (R; AliasSeq!(dchar[dchar], const dchar[dchar], 5287 immutable dchar[dchar])) 5288 {{ 5289 R tt = ['h' : 'q', 'l' : '5']; 5290 assert(translate(to!S("hello world"), tt, to!T("r")) 5291 == to!S("qe55o wo5d")); 5292 assert(translate(to!S("hello world"), tt, to!T("helo")) 5293 == to!S(" wrd")); 5294 assert(translate(to!S("hello world"), tt, to!T("q5")) 5295 == to!S("qe55o wor5d")); 5296 }} 5297 }(); 5298 5299 auto s = to!S("hello world"); 5300 dchar[dchar] transTable = ['h' : 'q', 'l' : '5']; 5301 static assert(is(typeof(s) == typeof(translate(s, transTable)))); 5302 assert(translate(s, transTable) == "qe55o wor5d"); 5303 }();} 5304 }); 5305 } 5306 5307 /++ Ditto +/ 5308 C1[] translate(C1, S, C2 = immutable char)(C1[] str, 5309 in S[dchar] transTable, 5310 const(C2)[] toRemove = null) @safe pure 5311 if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2) 5312 { 5313 import std.array : appender; 5314 auto buffer = appender!(C1[])(); 5315 translateImpl(str, transTable, toRemove, buffer); 5316 return buffer.data; 5317 } 5318 5319 @system pure unittest 5320 { 5321 import std.conv : to; 5322 import std.exception : assertCTFEable; 5323 5324 assertCTFEable!( 5325 { 5326 static foreach (S; AliasSeq!( char[], const( char)[], immutable( char)[], 5327 wchar[], const(wchar)[], immutable(wchar)[], 5328 dchar[], const(dchar)[], immutable(dchar)[])) 5329 {(){ // workaround slow optimizations for large functions 5330 // https://issues.dlang.org/show_bug.cgi?id=2396 5331 assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"]) == 5332 to!S("yellowe4242o wor42d")); 5333 assert(translate(to!S("hello world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) == 5334 to!S("he\U00010143\U00010143\U00010143\U00010143owl wowlr\U00010143\U00010143d")); 5335 assert(translate(to!S("hello \U00010143 world"), ['h' : "yellow", 'l' : "42"]) == 5336 to!S("yellowe4242o \U00010143 wor42d")); 5337 assert(translate(to!S("hello \U00010143 world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) == 5338 to!S("he\U00010143\U00010143\U00010143\U00010143owl \U00010143 wowlr\U00010143\U00010143d")); 5339 assert(translate(to!S("hello \U00010143 world"), ['h' : ""]) == 5340 to!S("ello \U00010143 world")); 5341 assert(translate(to!S("hello \U00010143 world"), ['\U00010143' : ""]) == 5342 to!S("hello world")); 5343 assert(translate(to!S("hello world"), cast(string[dchar]) null) == to!S("hello world")); 5344 5345 static foreach (T; AliasSeq!( char[], const( char)[], immutable( char)[], 5346 wchar[], const(wchar)[], immutable(wchar)[], 5347 dchar[], const(dchar)[], immutable(dchar)[])) 5348 (){ // workaround slow optimizations for large functions 5349 // https://issues.dlang.org/show_bug.cgi?id=2396 5350 static foreach (R; AliasSeq!(string[dchar], const string[dchar], 5351 immutable string[dchar])) 5352 {{ 5353 R tt = ['h' : "yellow", 'l' : "42"]; 5354 assert(translate(to!S("hello world"), tt, to!T("r")) == 5355 to!S("yellowe4242o wo42d")); 5356 assert(translate(to!S("hello world"), tt, to!T("helo")) == 5357 to!S(" wrd")); 5358 assert(translate(to!S("hello world"), tt, to!T("y42")) == 5359 to!S("yellowe4242o wor42d")); 5360 assert(translate(to!S("hello world"), tt, to!T("hello world")) == 5361 to!S("")); 5362 assert(translate(to!S("hello world"), tt, to!T("42")) == 5363 to!S("yellowe4242o wor42d")); 5364 }} 5365 }(); 5366 5367 auto s = to!S("hello world"); 5368 string[dchar] transTable = ['h' : "silly", 'l' : "putty"]; 5369 static assert(is(typeof(s) == typeof(translate(s, transTable)))); 5370 assert(translate(s, transTable) == "sillyeputtyputtyo worputtyd"); 5371 }();} 5372 }); 5373 } 5374 5375 /++ 5376 This is an overload of `translate` which takes an existing buffer to write the contents to. 5377 5378 Params: 5379 str = The original string. 5380 transTable = The AA indicating which characters to replace and what to 5381 replace them with. 5382 toRemove = The characters to remove from the string. 5383 buffer = An output range to write the contents to. 5384 +/ 5385 void translate(C1, C2 = immutable char, Buffer)(const(C1)[] str, 5386 in dchar[dchar] transTable, 5387 const(C2)[] toRemove, 5388 Buffer buffer) 5389 if (isSomeChar!C1 && isSomeChar!C2 && isOutputRange!(Buffer, C1)) 5390 { 5391 translateImpl(str, transTable, toRemove, buffer); 5392 } 5393 5394 /// 5395 @safe pure unittest 5396 { 5397 import std.array : appender; 5398 dchar[dchar] transTable1 = ['e' : '5', 'o' : '7', '5': 'q']; 5399 auto buffer = appender!(dchar[])(); 5400 translate("hello world", transTable1, null, buffer); 5401 assert(buffer.data == "h5ll7 w7rld"); 5402 5403 buffer.clear(); 5404 translate("hello world", transTable1, "low", buffer); 5405 assert(buffer.data == "h5 rd"); 5406 5407 buffer.clear(); 5408 string[dchar] transTable2 = ['e' : "5", 'o' : "orange"]; 5409 translate("hello world", transTable2, null, buffer); 5410 assert(buffer.data == "h5llorange worangerld"); 5411 } 5412 5413 // https://issues.dlang.org/show_bug.cgi?id=13018 5414 @safe pure unittest 5415 { 5416 import std.array : appender; 5417 immutable dchar[dchar] transTable1 = ['e' : '5', 'o' : '7', '5': 'q']; 5418 auto buffer = appender!(dchar[])(); 5419 translate("hello world", transTable1, null, buffer); 5420 assert(buffer.data == "h5ll7 w7rld"); 5421 5422 buffer.clear(); 5423 translate("hello world", transTable1, "low", buffer); 5424 assert(buffer.data == "h5 rd"); 5425 5426 buffer.clear(); 5427 immutable string[dchar] transTable2 = ['e' : "5", 'o' : "orange"]; 5428 translate("hello world", transTable2, null, buffer); 5429 assert(buffer.data == "h5llorange worangerld"); 5430 } 5431 5432 /++ Ditto +/ 5433 void translate(C1, S, C2 = immutable char, Buffer)(C1[] str, 5434 in S[dchar] transTable, 5435 const(C2)[] toRemove, 5436 Buffer buffer) 5437 if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2 && isOutputRange!(Buffer, S)) 5438 { 5439 translateImpl(str, transTable, toRemove, buffer); 5440 } 5441 5442 private void translateImpl(C1, T, C2, Buffer)(const(C1)[] str, 5443 scope T transTable, 5444 const(C2)[] toRemove, 5445 Buffer buffer) 5446 { 5447 bool[dchar] removeTable; 5448 5449 foreach (dchar c; toRemove) 5450 removeTable[c] = true; 5451 5452 foreach (dchar c; str) 5453 { 5454 if (c in removeTable) 5455 continue; 5456 5457 auto newC = c in transTable; 5458 5459 if (newC) 5460 put(buffer, *newC); 5461 else 5462 put(buffer, c); 5463 } 5464 } 5465 5466 /++ 5467 This is an $(I $(RED ASCII-only)) overload of $(LREF _translate). It 5468 will $(I not) work with Unicode. It exists as an optimization for the 5469 cases where Unicode processing is not necessary. 5470 5471 Unlike the other overloads of $(LREF _translate), this one does not take 5472 an AA. Rather, it takes a `string` generated by $(LREF makeTransTable). 5473 5474 The array generated by `makeTransTable` is `256` elements long such that 5475 the index is equal to the ASCII character being replaced and the value is 5476 equal to the character that it's being replaced with. Note that translate 5477 does not decode any of the characters, so you can actually pass it Extended 5478 ASCII characters if you want to (ASCII only actually uses `128` 5479 characters), but be warned that Extended ASCII characters are not valid 5480 Unicode and therefore will result in a `UTFException` being thrown from 5481 most other Phobos functions. 5482 5483 Also, because no decoding occurs, it is possible to use this overload to 5484 translate ASCII characters within a proper UTF-8 string without altering the 5485 other, non-ASCII characters. It's replacing any code unit greater than 5486 `127` with another code unit or replacing any code unit with another code 5487 unit greater than `127` which will cause UTF validation issues. 5488 5489 See_Also: 5490 $(LREF tr), 5491 $(REF replace, std,array), 5492 $(REF substitute, std,algorithm,iteration) 5493 5494 Params: 5495 str = The original string. 5496 transTable = The string indicating which characters to replace and what 5497 to replace them with. It is generated by $(LREF makeTransTable). 5498 toRemove = The characters to remove from the string. 5499 +/ 5500 C[] translate(C = immutable char)(scope const(char)[] str, scope const(char)[] transTable, 5501 scope const(char)[] toRemove = null) @trusted pure nothrow 5502 if (is(immutable C == immutable char)) 5503 in 5504 { 5505 import std.conv : to; 5506 assert(transTable.length == 256, "transTable had invalid length of " ~ 5507 to!string(transTable.length)); 5508 } 5509 do 5510 { 5511 bool[256] remTable = false; 5512 5513 foreach (char c; toRemove) 5514 remTable[c] = true; 5515 5516 size_t count = 0; 5517 foreach (char c; str) 5518 { 5519 if (!remTable[c]) 5520 ++count; 5521 } 5522 5523 auto buffer = new char[count]; 5524 5525 size_t i = 0; 5526 foreach (char c; str) 5527 { 5528 if (!remTable[c]) 5529 buffer[i++] = transTable[c]; 5530 } 5531 5532 return cast(C[])(buffer); 5533 } 5534 5535 /// 5536 @safe pure nothrow unittest 5537 { 5538 auto transTable1 = makeTrans("eo5", "57q"); 5539 assert(translate("hello world", transTable1) == "h5ll7 w7rld"); 5540 5541 assert(translate("hello world", transTable1, "low") == "h5 rd"); 5542 } 5543 5544 /** 5545 * Do same thing as $(LREF makeTransTable) but allocate the translation table 5546 * on the GC heap. 5547 * 5548 * Use $(LREF makeTransTable) instead. 5549 */ 5550 string makeTrans(scope const(char)[] from, scope const(char)[] to) @trusted pure nothrow 5551 { 5552 return makeTransTable(from, to)[].idup; 5553 } 5554 5555 /// 5556 @safe pure nothrow unittest 5557 { 5558 auto transTable1 = makeTrans("eo5", "57q"); 5559 assert(translate("hello world", transTable1) == "h5ll7 w7rld"); 5560 5561 assert(translate("hello world", transTable1, "low") == "h5 rd"); 5562 } 5563 5564 /******* 5565 * Construct 256 character translation table, where characters in from[] are replaced 5566 * by corresponding characters in to[]. 5567 * 5568 * Params: 5569 * from = array of chars, less than or equal to 256 in length 5570 * to = corresponding array of chars to translate to 5571 * Returns: 5572 * translation array 5573 */ 5574 char[256] makeTransTable(scope const(char)[] from, scope const(char)[] to) @safe pure nothrow @nogc 5575 in 5576 { 5577 import std.ascii : isASCII; 5578 assert(from.length == to.length, "from.length must match to.length"); 5579 assert(from.length <= 256, "from.length must be <= 256"); 5580 foreach (char c; from) 5581 assert(isASCII(c), 5582 "all characters in from must be valid ascii character"); 5583 foreach (char c; to) 5584 assert(isASCII(c), 5585 "all characters in to must be valid ascii character"); 5586 } 5587 do 5588 { 5589 char[256] result = void; 5590 5591 foreach (i; 0 .. result.length) 5592 result[i] = cast(char) i; 5593 foreach (i, c; from) 5594 result[c] = to[i]; 5595 return result; 5596 } 5597 5598 /// 5599 @safe pure unittest 5600 { 5601 assert(translate("hello world", makeTransTable("hl", "q5")) == "qe55o wor5d"); 5602 assert(translate("hello world", makeTransTable("12345", "67890")) == "hello world"); 5603 } 5604 5605 @safe pure unittest 5606 { 5607 import std.conv : to; 5608 import std.exception : assertCTFEable; 5609 5610 assertCTFEable!( 5611 { 5612 static foreach (C; AliasSeq!(char, const char, immutable char)) 5613 {{ 5614 assert(translate!C("hello world", makeTransTable("hl", "q5")) == to!(C[])("qe55o wor5d")); 5615 5616 auto s = to!(C[])("hello world"); 5617 auto transTable = makeTransTable("hl", "q5"); 5618 static assert(is(typeof(s) == typeof(translate!C(s, transTable)))); 5619 assert(translate(s, transTable) == "qe55o wor5d"); 5620 }} 5621 5622 static foreach (S; AliasSeq!(char[], const(char)[], immutable(char)[])) 5623 { 5624 assert(translate(to!S("hello world"), makeTransTable("hl", "q5")) == to!S("qe55o wor5d")); 5625 assert(translate(to!S("hello \U00010143 world"), makeTransTable("hl", "q5")) == 5626 to!S("qe55o \U00010143 wor5d")); 5627 assert(translate(to!S("hello world"), makeTransTable("ol", "1o")) == to!S("heoo1 w1rod")); 5628 assert(translate(to!S("hello world"), makeTransTable("", "")) == to!S("hello world")); 5629 assert(translate(to!S("hello world"), makeTransTable("12345", "67890")) == to!S("hello world")); 5630 assert(translate(to!S("hello \U00010143 world"), makeTransTable("12345", "67890")) == 5631 to!S("hello \U00010143 world")); 5632 5633 static foreach (T; AliasSeq!(char[], const(char)[], immutable(char)[])) 5634 { 5635 assert(translate(to!S("hello world"), makeTransTable("hl", "q5"), to!T("r")) == 5636 to!S("qe55o wo5d")); 5637 assert(translate(to!S("hello \U00010143 world"), makeTransTable("hl", "q5"), to!T("r")) == 5638 to!S("qe55o \U00010143 wo5d")); 5639 assert(translate(to!S("hello world"), makeTransTable("hl", "q5"), to!T("helo")) == 5640 to!S(" wrd")); 5641 assert(translate(to!S("hello world"), makeTransTable("hl", "q5"), to!T("q5")) == 5642 to!S("qe55o wor5d")); 5643 } 5644 } 5645 }); 5646 } 5647 5648 /++ 5649 This is an $(I $(RED ASCII-only)) overload of `translate` which takes an existing buffer to write the contents to. 5650 5651 Params: 5652 str = The original string. 5653 transTable = The string indicating which characters to replace and what 5654 to replace them with. It is generated by $(LREF makeTransTable). 5655 toRemove = The characters to remove from the string. 5656 buffer = An output range to write the contents to. 5657 +/ 5658 void translate(C = immutable char, Buffer)(scope const(char)[] str, scope const(char)[] transTable, 5659 scope const(char)[] toRemove, Buffer buffer) @trusted pure 5660 if (is(immutable C == immutable char) && isOutputRange!(Buffer, char)) 5661 in 5662 { 5663 assert(transTable.length == 256, format! 5664 "transTable.length %s must equal 256"(transTable.length)); 5665 } 5666 do 5667 { 5668 bool[256] remTable = false; 5669 5670 foreach (char c; toRemove) 5671 remTable[c] = true; 5672 5673 foreach (char c; str) 5674 { 5675 if (!remTable[c]) 5676 put(buffer, transTable[c]); 5677 } 5678 } 5679 5680 /// 5681 @safe pure unittest 5682 { 5683 import std.array : appender; 5684 auto buffer = appender!(char[])(); 5685 auto transTable1 = makeTransTable("eo5", "57q"); 5686 translate("hello world", transTable1, null, buffer); 5687 assert(buffer.data == "h5ll7 w7rld"); 5688 5689 buffer.clear(); 5690 translate("hello world", transTable1, "low", buffer); 5691 assert(buffer.data == "h5 rd"); 5692 } 5693 5694 /********************************************** 5695 * Return string that is the 'successor' to s[]. 5696 * If the rightmost character is a-zA-Z0-9, it is incremented within 5697 * its case or digits. If it generates a carry, the process is 5698 * repeated with the one to its immediate left. 5699 */ 5700 5701 S succ(S)(S s) @safe pure 5702 if (isSomeString!S) 5703 { 5704 import std.ascii : isAlphaNum; 5705 5706 if (s.length && isAlphaNum(s[$ - 1])) 5707 { 5708 auto r = s.dup; 5709 size_t i = r.length - 1; 5710 5711 while (1) 5712 { 5713 dchar c = s[i]; 5714 dchar carry; 5715 5716 switch (c) 5717 { 5718 case '9': 5719 c = '0'; 5720 carry = '1'; 5721 goto Lcarry; 5722 case 'z': 5723 case 'Z': 5724 c -= 'Z' - 'A'; 5725 carry = c; 5726 Lcarry: 5727 r[i] = cast(char) c; 5728 if (i == 0) 5729 { 5730 auto t = new typeof(r[0])[r.length + 1]; 5731 t[0] = cast(char) carry; 5732 t[1 .. $] = r[]; 5733 return t; 5734 } 5735 i--; 5736 break; 5737 5738 default: 5739 if (isAlphaNum(c)) 5740 r[i]++; 5741 return r; 5742 } 5743 } 5744 } 5745 return s; 5746 } 5747 5748 /// 5749 @safe pure unittest 5750 { 5751 assert(succ("1") == "2"); 5752 assert(succ("9") == "10"); 5753 assert(succ("999") == "1000"); 5754 assert(succ("zz99") == "aaa00"); 5755 } 5756 5757 @safe pure unittest 5758 { 5759 import std.conv : to; 5760 import std.exception : assertCTFEable; 5761 5762 assertCTFEable!( 5763 { 5764 assert(succ(string.init) is null); 5765 assert(succ("!@#$%") == "!@#$%"); 5766 assert(succ("1") == "2"); 5767 assert(succ("9") == "10"); 5768 assert(succ("999") == "1000"); 5769 assert(succ("zz99") == "aaa00"); 5770 }); 5771 } 5772 5773 5774 /++ 5775 Replaces the characters in `str` which are in `from` with the 5776 the corresponding characters in `to` and returns the resulting string. 5777 5778 `tr` is based on 5779 $(HTTP pubs.opengroup.org/onlinepubs/9699919799/utilities/_tr.html, Posix's tr), 5780 though it doesn't do everything that the Posix utility does. 5781 5782 Params: 5783 str = The original string. 5784 from = The characters to replace. 5785 to = The characters to replace with. 5786 modifiers = String containing modifiers. 5787 5788 Modifiers: 5789 $(BOOKTABLE, 5790 $(TR $(TD Modifier) $(TD Description)) 5791 $(TR $(TD `'c'`) $(TD Complement the list of characters in `from`)) 5792 $(TR $(TD `'d'`) $(TD Removes matching characters with no corresponding 5793 replacement in `to`)) 5794 $(TR $(TD `'s'`) $(TD Removes adjacent duplicates in the replaced 5795 characters)) 5796 ) 5797 5798 If the modifier `'d'` is present, then the number of characters in 5799 `to` may be only `0` or `1`. 5800 5801 If the modifier `'d'` is $(I not) present, and `to` is empty, then 5802 `to` is taken to be the same as `from`. 5803 5804 If the modifier `'d'` is $(I not) present, and `to` is shorter than 5805 `from`, then `to` is extended by replicating the last character in 5806 `to`. 5807 5808 Both `from` and `to` may contain ranges using the `'-'` character 5809 (e.g. `"a-d"` is synonymous with `"abcd"`.) Neither accept a leading 5810 `'^'` as meaning the complement of the string (use the `'c'` modifier 5811 for that). 5812 5813 See_Also: 5814 $(LREF translate), 5815 $(REF replace, std,array), 5816 $(REF substitute, std,algorithm,iteration) 5817 +/ 5818 C1[] tr(C1, C2, C3, C4 = immutable char) 5819 (C1[] str, const(C2)[] from, const(C3)[] to, const(C4)[] modifiers = null) 5820 { 5821 import std.array : appender; 5822 import std.conv : conv_to = to; 5823 import std.utf : decode; 5824 5825 bool mod_c; 5826 bool mod_d; 5827 bool mod_s; 5828 5829 foreach (char c; modifiers) 5830 { 5831 switch (c) 5832 { 5833 case 'c': mod_c = 1; break; // complement 5834 case 'd': mod_d = 1; break; // delete unreplaced chars 5835 case 's': mod_s = 1; break; // squeeze duplicated replaced chars 5836 default: assert(false, "modifier must be one of ['c', 'd', 's'] not " 5837 ~ c); 5838 } 5839 } 5840 5841 if (to.empty && !mod_d) 5842 to = conv_to!(typeof(to))(from); 5843 5844 auto result = appender!(C1[])(); 5845 bool modified; 5846 dchar lastc; 5847 5848 foreach (dchar c; str) 5849 { 5850 dchar lastf; 5851 dchar lastt; 5852 dchar newc; 5853 int n = 0; 5854 5855 for (size_t i = 0; i < from.length; ) 5856 { 5857 immutable f = decode(from, i); 5858 if (f == '-' && lastf != dchar.init && i < from.length) 5859 { 5860 immutable nextf = decode(from, i); 5861 if (lastf <= c && c <= nextf) 5862 { 5863 n += c - lastf - 1; 5864 if (mod_c) 5865 goto Lnotfound; 5866 goto Lfound; 5867 } 5868 n += nextf - lastf; 5869 lastf = lastf.init; 5870 continue; 5871 } 5872 5873 if (c == f) 5874 { if (mod_c) 5875 goto Lnotfound; 5876 goto Lfound; 5877 } 5878 lastf = f; 5879 n++; 5880 } 5881 if (!mod_c) 5882 goto Lnotfound; 5883 n = 0; // consider it 'found' at position 0 5884 5885 Lfound: 5886 5887 // Find the nth character in to[] 5888 dchar nextt; 5889 for (size_t i = 0; i < to.length; ) 5890 { 5891 immutable t = decode(to, i); 5892 if (t == '-' && lastt != dchar.init && i < to.length) 5893 { 5894 nextt = decode(to, i); 5895 n -= nextt - lastt; 5896 if (n < 0) 5897 { 5898 newc = nextt + n + 1; 5899 goto Lnewc; 5900 } 5901 lastt = dchar.init; 5902 continue; 5903 } 5904 if (n == 0) 5905 { newc = t; 5906 goto Lnewc; 5907 } 5908 lastt = t; 5909 nextt = t; 5910 n--; 5911 } 5912 if (mod_d) 5913 continue; 5914 newc = nextt; 5915 5916 Lnewc: 5917 if (mod_s && modified && newc == lastc) 5918 continue; 5919 result.put(newc); 5920 assert(newc != dchar.init, "character must not be dchar.init"); 5921 modified = true; 5922 lastc = newc; 5923 continue; 5924 5925 Lnotfound: 5926 result.put(c); 5927 lastc = c; 5928 modified = false; 5929 } 5930 5931 return result.data; 5932 } 5933 5934 /// 5935 @safe pure unittest 5936 { 5937 assert(tr("abcdef", "cd", "CD") == "abCDef"); 5938 assert(tr("1st March, 2018", "March", "MAR", "s") == "1st MAR, 2018"); 5939 assert(tr("abcdef", "ef", "", "d") == "abcd"); 5940 assert(tr("14-Jul-87", "a-zA-Z", " ", "cs") == " Jul "); 5941 } 5942 5943 @safe pure unittest 5944 { 5945 import std.algorithm.comparison : equal; 5946 import std.conv : to; 5947 import std.exception : assertCTFEable; 5948 5949 // Complete list of test types; too slow to test'em all 5950 // alias TestTypes = AliasSeq!( 5951 // char[], const( char)[], immutable( char)[], 5952 // wchar[], const(wchar)[], immutable(wchar)[], 5953 // dchar[], const(dchar)[], immutable(dchar)[]); 5954 5955 // Reduced list of test types 5956 alias TestTypes = AliasSeq!(char[], const(wchar)[], immutable(dchar)[]); 5957 5958 assertCTFEable!( 5959 { 5960 foreach (S; TestTypes) 5961 { 5962 foreach (T; TestTypes) 5963 { 5964 foreach (U; TestTypes) 5965 { 5966 assert(equal(tr(to!S("abcdef"), to!T("cd"), to!U("CD")), "abCDef")); 5967 assert(equal(tr(to!S("abcdef"), to!T("b-d"), to!U("B-D")), "aBCDef")); 5968 assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-Dx")), "aBCDefgx")); 5969 assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-CDx")), "aBCDefgx")); 5970 assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-BCDx")), "aBCDefgx")); 5971 assert(equal(tr(to!S("abcdef"), to!T("ef"), to!U("*"), to!S("c")), "****ef")); 5972 assert(equal(tr(to!S("abcdef"), to!T("ef"), to!U(""), to!T("d")), "abcd")); 5973 assert(equal(tr(to!S("hello goodbye"), to!T("lo"), to!U(""), to!U("s")), "helo godbye")); 5974 assert(equal(tr(to!S("hello goodbye"), to!T("lo"), to!U("x"), "s"), "hex gxdbye")); 5975 assert(equal(tr(to!S("14-Jul-87"), to!T("a-zA-Z"), to!U(" "), "cs"), " Jul ")); 5976 assert(equal(tr(to!S("Abc"), to!T("AAA"), to!U("XYZ")), "Xbc")); 5977 } 5978 } 5979 5980 auto s = to!S("hello world"); 5981 static assert(is(typeof(s) == typeof(tr(s, "he", "if")))); 5982 assert(tr(s, "he", "if") == "ifllo world"); 5983 } 5984 }); 5985 } 5986 5987 @system pure unittest 5988 { 5989 import core.exception : AssertError; 5990 import std.exception : assertThrown; 5991 assertThrown!AssertError(tr("abcdef", "cd", "CD", "X")); 5992 } 5993 5994 /** 5995 * Takes a string `s` and determines if it represents a number. This function 5996 * also takes an optional parameter, `bAllowSep`, which will accept the 5997 * separator characters `','` and `'__'` within the string. But these 5998 * characters should be stripped from the string before using any 5999 * of the conversion functions like `to!int()`, `to!float()`, and etc 6000 * else an error will occur. 6001 * 6002 * Also please note, that no spaces are allowed within the string 6003 * anywhere whether it's a leading, trailing, or embedded space(s), 6004 * thus they too must be stripped from the string before using this 6005 * function, or any of the conversion functions. 6006 * 6007 * Params: 6008 * s = the string or random access range to check 6009 * bAllowSep = accept separator characters or not 6010 * 6011 * Returns: 6012 * `bool` 6013 */ 6014 bool isNumeric(S)(S s, bool bAllowSep = false) 6015 if (isSomeString!S || 6016 (isRandomAccessRange!S && 6017 hasSlicing!S && 6018 isSomeChar!(ElementType!S) && 6019 !isInfinite!S)) 6020 { 6021 import std.algorithm.comparison : among; 6022 import std.ascii : isASCII; 6023 6024 // ASCII only case insensitive comparison with two ranges 6025 static bool asciiCmp(S1)(S1 a, string b) 6026 { 6027 import std.algorithm.comparison : equal; 6028 import std.algorithm.iteration : map; 6029 import std.ascii : toLower; 6030 import std.utf : byChar; 6031 return a.map!toLower.equal(b.byChar.map!toLower); 6032 } 6033 6034 // auto-decoding special case, we're only comparing characters 6035 // in the ASCII range so there's no reason to decode 6036 static if (isSomeString!S) 6037 { 6038 import std.utf : byCodeUnit; 6039 auto codeUnits = s.byCodeUnit; 6040 } 6041 else 6042 { 6043 alias codeUnits = s; 6044 } 6045 6046 if (codeUnits.empty) 6047 return false; 6048 6049 // Check for NaN (Not a Number) and for Infinity 6050 if (codeUnits.among!((a, b) => asciiCmp(a.save, b)) 6051 ("nan", "nani", "nan+nani", "inf", "-inf")) 6052 return true; 6053 6054 immutable frontResult = codeUnits.front; 6055 if (frontResult == '-' || frontResult == '+') 6056 codeUnits.popFront; 6057 6058 immutable iLen = codeUnits.length; 6059 bool bDecimalPoint, bExponent, bComplex, sawDigits; 6060 6061 for (size_t i = 0; i < iLen; i++) 6062 { 6063 immutable c = codeUnits[i]; 6064 6065 if (!c.isASCII) 6066 return false; 6067 6068 // Digits are good, skip to the next character 6069 if (c >= '0' && c <= '9') 6070 { 6071 sawDigits = true; 6072 continue; 6073 } 6074 6075 // Check for the complex type, and if found 6076 // reset the flags for checking the 2nd number. 6077 if (c == '+') 6078 { 6079 if (!i) 6080 return false; 6081 bDecimalPoint = false; 6082 bExponent = false; 6083 bComplex = true; 6084 sawDigits = false; 6085 continue; 6086 } 6087 6088 // Allow only one exponent per number 6089 if (c == 'e' || c == 'E') 6090 { 6091 // A 2nd exponent found, return not a number 6092 if (bExponent || i + 1 >= iLen) 6093 return false; 6094 // Look forward for the sign, and if 6095 // missing then this is not a number. 6096 if (codeUnits[i + 1] != '-' && codeUnits[i + 1] != '+') 6097 return false; 6098 bExponent = true; 6099 i++; 6100 continue; 6101 } 6102 // Allow only one decimal point per number to be used 6103 if (c == '.') 6104 { 6105 // A 2nd decimal point found, return not a number 6106 if (bDecimalPoint) 6107 return false; 6108 bDecimalPoint = true; 6109 continue; 6110 } 6111 // Check for ending literal characters: "f,u,l,i,ul,fi,li", 6112 // and whether they're being used with the correct datatype. 6113 if (i == iLen - 2) 6114 { 6115 if (!sawDigits) 6116 return false; 6117 // Integer Whole Number 6118 if (asciiCmp(codeUnits[i .. iLen], "ul") && 6119 (!bDecimalPoint && !bExponent && !bComplex)) 6120 return true; 6121 // Floating-Point Number 6122 if (codeUnits[i .. iLen].among!((a, b) => asciiCmp(a, b))("fi", "li") && 6123 (bDecimalPoint || bExponent || bComplex)) 6124 return true; 6125 if (asciiCmp(codeUnits[i .. iLen], "ul") && 6126 (bDecimalPoint || bExponent || bComplex)) 6127 return false; 6128 // Could be a Integer or a Float, thus 6129 // all these suffixes are valid for both 6130 return codeUnits[i .. iLen].among!((a, b) => asciiCmp(a, b)) 6131 ("ul", "fi", "li") != 0; 6132 } 6133 if (i == iLen - 1) 6134 { 6135 if (!sawDigits) 6136 return false; 6137 // Integer Whole Number 6138 if (c.among!('u', 'l', 'U', 'L')() && 6139 (!bDecimalPoint && !bExponent && !bComplex)) 6140 return true; 6141 // Check to see if the last character in the string 6142 // is the required 'i' character 6143 if (bComplex) 6144 return c.among!('i', 'I')() != 0; 6145 // Floating-Point Number 6146 return c.among!('l', 'L', 'f', 'F', 'i', 'I')() != 0; 6147 } 6148 // Check if separators are allowed to be in the numeric string 6149 if (!bAllowSep || !c.among!('_', ',')()) 6150 return false; 6151 } 6152 6153 return sawDigits; 6154 } 6155 6156 /** 6157 * Integer Whole Number: (byte, ubyte, short, ushort, int, uint, long, and ulong) 6158 * ['+'|'-']digit(s)[U|L|UL] 6159 */ 6160 @safe @nogc pure nothrow unittest 6161 { 6162 assert(isNumeric("123")); 6163 assert(isNumeric("123UL")); 6164 assert(isNumeric("123L")); 6165 assert(isNumeric("+123U")); 6166 assert(isNumeric("-123L")); 6167 } 6168 6169 /** 6170 * Floating-Point Number: (float, double, real, ifloat, idouble, and ireal) 6171 * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]] 6172 * or [nan|nani|inf|-inf] 6173 */ 6174 @safe @nogc pure nothrow unittest 6175 { 6176 assert(isNumeric("+123")); 6177 assert(isNumeric("-123.01")); 6178 assert(isNumeric("123.3e-10f")); 6179 assert(isNumeric("123.3e-10fi")); 6180 assert(isNumeric("123.3e-10L")); 6181 6182 assert(isNumeric("nan")); 6183 assert(isNumeric("nani")); 6184 assert(isNumeric("-inf")); 6185 } 6186 6187 /** 6188 * Floating-Point Number: (cfloat, cdouble, and creal) 6189 * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][+] 6190 * [digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]] 6191 * or [nan|nani|nan+nani|inf|-inf] 6192 */ 6193 @safe @nogc pure nothrow unittest 6194 { 6195 assert(isNumeric("-123e-1+456.9e-10Li")); 6196 assert(isNumeric("+123e+10+456i")); 6197 assert(isNumeric("123+456")); 6198 } 6199 6200 @safe @nogc pure nothrow unittest 6201 { 6202 assert(!isNumeric("F")); 6203 assert(!isNumeric("L")); 6204 assert(!isNumeric("U")); 6205 assert(!isNumeric("i")); 6206 assert(!isNumeric("fi")); 6207 assert(!isNumeric("ul")); 6208 assert(!isNumeric("li")); 6209 assert(!isNumeric(".")); 6210 assert(!isNumeric("-")); 6211 assert(!isNumeric("+")); 6212 assert(!isNumeric("e-")); 6213 assert(!isNumeric("e+")); 6214 assert(!isNumeric(".f")); 6215 assert(!isNumeric("e+f")); 6216 assert(!isNumeric("++1")); 6217 assert(!isNumeric("")); 6218 assert(!isNumeric("1E+1E+1")); 6219 assert(!isNumeric("1E1")); 6220 assert(!isNumeric("\x81")); 6221 } 6222 6223 // Test string types 6224 @safe unittest 6225 { 6226 import std.conv : to; 6227 6228 static foreach (T; AliasSeq!(string, char[], wstring, wchar[], dstring, dchar[])) 6229 { 6230 assert("123".to!T.isNumeric()); 6231 assert("123UL".to!T.isNumeric()); 6232 assert("123fi".to!T.isNumeric()); 6233 assert("123li".to!T.isNumeric()); 6234 assert(!"--123L".to!T.isNumeric()); 6235 } 6236 } 6237 6238 // test ranges 6239 @system pure unittest 6240 { 6241 import std.range : refRange; 6242 import std.utf : byCodeUnit; 6243 6244 assert("123".byCodeUnit.isNumeric()); 6245 assert("123UL".byCodeUnit.isNumeric()); 6246 assert("123fi".byCodeUnit.isNumeric()); 6247 assert("123li".byCodeUnit.isNumeric()); 6248 assert(!"--123L".byCodeUnit.isNumeric()); 6249 6250 dstring z = "0"; 6251 assert(isNumeric(refRange(&z))); 6252 6253 dstring nani = "nani"; 6254 assert(isNumeric(refRange(&nani))); 6255 } 6256 6257 /// isNumeric works with CTFE 6258 @safe pure unittest 6259 { 6260 enum a = isNumeric("123.00E-5+1234.45E-12Li"); 6261 enum b = isNumeric("12345xxxx890"); 6262 6263 static assert( a); 6264 static assert(!b); 6265 } 6266 6267 @system unittest 6268 { 6269 import std.conv : to; 6270 import std.exception : assertCTFEable; 6271 6272 assertCTFEable!( 6273 { 6274 // Test the isNumeric(in string) function 6275 assert(isNumeric("1") == true ); 6276 assert(isNumeric("1.0") == true ); 6277 assert(isNumeric("1e-1") == true ); 6278 assert(isNumeric("12345xxxx890") == false ); 6279 assert(isNumeric("567L") == true ); 6280 assert(isNumeric("23UL") == true ); 6281 assert(isNumeric("-123..56f") == false ); 6282 assert(isNumeric("12.3.5.6") == false ); 6283 assert(isNumeric(" 12.356") == false ); 6284 assert(isNumeric("123 5.6") == false ); 6285 assert(isNumeric("1233E-1+1.0e-1i") == true ); 6286 6287 assert(isNumeric("123.00E-5+1234.45E-12Li") == true); 6288 assert(isNumeric("123.00e-5+1234.45E-12iL") == false); 6289 assert(isNumeric("123.00e-5+1234.45e-12uL") == false); 6290 assert(isNumeric("123.00E-5+1234.45e-12lu") == false); 6291 6292 assert(isNumeric("123fi") == true); 6293 assert(isNumeric("123li") == true); 6294 assert(isNumeric("--123L") == false); 6295 assert(isNumeric("+123.5UL") == false); 6296 assert(isNumeric("123f") == true); 6297 assert(isNumeric("123.u") == false); 6298 6299 // @@@BUG@@ to!string(float) is not CTFEable. 6300 // Related: formatValue(T) if (is(FloatingPointTypeOf!T)) 6301 if (!__ctfe) 6302 { 6303 assert(isNumeric(to!string(real.nan)) == true); 6304 assert(isNumeric(to!string(-real.infinity)) == true); 6305 } 6306 6307 string s = "$250.99-"; 6308 assert(isNumeric(s[1 .. s.length - 2]) == true); 6309 assert(isNumeric(s) == false); 6310 assert(isNumeric(s[0 .. s.length - 1]) == false); 6311 }); 6312 6313 assert(!isNumeric("-")); 6314 assert(!isNumeric("+")); 6315 } 6316 6317 version (TestComplex) 6318 deprecated 6319 @safe unittest 6320 { 6321 import std.conv : to; 6322 assert(isNumeric(to!string(123e+2+1234.78Li)) == true); 6323 } 6324 6325 /***************************** 6326 * Soundex algorithm. 6327 * 6328 * The Soundex algorithm converts a word into 4 characters 6329 * based on how the word sounds phonetically. The idea is that 6330 * two spellings that sound alike will have the same Soundex 6331 * value, which means that Soundex can be used for fuzzy matching 6332 * of names. 6333 * 6334 * Params: 6335 * str = String or InputRange to convert to Soundex representation. 6336 * 6337 * Returns: 6338 * The four character array with the Soundex result in it. 6339 * The array has zero's in it if there is no Soundex representation for the string. 6340 * 6341 * See_Also: 6342 * $(LINK2 http://en.wikipedia.org/wiki/Soundex, Wikipedia), 6343 * $(LUCKY The Soundex Indexing System) 6344 * $(LREF soundex) 6345 * 6346 * Note: 6347 * Only works well with English names. 6348 */ 6349 char[4] soundexer(Range)(Range str) 6350 if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) && 6351 !isConvertibleToString!Range) 6352 { 6353 alias C = Unqual!(ElementEncodingType!Range); 6354 6355 static immutable dex = 6356 // ABCDEFGHIJKLMNOPQRSTUVWXYZ 6357 "01230120022455012623010202"; 6358 6359 char[4] result = void; 6360 size_t b = 0; 6361 C lastc; 6362 foreach (C c; str) 6363 { 6364 if (c >= 'a' && c <= 'z') 6365 c -= 'a' - 'A'; 6366 else if (c >= 'A' && c <= 'Z') 6367 { 6368 } 6369 else 6370 { 6371 lastc = lastc.init; 6372 continue; 6373 } 6374 if (b == 0) 6375 { 6376 result[0] = cast(char) c; 6377 b++; 6378 lastc = dex[c - 'A']; 6379 } 6380 else 6381 { 6382 if (c == 'H' || c == 'W') 6383 continue; 6384 if (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U') 6385 lastc = lastc.init; 6386 c = dex[c - 'A']; 6387 if (c != '0' && c != lastc) 6388 { 6389 result[b] = cast(char) c; 6390 b++; 6391 lastc = c; 6392 } 6393 if (b == 4) 6394 goto Lret; 6395 } 6396 } 6397 if (b == 0) 6398 result[] = 0; 6399 else 6400 result[b .. 4] = '0'; 6401 Lret: 6402 return result; 6403 } 6404 6405 /// ditto 6406 char[4] soundexer(Range)(auto ref Range str) 6407 if (isConvertibleToString!Range) 6408 { 6409 return soundexer!(StringTypeOf!Range)(str); 6410 } 6411 6412 /// 6413 @safe unittest 6414 { 6415 assert(soundexer("Gauss") == "G200"); 6416 assert(soundexer("Ghosh") == "G200"); 6417 6418 assert(soundexer("Robert") == "R163"); 6419 assert(soundexer("Rupert") == "R163"); 6420 6421 assert(soundexer("0123^&^^**&^") == ['\0', '\0', '\0', '\0']); 6422 } 6423 6424 /***************************** 6425 * Like $(LREF soundexer), but with different parameters 6426 * and return value. 6427 * 6428 * Params: 6429 * str = String to convert to Soundex representation. 6430 * buffer = Optional 4 char array to put the resulting Soundex 6431 * characters into. If null, the return value 6432 * buffer will be allocated on the heap. 6433 * Returns: 6434 * The four character array with the Soundex result in it. 6435 * Returns null if there is no Soundex representation for the string. 6436 * See_Also: 6437 * $(LREF soundexer) 6438 */ 6439 char[] soundex(scope const(char)[] str, char[] buffer = null) 6440 @safe pure nothrow 6441 in 6442 { 6443 assert(buffer is null || buffer.length >= 4); 6444 } 6445 out (result) 6446 { 6447 if (result !is null) 6448 { 6449 assert(result.length == 4, "Result must have length of 4"); 6450 assert(result[0] >= 'A' && result[0] <= 'Z', "The first character of " 6451 ~ " the result must be an upper character not " ~ result); 6452 foreach (char c; result[1 .. 4]) 6453 assert(c >= '0' && c <= '6', "the last three character of the" 6454 ~ " result must be number between 0 and 6 not " ~ result); 6455 } 6456 } 6457 do 6458 { 6459 char[4] result = soundexer(str); 6460 if (result[0] == 0) 6461 return null; 6462 if (buffer is null) 6463 buffer = new char[4]; 6464 buffer[] = result[]; 6465 return buffer; 6466 } 6467 6468 /// 6469 @safe unittest 6470 { 6471 assert(soundex("Gauss") == "G200"); 6472 assert(soundex("Ghosh") == "G200"); 6473 6474 assert(soundex("Robert") == "R163"); 6475 assert(soundex("Rupert") == "R163"); 6476 6477 assert(soundex("0123^&^^**&^") == null); 6478 } 6479 6480 @safe pure nothrow unittest 6481 { 6482 import std.exception : assertCTFEable; 6483 assertCTFEable!( 6484 { 6485 char[4] buffer; 6486 6487 assert(soundex(null) == null); 6488 assert(soundex("") == null); 6489 assert(soundex("0123^&^^**&^") == null); 6490 assert(soundex("Euler") == "E460"); 6491 assert(soundex(" Ellery ") == "E460"); 6492 assert(soundex("Gauss") == "G200"); 6493 assert(soundex("Ghosh") == "G200"); 6494 assert(soundex("Hilbert") == "H416"); 6495 assert(soundex("Heilbronn") == "H416"); 6496 assert(soundex("Knuth") == "K530"); 6497 assert(soundex("Kant", buffer) == "K530"); 6498 assert(soundex("Lloyd") == "L300"); 6499 assert(soundex("Ladd") == "L300"); 6500 assert(soundex("Lukasiewicz", buffer) == "L222"); 6501 assert(soundex("Lissajous") == "L222"); 6502 assert(soundex("Robert") == "R163"); 6503 assert(soundex("Rupert") == "R163"); 6504 assert(soundex("Rubin") == "R150"); 6505 assert(soundex("Washington") == "W252"); 6506 assert(soundex("Lee") == "L000"); 6507 assert(soundex("Gutierrez") == "G362"); 6508 assert(soundex("Pfister") == "P236"); 6509 assert(soundex("Jackson") == "J250"); 6510 assert(soundex("Tymczak") == "T522"); 6511 assert(soundex("Ashcraft") == "A261"); 6512 6513 assert(soundex("Woo") == "W000"); 6514 assert(soundex("Pilgrim") == "P426"); 6515 assert(soundex("Flingjingwaller") == "F452"); 6516 assert(soundex("PEARSE") == "P620"); 6517 assert(soundex("PIERCE") == "P620"); 6518 assert(soundex("Price") == "P620"); 6519 assert(soundex("CATHY") == "C300"); 6520 assert(soundex("KATHY") == "K300"); 6521 assert(soundex("Jones") == "J520"); 6522 assert(soundex("johnsons") == "J525"); 6523 assert(soundex("Hardin") == "H635"); 6524 assert(soundex("Martinez") == "M635"); 6525 6526 import std.utf : byChar, byDchar, byWchar; 6527 assert(soundexer("Martinez".byChar ) == "M635"); 6528 assert(soundexer("Martinez".byWchar) == "M635"); 6529 assert(soundexer("Martinez".byDchar) == "M635"); 6530 }); 6531 } 6532 6533 @safe pure unittest 6534 { 6535 assert(testAliasedString!soundexer("Martinez")); 6536 } 6537 6538 6539 /*************************************************** 6540 * Construct an associative array consisting of all 6541 * abbreviations that uniquely map to the strings in values. 6542 * 6543 * This is useful in cases where the user is expected to type 6544 * in one of a known set of strings, and the program will helpfully 6545 * auto-complete the string once sufficient characters have been 6546 * entered that uniquely identify it. 6547 */ 6548 string[string] abbrev(string[] values) @safe pure 6549 { 6550 import std.algorithm.sorting : sort; 6551 6552 string[string] result; 6553 6554 // Make a copy when sorting so we follow COW principles. 6555 values = values.dup; 6556 sort(values); 6557 6558 size_t values_length = values.length; 6559 size_t lasti = values_length; 6560 size_t nexti; 6561 6562 string nv; 6563 string lv; 6564 6565 for (size_t i = 0; i < values_length; i = nexti) 6566 { 6567 string value = values[i]; 6568 6569 // Skip dups 6570 for (nexti = i + 1; nexti < values_length; nexti++) 6571 { 6572 nv = values[nexti]; 6573 if (value != values[nexti]) 6574 break; 6575 } 6576 6577 import std.utf : stride; 6578 6579 for (size_t j = 0; j < value.length; j += stride(value, j)) 6580 { 6581 string v = value[0 .. j]; 6582 6583 if ((nexti == values_length || j > nv.length || v != nv[0 .. j]) && 6584 (lasti == values_length || j > lv.length || v != lv[0 .. j])) 6585 { 6586 result[v] = value; 6587 } 6588 } 6589 result[value] = value; 6590 lasti = i; 6591 lv = value; 6592 } 6593 6594 return result; 6595 } 6596 6597 /// 6598 @safe unittest 6599 { 6600 import std..string; 6601 6602 static string[] list = [ "food", "foxy" ]; 6603 auto abbrevs = abbrev(list); 6604 assert(abbrevs == ["fox": "foxy", "food": "food", 6605 "foxy": "foxy", "foo": "food"]); 6606 } 6607 6608 6609 @system pure unittest 6610 { 6611 import std.algorithm.sorting : sort; 6612 import std.conv : to; 6613 import std.exception : assertCTFEable; 6614 6615 assertCTFEable!( 6616 { 6617 string[] values; 6618 values ~= "hello"; 6619 values ~= "hello"; 6620 values ~= "he"; 6621 6622 string[string] r; 6623 6624 r = abbrev(values); 6625 auto keys = r.keys.dup; 6626 sort(keys); 6627 6628 assert(keys.length == 4); 6629 assert(keys[0] == "he"); 6630 assert(keys[1] == "hel"); 6631 assert(keys[2] == "hell"); 6632 assert(keys[3] == "hello"); 6633 6634 assert(r[keys[0]] == "he"); 6635 assert(r[keys[1]] == "hello"); 6636 assert(r[keys[2]] == "hello"); 6637 assert(r[keys[3]] == "hello"); 6638 }); 6639 } 6640 6641 6642 /****************************************** 6643 * Compute _column number at the end of the printed form of the string, 6644 * assuming the string starts in the leftmost _column, which is numbered 6645 * starting from 0. 6646 * 6647 * Tab characters are expanded into enough spaces to bring the _column number 6648 * to the next multiple of tabsize. 6649 * If there are multiple lines in the string, the _column number of the last 6650 * line is returned. 6651 * 6652 * Params: 6653 * str = string or InputRange to be analyzed 6654 * tabsize = number of columns a tab character represents 6655 * 6656 * Returns: 6657 * column number 6658 */ 6659 6660 size_t column(Range)(Range str, in size_t tabsize = 8) 6661 if ((isInputRange!Range && isSomeChar!(Unqual!(ElementEncodingType!Range)) || 6662 isNarrowString!Range) && 6663 !isConvertibleToString!Range) 6664 { 6665 static if (is(immutable ElementEncodingType!Range == immutable char)) 6666 { 6667 // decoding needed for chars 6668 import std.utf : byDchar; 6669 6670 return str.byDchar.column(tabsize); 6671 } 6672 else 6673 { 6674 // decoding not needed for wchars and dchars 6675 import std.uni : lineSep, paraSep, nelSep; 6676 6677 size_t column; 6678 6679 foreach (const c; str) 6680 { 6681 switch (c) 6682 { 6683 case '\t': 6684 column = (column + tabsize) / tabsize * tabsize; 6685 break; 6686 6687 case '\r': 6688 case '\n': 6689 case paraSep: 6690 case lineSep: 6691 case nelSep: 6692 column = 0; 6693 break; 6694 6695 default: 6696 column++; 6697 break; 6698 } 6699 } 6700 return column; 6701 } 6702 } 6703 6704 /// 6705 @safe pure unittest 6706 { 6707 import std.utf : byChar, byWchar, byDchar; 6708 6709 assert(column("1234 ") == 5); 6710 assert(column("1234 "w) == 5); 6711 assert(column("1234 "d) == 5); 6712 6713 assert(column("1234 ".byChar()) == 5); 6714 assert(column("1234 "w.byWchar()) == 5); 6715 assert(column("1234 "d.byDchar()) == 5); 6716 6717 // Tab stops are set at 8 spaces by default; tab characters insert enough 6718 // spaces to bring the column position to the next multiple of 8. 6719 assert(column("\t") == 8); 6720 assert(column("1\t") == 8); 6721 assert(column("\t1") == 9); 6722 assert(column("123\t") == 8); 6723 6724 // Other tab widths are possible by specifying it explicitly: 6725 assert(column("\t", 4) == 4); 6726 assert(column("1\t", 4) == 4); 6727 assert(column("\t1", 4) == 5); 6728 assert(column("123\t", 4) == 4); 6729 6730 // New lines reset the column number. 6731 assert(column("abc\n") == 0); 6732 assert(column("abc\n1") == 1); 6733 assert(column("abcdefg\r1234") == 4); 6734 assert(column("abc\u20281") == 1); 6735 assert(column("abc\u20291") == 1); 6736 assert(column("abc\u00851") == 1); 6737 assert(column("abc\u00861") == 5); 6738 } 6739 6740 size_t column(Range)(auto ref Range str, in size_t tabsize = 8) 6741 if (isConvertibleToString!Range) 6742 { 6743 return column!(StringTypeOf!Range)(str, tabsize); 6744 } 6745 6746 @safe pure unittest 6747 { 6748 assert(testAliasedString!column("abc\u00861")); 6749 } 6750 6751 @safe @nogc unittest 6752 { 6753 import std.conv : to; 6754 import std.exception : assertCTFEable; 6755 6756 assertCTFEable!( 6757 { 6758 assert(column(string.init) == 0); 6759 assert(column("") == 0); 6760 assert(column("\t") == 8); 6761 assert(column("abc\t") == 8); 6762 assert(column("12345678\t") == 16); 6763 }); 6764 } 6765 6766 /****************************************** 6767 * Wrap text into a paragraph. 6768 * 6769 * The input text string s is formed into a paragraph 6770 * by breaking it up into a sequence of lines, delineated 6771 * by \n, such that the number of columns is not exceeded 6772 * on each line. 6773 * The last line is terminated with a \n. 6774 * Params: 6775 * s = text string to be wrapped 6776 * columns = maximum number of _columns in the paragraph 6777 * firstindent = string used to _indent first line of the paragraph 6778 * indent = string to use to _indent following lines of the paragraph 6779 * tabsize = column spacing of tabs in firstindent[] and indent[] 6780 * Returns: 6781 * resulting paragraph as an allocated string 6782 */ 6783 6784 S wrap(S)(S s, in size_t columns = 80, S firstindent = null, 6785 S indent = null, in size_t tabsize = 8) 6786 if (isSomeString!S) 6787 { 6788 import std.uni : isWhite; 6789 typeof(s.dup) result; 6790 bool inword; 6791 bool first = true; 6792 size_t wordstart; 6793 6794 const indentcol = column(indent, tabsize); 6795 6796 result.length = firstindent.length + s.length; 6797 result.length = firstindent.length; 6798 result[] = firstindent[]; 6799 auto col = column(firstindent, tabsize); 6800 foreach (size_t i, dchar c; s) 6801 { 6802 if (isWhite(c)) 6803 { 6804 if (inword) 6805 { 6806 if (first) 6807 { 6808 } 6809 else if (col + 1 + (i - wordstart) > columns) 6810 { 6811 result ~= '\n'; 6812 result ~= indent; 6813 col = indentcol; 6814 } 6815 else 6816 { 6817 result ~= ' '; 6818 col += 1; 6819 } 6820 result ~= s[wordstart .. i]; 6821 col += i - wordstart; 6822 inword = false; 6823 first = false; 6824 } 6825 } 6826 else 6827 { 6828 if (!inword) 6829 { 6830 wordstart = i; 6831 inword = true; 6832 } 6833 } 6834 } 6835 6836 if (inword) 6837 { 6838 if (col + 1 + (s.length - wordstart) >= columns) 6839 { 6840 result ~= '\n'; 6841 result ~= indent; 6842 } 6843 else if (result.length != firstindent.length) 6844 result ~= ' '; 6845 result ~= s[wordstart .. s.length]; 6846 } 6847 result ~= '\n'; 6848 6849 return result; 6850 } 6851 6852 /// 6853 @safe pure unittest 6854 { 6855 assert(wrap("a short string", 7) == "a short\nstring\n"); 6856 6857 // wrap will not break inside of a word, but at the next space 6858 assert(wrap("a short string", 4) == "a\nshort\nstring\n"); 6859 6860 assert(wrap("a short string", 7, "\t") == "\ta\nshort\nstring\n"); 6861 assert(wrap("a short string", 7, "\t", " ") == "\ta\n short\n string\n"); 6862 } 6863 6864 @safe pure unittest 6865 { 6866 import std.conv : to; 6867 import std.exception : assertCTFEable; 6868 6869 assertCTFEable!( 6870 { 6871 assert(wrap(string.init) == "\n"); 6872 assert(wrap(" a b df ") == "a b df\n"); 6873 assert(wrap(" a b df ", 3) == "a b\ndf\n"); 6874 assert(wrap(" a bc df ", 3) == "a\nbc\ndf\n"); 6875 assert(wrap(" abcd df ", 3) == "abcd\ndf\n"); 6876 assert(wrap("x") == "x\n"); 6877 assert(wrap("u u") == "u u\n"); 6878 assert(wrap("abcd", 3) == "\nabcd\n"); 6879 assert(wrap("a de", 10, "\t", " ", 8) == "\ta\n de\n"); 6880 }); 6881 } 6882 6883 /****************************************** 6884 * Removes one level of indentation from a multi-line string. 6885 * 6886 * This uniformly outdents the text as much as possible. 6887 * Whitespace-only lines are always converted to blank lines. 6888 * 6889 * Does not allocate memory if it does not throw. 6890 * 6891 * Params: 6892 * str = multi-line string 6893 * 6894 * Returns: 6895 * outdented string 6896 * 6897 * Throws: 6898 * StringException if indentation is done with different sequences 6899 * of whitespace characters. 6900 */ 6901 S outdent(S)(S str) @safe pure 6902 if (isSomeString!S) 6903 { 6904 return str.splitLines(Yes.keepTerminator).outdent().join(); 6905 } 6906 6907 /// 6908 @safe pure unittest 6909 { 6910 enum pretty = q{ 6911 import std.stdio; 6912 void main() { 6913 writeln("Hello"); 6914 } 6915 }.outdent(); 6916 6917 enum ugly = q{ 6918 import std.stdio; 6919 void main() { 6920 writeln("Hello"); 6921 } 6922 }; 6923 6924 assert(pretty == ugly); 6925 } 6926 6927 6928 /****************************************** 6929 * Removes one level of indentation from an array of single-line strings. 6930 * 6931 * This uniformly outdents the text as much as possible. 6932 * Whitespace-only lines are always converted to blank lines. 6933 * 6934 * Params: 6935 * lines = array of single-line strings 6936 * 6937 * Returns: 6938 * lines[] is rewritten in place with outdented lines 6939 * 6940 * Throws: 6941 * StringException if indentation is done with different sequences 6942 * of whitespace characters. 6943 */ 6944 S[] outdent(S)(S[] lines) @safe pure 6945 if (isSomeString!S) 6946 { 6947 import std.algorithm.searching : startsWith; 6948 6949 if (lines.empty) 6950 { 6951 return null; 6952 } 6953 6954 static S leadingWhiteOf(S str) 6955 { 6956 return str[ 0 .. $ - stripLeft(str).length ]; 6957 } 6958 6959 S shortestIndent; 6960 foreach (ref line; lines) 6961 { 6962 const stripped = line.stripLeft(); 6963 6964 if (stripped.empty) 6965 { 6966 line = line[line.chomp().length .. $]; 6967 } 6968 else 6969 { 6970 const indent = leadingWhiteOf(line); 6971 6972 // Comparing number of code units instead of code points is OK here 6973 // because this function throws upon inconsistent indentation. 6974 if (shortestIndent is null || indent.length < shortestIndent.length) 6975 { 6976 if (indent.empty) 6977 return lines; 6978 shortestIndent = indent; 6979 } 6980 } 6981 } 6982 6983 foreach (ref line; lines) 6984 { 6985 const stripped = line.stripLeft(); 6986 6987 if (stripped.empty) 6988 { 6989 // Do nothing 6990 } 6991 else if (line.startsWith(shortestIndent)) 6992 { 6993 line = line[shortestIndent.length .. $]; 6994 } 6995 else 6996 { 6997 throw new StringException("outdent: Inconsistent indentation"); 6998 } 6999 } 7000 7001 return lines; 7002 } 7003 7004 /// 7005 @safe pure unittest 7006 { 7007 auto str1 = [ 7008 " void main()\n", 7009 " {\n", 7010 " test();\n", 7011 " }\n" 7012 ]; 7013 auto str1Expected = [ 7014 "void main()\n", 7015 "{\n", 7016 " test();\n", 7017 "}\n" 7018 ]; 7019 assert(str1.outdent == str1Expected); 7020 7021 auto str2 = [ 7022 "void main()\n", 7023 " {\n", 7024 " test();\n", 7025 " }\n" 7026 ]; 7027 assert(str2.outdent == str2); 7028 } 7029 7030 @safe pure unittest 7031 { 7032 import std.conv : to; 7033 import std.exception : assertCTFEable; 7034 7035 template outdent_testStr(S) 7036 { 7037 enum S outdent_testStr = 7038 " 7039 \t\tX 7040 \t\U00010143X 7041 \t\t 7042 7043 \t\t\tX 7044 \t "; 7045 } 7046 7047 template outdent_expected(S) 7048 { 7049 enum S outdent_expected = 7050 " 7051 \tX 7052 \U00010143X 7053 7054 7055 \t\tX 7056 "; 7057 } 7058 7059 assertCTFEable!( 7060 { 7061 7062 static foreach (S; AliasSeq!(string, wstring, dstring)) 7063 {{ 7064 enum S blank = ""; 7065 assert(blank.outdent() == blank); 7066 static assert(blank.outdent() == blank); 7067 7068 enum S testStr1 = " \n \t\n "; 7069 enum S expected1 = "\n\n"; 7070 assert(testStr1.outdent() == expected1); 7071 static assert(testStr1.outdent() == expected1); 7072 7073 assert(testStr1[0..$-1].outdent() == expected1); 7074 static assert(testStr1[0..$-1].outdent() == expected1); 7075 7076 enum S testStr2 = "a\n \t\nb"; 7077 assert(testStr2.outdent() == testStr2); 7078 static assert(testStr2.outdent() == testStr2); 7079 7080 enum S testStr3 = 7081 " 7082 \t\tX 7083 \t\U00010143X 7084 \t\t 7085 7086 \t\t\tX 7087 \t "; 7088 7089 enum S expected3 = 7090 " 7091 \tX 7092 \U00010143X 7093 7094 7095 \t\tX 7096 "; 7097 assert(testStr3.outdent() == expected3); 7098 static assert(testStr3.outdent() == expected3); 7099 7100 enum testStr4 = " X\r X\n X\r\n X\u2028 X\u2029 X"; 7101 enum expected4 = "X\rX\nX\r\nX\u2028X\u2029X"; 7102 assert(testStr4.outdent() == expected4); 7103 static assert(testStr4.outdent() == expected4); 7104 7105 enum testStr5 = testStr4[0..$-1]; 7106 enum expected5 = expected4[0..$-1]; 7107 assert(testStr5.outdent() == expected5); 7108 static assert(testStr5.outdent() == expected5); 7109 7110 enum testStr6 = " \r \n \r\n \u2028 \u2029"; 7111 enum expected6 = "\r\n\r\n\u2028\u2029"; 7112 assert(testStr6.outdent() == expected6); 7113 static assert(testStr6.outdent() == expected6); 7114 7115 enum testStr7 = " a \n b "; 7116 enum expected7 = "a \nb "; 7117 assert(testStr7.outdent() == expected7); 7118 static assert(testStr7.outdent() == expected7); 7119 }} 7120 }); 7121 } 7122 7123 @safe pure unittest 7124 { 7125 import std.exception : assertThrown; 7126 auto bad = " a\n\tb\n c"; 7127 assertThrown!StringException(bad.outdent); 7128 } 7129 7130 /** Assume the given array of integers `arr` is a well-formed UTF string and 7131 return it typed as a UTF string. 7132 7133 `ubyte` becomes `char`, `ushort` becomes `wchar` and `uint` 7134 becomes `dchar`. Type qualifiers are preserved. 7135 7136 When compiled with debug mode, this function performs an extra check to make 7137 sure the return value is a valid Unicode string. 7138 7139 Params: 7140 arr = array of bytes, ubytes, shorts, ushorts, ints, or uints 7141 7142 Returns: 7143 arr retyped as an array of chars, wchars, or dchars 7144 7145 Throws: 7146 In debug mode `AssertError`, when the result is not a well-formed UTF string. 7147 7148 See_Also: $(LREF representation) 7149 */ 7150 auto assumeUTF(T)(T[] arr) 7151 if (staticIndexOf!(immutable T, immutable ubyte, immutable ushort, immutable uint) != -1) 7152 { 7153 import std.traits : ModifyTypePreservingTQ; 7154 import std.exception : collectException; 7155 import std.utf : validate; 7156 7157 alias ToUTFType(U) = AliasSeq!(char, wchar, dchar)[U.sizeof / 2]; 7158 auto asUTF = cast(ModifyTypePreservingTQ!(ToUTFType, T)[]) arr; 7159 7160 debug 7161 { 7162 scope ex = collectException(validate(asUTF)); 7163 assert(!ex, ex.msg); 7164 } 7165 7166 return asUTF; 7167 } 7168 7169 /// 7170 @safe pure unittest 7171 { 7172 string a = "Hölo World"; 7173 immutable(ubyte)[] b = a.representation; 7174 string c = b.assumeUTF; 7175 7176 assert(c == "Hölo World"); 7177 } 7178 7179 pure @system unittest 7180 { 7181 import std.algorithm.comparison : equal; 7182 static foreach (T; AliasSeq!(char[], wchar[], dchar[])) 7183 {{ 7184 immutable T jti = "Hello World"; 7185 T jt = jti.dup; 7186 7187 static if (is(T == char[])) 7188 { 7189 auto gt = cast(ubyte[]) jt; 7190 auto gtc = cast(const(ubyte)[])jt; 7191 auto gti = cast(immutable(ubyte)[])jt; 7192 } 7193 else static if (is(T == wchar[])) 7194 { 7195 auto gt = cast(ushort[]) jt; 7196 auto gtc = cast(const(ushort)[])jt; 7197 auto gti = cast(immutable(ushort)[])jt; 7198 } 7199 else static if (is(T == dchar[])) 7200 { 7201 auto gt = cast(uint[]) jt; 7202 auto gtc = cast(const(uint)[])jt; 7203 auto gti = cast(immutable(uint)[])jt; 7204 } 7205 7206 auto ht = assumeUTF(gt); 7207 auto htc = assumeUTF(gtc); 7208 auto hti = assumeUTF(gti); 7209 assert(equal(jt, ht)); 7210 assert(equal(jt, htc)); 7211 assert(equal(jt, hti)); 7212 }} 7213 } 7214 7215 pure @system unittest 7216 { 7217 import core.exception : AssertError; 7218 import std.exception : assertThrown, assertNotThrown; 7219 7220 immutable(ubyte)[] a = [ 0xC0 ]; 7221 7222 debug 7223 assertThrown!AssertError( () nothrow @nogc @safe {cast(void) a.assumeUTF;} () ); 7224 else 7225 assertNotThrown!AssertError( () nothrow @nogc @safe {cast(void) a.assumeUTF;} () ); 7226 }