/*
    Regular expressions package test suite.
*/
module std.regex.internal.tests;

package(std.regex):

import std.algorithm, std.conv, std.exception, std.range, std.typecons,
    std.typetuple, std.regex;
import std.regex.internal.parser : Escapables; // characters that need escaping

alias Sequence(int B, int E) = staticIota!(B, E);

// Sanity checks: a few patterns must compile, and both matching entry points
// (match and bmatch) must agree on a trivial hit and a trivial miss.
unittest
{
    // Construction only; the resulting regex objects are discarded.
    regex("(a|b)*");
    regex(`(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*(.*)\s*#`);
    regex("abc|edf|ighrg");

    auto hitRe = regex("abc");
    auto missRe = regex("(gylba)");

    // match entry point
    auto viaMatch = match("abcdef", hitRe);
    assert(viaMatch.hit == "abc");
    assert(match("wida", missRe).empty);

    // bmatch entry point
    auto viaBmatch = bmatch("abcdef", hitRe);
    assert(viaBmatch.hit == "abc");
    assert(bmatch("wida", missRe).empty);

    // The pattern may also be given as a plain (mutable) character array.
    assert(!match("abc", "abc".dup).empty);
    assert(!bmatch("abc", "abc".dup).empty);

    // A default-initialized Regex reports empty until a pattern is assigned.
    Regex!char blank;
    assert(blank.empty);
    blank = regex("test");
    assert(!blank.empty);
}

/* The test vectors in this file are altered from Henry Spencer's regexp
   test code. His copyright notice is:

        Copyright (c) 1986 by University of Toronto.
        Written by Henry Spencer. Not derived from licensed software.

        Permission is granted to anyone to use this software for any
        purpose on any computer system, and to redistribute it freely,
        subject to the following restrictions:

        1. The author is not responsible for the consequences of use of
                this software, no matter how awful, even if they arise
                from defects in it.

        2. The origin of this software must not be misrepresented, either
                by explicit claim or by omission.

        3. Altered versions must be plainly marked as such, and must not
                be misrepresented as being the original software.
*/ unittest { struct TestVectors { string pattern; string input; string result; string format; string replace; string flags; } enum TestVectors[] tv = [ TestVectors( "a\\b", "a", "y", "$&", "a" ), TestVectors( "(a)b\\1", "abaab","y", "$&", "aba" ), TestVectors( "()b\\1", "aaab", "y", "$&", "b" ), TestVectors( "abc", "abc", "y", "$&", "abc" ), TestVectors( "abc", "xbc", "n", "-", "-" ), TestVectors( "abc", "axc", "n", "-", "-" ), TestVectors( "abc", "abx", "n", "-", "-" ), TestVectors( "abc", "xabcy","y", "$&", "abc" ), TestVectors( "abc", "ababc","y", "$&", "abc" ), TestVectors( "ab*c", "abc", "y", "$&", "abc" ), TestVectors( "ab*bc", "abc", "y", "$&", "abc" ), TestVectors( "ab*bc", "abbc", "y", "$&", "abbc" ), TestVectors( "ab*bc", "abbbbc","y", "$&", "abbbbc" ), TestVectors( "ab+bc", "abbc", "y", "$&", "abbc" ), TestVectors( "ab+bc", "abc", "n", "-", "-" ), TestVectors( "ab+bc", "abq", "n", "-", "-" ), TestVectors( "ab+bc", "abbbbc","y", "$&", "abbbbc" ), TestVectors( "ab?bc", "abbc", "y", "$&", "abbc" ), TestVectors( "ab?bc", "abc", "y", "$&", "abc" ), TestVectors( "ab?bc", "abbbbc","n", "-", "-" ), TestVectors( "ab?c", "abc", "y", "$&", "abc" ), TestVectors( "^abc$", "abc", "y", "$&", "abc" ), TestVectors( "^abc$", "abcc", "n", "-", "-" ), TestVectors( "^abc", "abcc", "y", "$&", "abc" ), TestVectors( "^abc$", "aabc", "n", "-", "-" ), TestVectors( "abc$", "aabc", "y", "$&", "abc" ), TestVectors( "^", "abc", "y", "$&", "" ), TestVectors( "$", "abc", "y", "$&", "" ), TestVectors( "a.c", "abc", "y", "$&", "abc" ), TestVectors( "a.c", "axc", "y", "$&", "axc" ), TestVectors( "a.*c", "axyzc","y", "$&", "axyzc" ), TestVectors( "a.*c", "axyzd","n", "-", "-" ), TestVectors( "a[bc]d", "abc", "n", "-", "-" ), TestVectors( "a[bc]d", "abd", "y", "$&", "abd" ), TestVectors( "a[b-d]e", "abd", "n", "-", "-" ), TestVectors( "a[b-d]e", "ace", "y", "$&", "ace" ), TestVectors( "a[b-d]", "aac", "y", "$&", "ac" ), TestVectors( "a[-b]", "a-", "y", "$&", "a-" ), TestVectors( "a[b-]", 
"a-", "y", "$&", "a-" ), TestVectors( "a[b-a]", "-", "c", "-", "-" ), TestVectors( "a[]b", "-", "c", "-", "-" ), TestVectors( "a[", "-", "c", "-", "-" ), TestVectors( "a]", "a]", "y", "$&", "a]" ), TestVectors( "a[\\]]b", "a]b", "y", "$&", "a]b" ), TestVectors( "a[^bc]d", "aed", "y", "$&", "aed" ), TestVectors( "a[^bc]d", "abd", "n", "-", "-" ), TestVectors( "a[^-b]c", "adc", "y", "$&", "adc" ), TestVectors( "a[^-b]c", "a-c", "n", "-", "-" ), TestVectors( "a[^\\]b]c", "adc", "y", "$&", "adc" ), TestVectors( "ab|cd", "abc", "y", "$&", "ab" ), TestVectors( "ab|cd", "abcd", "y", "$&", "ab" ), TestVectors( "()ef", "def", "y", "$&-$1", "ef-" ), TestVectors( "()*", "-", "y", "-", "-" ), TestVectors( "*a", "-", "c", "-", "-" ), TestVectors( "^*", "-", "y", "-", "-" ), TestVectors( "$*", "-", "y", "-", "-" ), TestVectors( "(*)b", "-", "c", "-", "-" ), TestVectors( "$b", "b", "n", "-", "-" ), TestVectors( "a\\", "-", "c", "-", "-" ), TestVectors( "a\\(b", "a(b", "y", "$&-$1", "a(b-" ), TestVectors( "a\\(*b", "ab", "y", "$&", "ab" ), TestVectors( "a\\(*b", "a((b", "y", "$&", "a((b" ), TestVectors( "a\\\\b", "a\\b", "y", "$&", "a\\b" ), TestVectors( "abc)", "-", "c", "-", "-" ), TestVectors( "(abc", "-", "c", "-", "-" ), TestVectors( "((a))", "abc", "y", "$&-$1-$2", "a-a-a" ), TestVectors( "(a)b(c)", "abc", "y", "$&-$1-$2", "abc-a-c" ), TestVectors( "a+b+c", "aabbabc","y", "$&", "abc" ), TestVectors( "a**", "-", "c", "-", "-" ), TestVectors( "a*?a", "aa", "y", "$&", "a" ), TestVectors( "(a*)*", "aaa", "y", "-", "-" ), TestVectors( "(a*)+", "aaa", "y", "-", "-" ), TestVectors( "(a|)*", "-", "y", "-", "-" ), TestVectors( "(a*|b)*", "aabb", "y", "-", "-" ), TestVectors( "(a|b)*", "ab", "y", "$&-$1", "ab-b" ), TestVectors( "(a+|b)*", "ab", "y", "$&-$1", "ab-b" ), TestVectors( "(a+|b)+", "ab", "y", "$&-$1", "ab-b" ), TestVectors( "(a+|b)?", "ab", "y", "$&-$1", "a-a" ), TestVectors( "[^ab]*", "cde", "y", "$&", "cde" ), TestVectors( "(^)*", "-", "y", "-", "-" ), TestVectors( 
"(ab|)*", "-", "y", "-", "-" ), TestVectors( ")(", "-", "c", "-", "-" ), TestVectors( "", "abc", "y", "$&", "" ), TestVectors( "abc", "", "n", "-", "-" ), TestVectors( "a*", "", "y", "$&", "" ), TestVectors( "([abc])*d", "abbbcd", "y", "$&-$1", "abbbcd-c" ), TestVectors( "([abc])*bcd", "abcd", "y", "$&-$1", "abcd-a" ), TestVectors( "a|b|c|d|e", "e", "y", "$&", "e" ), TestVectors( "(a|b|c|d|e)f", "ef", "y", "$&-$1", "ef-e" ), TestVectors( "((a*|b))*", "aabb", "y", "-", "-" ), TestVectors( "abcd*efg", "abcdefg", "y", "$&", "abcdefg" ), TestVectors( "ab*", "xabyabbbz", "y", "$&", "ab" ), TestVectors( "ab*", "xayabbbz", "y", "$&", "a" ), TestVectors( "(ab|cd)e", "abcde", "y", "$&-$1", "cde-cd" ), TestVectors( "[abhgefdc]ij", "hij", "y", "$&", "hij" ), TestVectors( "^(ab|cd)e", "abcde", "n", "x$1y", "xy" ), TestVectors( "(abc|)ef", "abcdef", "y", "$&-$1", "ef-" ), TestVectors( "(a|b)c*d", "abcd", "y", "$&-$1", "bcd-b" ), TestVectors( "(ab|ab*)bc", "abc", "y", "$&-$1", "abc-a" ), TestVectors( "a([bc]*)c*", "abc", "y", "$&-$1", "abc-bc" ), TestVectors( "a([bc]*)(c*d)", "abcd", "y", "$&-$1-$2", "abcd-bc-d" ), TestVectors( "a([bc]+)(c*d)", "abcd", "y", "$&-$1-$2", "abcd-bc-d" ), TestVectors( "a([bc]*)(c+d)", "abcd", "y", "$&-$1-$2", "abcd-b-cd" ), TestVectors( "a[bcd]*dcdcde", "adcdcde", "y", "$&", "adcdcde" ), TestVectors( "a[bcd]+dcdcde", "adcdcde", "n", "-", "-" ), TestVectors( "(ab|a)b*c", "abc", "y", "$&-$1", "abc-ab" ), TestVectors( "((a)(b)c)(d)", "abcd", "y", "$1-$2-$3-$4", "abc-a-b-d" ), TestVectors( "[a-zA-Z_][a-zA-Z0-9_]*", "alpha", "y", "$&", "alpha" ), TestVectors( "^a(bc+|b[eh])g|.h$", "abh", "y", "$&-$1", "bh-" ), TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "effgz", "y", "$&-$1-$2", "effgz-effgz-" ), TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "ij", "y", "$&-$1-$2", "ij-ij-j" ), TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "effg", "n", "-", "-" ), TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "bcdd", "n", "-", "-" ), TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "reffgz", "y", "$&-$1-$2", 
"effgz-effgz-" ), TestVectors( "(((((((((a)))))))))", "a", "y", "$&", "a" ), TestVectors( "multiple words of text", "uh-uh", "n", "-", "-" ), TestVectors( "multiple words", "multiple words, yeah", "y", "$&", "multiple words" ), TestVectors( "(.*)c(.*)", "abcde", "y", "$&-$1-$2", "abcde-ab-de" ), TestVectors( "\\((.*), (.*)\\)", "(a, b)", "y", "($2, $1)", "(b, a)" ), TestVectors( "abcd", "abcd", "y", "$&-&-$$$&", "abcd-&-$abcd" ), TestVectors( "a(bc)d", "abcd", "y", "$1-$$1-$$$1", "bc-$1-$bc" ), TestVectors( "[k]", "ab", "n", "-", "-" ), TestVectors( "[ -~]*", "abc", "y", "$&", "abc" ), TestVectors( "[ -~ -~]*", "abc", "y", "$&", "abc" ), TestVectors( "[ -~ -~ -~]*", "abc", "y", "$&", "abc" ), TestVectors( "[ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ), TestVectors( "[ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ), TestVectors( "[ -~ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ), TestVectors( "[ -~ -~ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ), TestVectors( "a{2}", "candy", "n", "", "" ), TestVectors( "a{2}", "caandy", "y", "$&", "aa" ), TestVectors( "a{2}", "caaandy", "y", "$&", "aa" ), TestVectors( "a{2,}", "candy", "n", "", "" ), TestVectors( "a{2,}", "caandy", "y", "$&", "aa" ), TestVectors( "a{2,}", "caaaaaandy", "y", "$&", "aaaaaa" ), TestVectors( "a{1,3}", "cndy", "n", "", "" ), TestVectors( "a{1,3}", "candy", "y", "$&", "a" ), TestVectors( "a{1,3}", "caandy", "y", "$&", "aa" ), TestVectors( "a{1,3}", "caaaaaandy", "y", "$&", "aaa" ), TestVectors( "e?le?", "angel", "y", "$&", "el" ), TestVectors( "e?le?", "angle", "y", "$&", "le" ), TestVectors( "\\bn\\w", "noonday", "y", "$&", "no" ), TestVectors( "\\wy\\b", "possibly yesterday", "y", "$&", "ly" ), TestVectors( "\\w\\Bn", "noonday", "y", "$&", "on" ), TestVectors( "y\\B\\w", "possibly yesterday", "y", "$&", "ye" ), TestVectors( "\\cJ", "abc\ndef", "y", "$&", "\n" ), TestVectors( "\\d", "B2 is", "y", "$&", "2" ), TestVectors( "\\D", "B2 is", "y", "$&", "B" ), TestVectors( "\\s\\w*", "foo bar", "y", "$&", " bar" 
), TestVectors( "\\S\\w*", "foo bar", "y", "$&", "foo" ), TestVectors( "abc", "ababc", "y", "$&", "abc" ), TestVectors( "apple(,)\\sorange\\1", "apple, orange, cherry, peach", "y", "$&", "apple, orange," ), TestVectors( "(\\w+)\\s(\\w+)", "John Smith", "y", "$2, $1", "Smith, John" ), TestVectors( "\\n\\f\\r\\t\\v", "abc\n\f\r\t\vdef", "y", "$&", "\n\f\r\t\v" ), TestVectors( ".*c", "abcde", "y", "$&", "abc" ), TestVectors( "^\\w+((;|=)\\w+)+$", "some=host=tld", "y", "$&-$1-$2", "some=host=tld-=tld-=" ), TestVectors( "^\\w+((\\.|-)\\w+)+$", "some.host.tld", "y", "$&-$1-$2", "some.host.tld-.tld-." ), TestVectors( "q(a|b)*q", "xxqababqyy", "y", "$&-$1", "qababq-b" ), TestVectors( "^(a)(b){0,1}(c*)", "abcc", "y", "$1 $2 $3", "a b cc" ), TestVectors( "^(a)((b){0,1})(c*)", "abcc", "y", "$1 $2 $3", "a b b" ), TestVectors( "^(a)(b)?(c*)", "abcc", "y", "$1 $2 $3", "a b cc" ), TestVectors( "^(a)((b)?)(c*)", "abcc", "y", "$1 $2 $3", "a b b" ), TestVectors( "^(a)(b){0,1}(c*)", "acc", "y", "$1 $2 $3", "a cc" ), TestVectors( "^(a)((b){0,1})(c*)", "acc", "y", "$1 $2 $3", "a " ), TestVectors( "^(a)(b)?(c*)", "acc", "y", "$1 $2 $3", "a cc" ), TestVectors( "^(a)((b)?)(c*)", "acc", "y", "$1 $2 $3", "a " ), TestVectors( "(?:ab){3}", "_abababc","y", "$&-$1", "ababab-" ), TestVectors( "(?:a(?:x)?)+", "aaxaxx", "y", "$&-$1-$2", "aaxax--" ), TestVectors( `\W\w\W`, "aa b!ca", "y", "$&", " b!"), //more repetitions: TestVectors( "(?:a{2,4}b{1,3}){1,2}", "aaabaaaabbb", "y", "$&", "aaabaaaabbb" ), TestVectors( "(?:a{2,4}b{1,3}){1,2}?", "aaabaaaabbb", "y", "$&", "aaab" ), //groups: TestVectors( "(abc)|(edf)|(xyz)", "xyz", "y", "$1-$2-$3","--xyz"), TestVectors( "(?P\\d+)/(?P\\d+)", "2/3", "y", "${d}/${q}", "3/2"), //set operations: TestVectors( "[a-z--d-f]", " dfa", "y", "$&", "a"), TestVectors( "[abc[pq--acq]]{2}", "bqpaca", "y", "$&", "pa"), TestVectors( "[a-z9&&abc0-9]{3}", "z90a0abc", "y", "$&", "abc"), TestVectors( "[0-9a-f~~0-5a-z]{2}", "g0a58x", "y", "$&", "8x"), TestVectors( 
"[abc[pq]xyz[rs]]{4}", "cqxr", "y", "$&", "cqxr"), TestVectors( "[abcdf--[ab&&[bcd]][acd]]", "abcdefgh", "y", "$&", "f"), //unicode blocks & properties: TestVectors( `\P{Inlatin1suppl ement}`, "\u00c2!", "y", "$&", "!"), TestVectors( `\p{InLatin-1 Supplement}\p{in-mathematical-operators}\P{Inlatin1suppl ement}`, "\u00c2\u2200\u00c3\u2203.", "y", "$&", "\u00c3\u2203."), TestVectors( `[-+*/\p{in-mathematical-operators}]{2}`, "a+\u2212", "y", "$&", "+\u2212"), TestVectors( `\p{Ll}+`, "XabcD", "y", "$&", "abc"), TestVectors( `\p{Lu}+`, "абвГДЕ", "y", "$&", "ГДЕ"), TestVectors( `^\p{Currency Symbol}\p{Sc}`, "$₤", "y", "$&", "$₤"), TestVectors( `\p{Common}\p{Thai}`, "!ฆ", "y", "$&", "!ฆ"), TestVectors( `[\d\s]*\D`, "12 \t3\U00001680\u0F20_2", "y", "$&", "12 \t3\U00001680\u0F20_"), TestVectors( `[c-wф]фф`, "ффф", "y", "$&", "ффф"), //case insensitive: TestVectors( `^abcdEf$`, "AbCdEF", "y", "$&", "AbCdEF", "i"), TestVectors( `Русский язык`, "рУсскИй ЯзЫк", "y", "$&", "рУсскИй ЯзЫк", "i"), TestVectors( `ⒶⒷⓒ` , "ⓐⓑⒸ", "y", "$&", "ⓐⓑⒸ", "i"), TestVectors( "\U00010400{2}", "\U00010428\U00010400 ", "y", "$&", "\U00010428\U00010400", "i"), TestVectors( `[adzУ-Я]{4}`, "DzюЯ", "y", "$&", "DzюЯ", "i"), TestVectors( `\p{L}\p{Lu}{10}`, "абвгдеЖЗИКЛ", "y", "$&", "абвгдеЖЗИКЛ", "i"), TestVectors( `(?:Dåb){3}`, "DåbDÅBdÅb", "y", "$&", "DåbDÅBdÅb", "i"), //escapes: TestVectors( `\u0041\u005a\U00000065\u0001`, "AZe\u0001", "y", "$&", "AZe\u0001"), TestVectors( `\u`, "", "c", "-", "-"), TestVectors( `\U`, "", "c", "-", "-"), TestVectors( `\u003`, "", "c", "-", "-"), TestVectors( `[\x00-\x7f]{4}`, "\x00\x09ab", "y", "$&", "\x00\x09ab"), TestVectors( `[\cJ\cK\cA-\cD]{3}\cQ`, "\x01\x0B\x0A\x11", "y", "$&", "\x01\x0B\x0A\x11"), TestVectors( `\r\n\v\t\f\\`, "\r\n\v\t\f\\", "y", "$&", "\r\n\v\t\f\\"), TestVectors( `[\u0003\u0001]{2}`, "\u0001\u0003", "y", "$&", "\u0001\u0003"), TestVectors( `^[\u0020-\u0080\u0001\n-\r]{8}`, "abc\u0001\v\f\r\n", "y", "$&", "abc\u0001\v\f\r\n"), TestVectors( 
`\w+\S\w+`, "ab7!44c", "y", "$&", "ab7!44c"), TestVectors( `\b\w+\b`, " abde4 ", "y", "$&", "abde4"), TestVectors( `\b\w+\b`, " abde4", "y", "$&", "abde4"), TestVectors( `\b\w+\b`, "abde4 ", "y", "$&", "abde4"), TestVectors( `\pL\pS`, "a\u02DA", "y", "$&", "a\u02DA"), TestVectors( `\pX`, "", "c", "-", "-"), // ^, $, \b, \B, multiline : TestVectors( `\r.*?$`, "abc\r\nxy", "y", "$&", "\r\nxy", "sm"), TestVectors( `^a$^b$`, "a\r\nb\n", "n", "$&", "-", "m"), TestVectors( `^a$\r\n^b$`,"a\r\nb\n", "y", "$&", "a\r\nb", "m"), TestVectors( `^$`, "\r\n", "y", "$&", "", "m"), TestVectors( `^a$\nx$`, "a\nx\u2028","y", "$&", "a\nx", "m"), TestVectors( `^a$\nx$`, "a\nx\u2029","y", "$&", "a\nx", "m"), TestVectors( `^a$\nx$`, "a\nx\u0085","y", "$&", "a\nx","m"), TestVectors( `^x$`, "\u2028x", "y", "$&", "x", "m"), TestVectors( `^x$`, "\u2029x", "y", "$&", "x", "m"), TestVectors( `^x$`, "\u0085x", "y", "$&", "x", "m"), TestVectors( `\b^.`, "ab", "y", "$&", "a"), TestVectors( `\B^.`, "ab", "n", "-", "-"), TestVectors( `^ab\Bc\B`, "\r\nabcd", "y", "$&", "abc", "m"), TestVectors( `^.*$`, "12345678", "y", "$&", "12345678"), // luckily obtained regression on incremental matching in backtracker TestVectors( `^(?:(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*([^ ]*)\s*#|# (?:\w|_)+=((?:\w|_)+))`, "0020 ; White_Space # ", "y", "$1-$2-$3", "--0020"), //lookahead TestVectors( "(foo.)(?=(bar))", "foobar foodbar", "y", "$&-$1-$2", "food-food-bar" ), TestVectors( `\b(\d+)[a-z](?=\1)`, "123a123", "y", "$&-$1", "123a-123" ), TestVectors( `\$(?!\d{3})\w+`, "$123 $abc", "y", "$&", "$abc"), TestVectors( `(abc)(?=(ed(f))\3)`, "abcedff", "y", "-", "-"), TestVectors( `\b[A-Za-z0-9.]+(?=(@(?!gmail)))`, "a@gmail,x@com", "y", "$&-$1", "x-@"), TestVectors( `x()(abc)(?=(d)(e)(f)\2)`, "xabcdefabc", "y", "$&", "xabc"), TestVectors( `x()(abc)(?=(d)(e)(f)()\3\4\5)`, "xabcdefdef", "y", "$&", "xabc"), //lookback TestVectors( `(?<=(ab))\d`, "12ba3ab4", "y", "$&-$1", "4-ab", "i"), TestVectors( `\w(?"); 
assert(bmatch("texttext", greed).hit == "text"); auto cr8 = ctRegex!("^(a)(b)?(c*)"); auto m8 = bmatch("abcc",cr8); assert(m8); assert(m8.captures[1] == "a"); assert(m8.captures[2] == "b"); assert(m8.captures[3] == "cc"); auto cr9 = ctRegex!("q(a|b)*q"); auto m9 = match("xxqababqyy",cr9); assert(m9); assert(equal(bmatch("xxqababqyy",cr9).captures, ["qababq", "b"])); auto rtr = regex("a|b|c"); enum ctr = regex("a|b|c"); assert(equal(rtr.ir,ctr.ir)); //CTFE parser BUG is triggered by group //in the middle of alternation (at least not first and not last) enum testCT = regex(`abc|(edf)|xyz`); auto testRT = regex(`abc|(edf)|xyz`); assert(equal(testCT.ir,testRT.ir)); } unittest { enum cx = ctRegex!"(A|B|C)"; auto mx = match("B",cx); assert(mx); assert(equal(mx.captures, [ "B", "B"])); enum cx2 = ctRegex!"(A|B)*"; assert(match("BAAA",cx2)); enum cx3 = ctRegex!("a{3,4}","i"); auto mx3 = match("AaA",cx3); assert(mx3); assert(mx3.captures[0] == "AaA"); enum cx4 = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i"); auto mx4 = match("aaaabc", cx4); assert(mx4); assert(mx4.captures[0] == "aaaab"); auto cr8 = ctRegex!("(a)(b)?(c*)"); auto m8 = bmatch("abcc",cr8); assert(m8); assert(m8.captures[1] == "a"); assert(m8.captures[2] == "b"); assert(m8.captures[3] == "cc"); auto cr9 = ctRegex!(".*$", "gm"); auto m9 = match("First\rSecond", cr9); assert(m9); assert(equal(map!"a.hit"(m9), ["First", "", "Second"])); } unittest { //global matching void test_body(alias matchFn)() { string s = "a quick brown fox jumps over a lazy dog"; auto r1 = regex("\\b[a-z]+\\b","g"); string[] test; foreach(m; matchFn(s, r1)) test ~= m.hit; assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"])); auto free_reg = regex(` abc \s+ " ( [^"]+ | \\ " )+ " z `, "x"); auto m = match(`abc "quoted string with \" inside"z`,free_reg); assert(m); string mails = " hey@you.com no@spam.net "; auto rm = regex(`@(?<=\S+@)\S+`,"g"); assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", 
"@spam.net"])); auto m2 = matchFn("First line\nSecond line",regex(".*$","gm")); assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"])); auto m2a = matchFn("First line\nSecond line",regex(".+$","gm")); assert(equal(map!"a[0]"(m2a), ["First line", "Second line"])); auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm")); assert(equal(map!"a[0]"(m2b), ["First line", "Second line"])); debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!"); } test_body!bmatch(); test_body!match(); } //tests for accumulated std.regex issues and other regressions unittest { void test_body(alias matchFn)() { //issue 5857 //matching goes out of control if ... in (...){x} has .*/.+ auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures; assert(c[0] == "axxxzayyyyyzd"); assert(c[1] == "ayyyyyz"); auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures; assert(c2[0] == "axxxayyyyyd"); assert(c2[1] == "ayyyyy"); //issue 2108 //greedy vs non-greedy auto nogreed = regex(""); assert(matchFn("texttext", nogreed).hit == "text"); auto greed = regex(""); assert(matchFn("texttext", greed).hit == "texttext"); //issue 4574 //empty successful match still advances the input string[] pres, posts, hits; foreach(m; matchFn("abcabc", regex("","g"))) { pres ~= m.pre; posts ~= m.post; assert(m.hit.empty); } auto heads = [ "abcabc", "abcab", "abca", "abc", "ab", "a", "" ]; auto tails = [ "abcabc", "bcabc", "cabc", "abc", "bc", "c", "" ]; assert(pres == array(retro(heads))); assert(posts == tails); //issue 6076 //regression on .* auto re = regex("c.*|d"); auto m = matchFn("mm", re); assert(!m); debug(std_regex_test) writeln("!!! 
FReD REGRESSION test done "~matchFn.stringof~" !!!"); auto rprealloc = regex(`((.){5}.{1,10}){5}`); auto arr = array(repeat('0',100)); auto m2 = matchFn(arr, rprealloc); assert(m2); assert(collectException( regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$") ) is null); foreach(ch; [Escapables]) { assert(match(to!string(ch),regex(`[\`~ch~`]`))); assert(!match(to!string(ch),regex(`[^\`~ch~`]`))); assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`))); } //bugzilla 7718 string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'"; auto reStrCmd = regex (`(".*")|('.*')`, "g"); assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)), [`"/GIT/Ruby Apps/sec"`, `'notimer'`])); } test_body!bmatch(); test_body!match(); } // tests for replace unittest { void test(alias matchFn)() { import std.uni : toUpper; foreach(i, v; TypeTuple!(string, wstring, dstring)) { auto baz(Cap)(Cap m) if (is(Cap == Captures!(Cap.String))) { return toUpper(m.hit); } alias String = v; assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c")) == to!String("ack rapacity")); assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c")) == to!String("ack capacity")); assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]")) == to!String("[n]oon")); assert(std.regex.replace!(matchFn)(to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'")) == to!String(": test2 test1 :")); auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."), regex(to!String("[ar]"), "g")); assert(s == "StRAp A Rocket engine on A chicken."); } debug(std_regex_test) writeln("!!! 
Replace test done "~matchFn.stringof~" !!!"); } test!(bmatch)(); test!(match)(); } // tests for splitter unittest { auto s1 = ", abc, de, fg, hi, "; auto sp1 = splitter(s1, regex(", *")); auto w1 = ["", "abc", "de", "fg", "hi", ""]; assert(equal(sp1, w1)); auto s2 = ", abc, de, fg, hi"; auto sp2 = splitter(s2, regex(", *")); auto w2 = ["", "abc", "de", "fg", "hi"]; uint cnt; foreach(e; sp2) { assert(w2[cnt++] == e); } assert(equal(sp2, w2)); } unittest { char[] s1 = ", abc, de, fg, hi, ".dup; auto sp2 = splitter(s1, regex(", *")); } unittest { auto s1 = ", abc, de, fg, hi, "; auto w1 = ["", "abc", "de", "fg", "hi", ""]; assert(equal(split(s1, regex(", *")), w1[])); } unittest { // bugzilla 7141 string pattern = `[a\--b]`; assert(match("-", pattern)); assert(match("b", pattern)); string pattern2 = `[&-z]`; assert(match("b", pattern2)); } unittest {//bugzilla 7111 assert(match("", regex("^"))); } unittest {//bugzilla 7300 assert(!match("a"d, "aa"d)); } unittest {//bugzilla 7674 assert("1234".replace(regex("^"), "$$") == "$1234"); assert("hello?".replace(regex(r"\?", "g"), r"\?") == r"hello\?"); assert("hello?".replace(regex(r"\?", "g"), r"\\?") != r"hello\?"); } unittest {// bugzilla 7679 foreach(S; TypeTuple!(string, wstring, dstring)) { enum re = ctRegex!(to!S(r"\.")); auto str = to!S("a.b"); assert(equal(std.regex.splitter(str, re), [to!S("a"), to!S("b")])); assert(split(str, re) == [to!S("a"), to!S("b")]); } } unittest {//bugzilla 8203 string data = " NAME = XPAW01_STA:STATION NAME = XPAW01_STA "; auto uniFileOld = data; auto r = regex( r"^NAME = (?P[a-zA-Z0-9_]+):*(?P[a-zA-Z0-9_]*)","gm"); auto uniCapturesNew = match(uniFileOld, r); for(int i = 0; i < 20; i++) foreach (matchNew; uniCapturesNew) {} //a second issue with same symptoms auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`); match("аллея Театральная", r2); } unittest {// bugzilla 8637 purity of enforce auto m = match("hello world", regex("world")); enforce(m); } // bugzilla 8725 unittest { static 
italic = regex( r"\* (?!\s+) (.*?) (?!\s+) \*", "gx" ); string input = "this * is* interesting, *very* interesting"; assert(replace(input, italic, "$1") == "this * is* interesting, very interesting"); } // bugzilla 8349 unittest { enum peakRegexStr = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)"; enum peakRegex = ctRegex!(peakRegexStr); //note that the regex pattern itself is probably bogus assert(match(r"\>wgEncode-blah-Tfbs.narrow", peakRegex)); } // bugzilla 9211 unittest { auto rx_1 = regex(r"^(\w)*(\d)"); auto m = match("1234", rx_1); assert(equal(m.front, ["1234", "3", "4"])); auto rx_2 = regex(r"^([0-9])*(\d)"); auto m2 = match("1234", rx_2); assert(equal(m2.front, ["1234", "3", "4"])); } // bugzilla 9280 unittest { string tomatch = "a!b@c"; static r = regex(r"^(?P.*?)!(?P.*?)@(?P.*?)$"); auto nm = match(tomatch, r); assert(nm); auto c = nm.captures; assert(c[1] == "a"); assert(c["nick"] == "a"); } // bugzilla 9579 unittest { char[] input = ['a', 'b', 'c']; string format = "($1)"; // used to give a compile error: auto re = regex(`(a)`, "g"); auto r = replace(input, re, format); assert(r == "(a)bc"); } // bugzilla 9634 unittest { auto re = ctRegex!"(?:a+)"; assert(match("aaaa", re).hit == "aaaa"); } //bugzilla 10798 unittest { auto cr = ctRegex!("[abcd--c]*"); auto m = "abc".match(cr); assert(m); assert(m.hit == "ab"); } // bugzilla 10913 unittest { @system static string foo(const(char)[] s) { return s.dup; } @safe static string bar(const(char)[] s) { return s.dup; } () @system { replace!((a) => foo(a.hit))("blah", regex(`a`)); }(); () @safe { replace!((a) => bar(a.hit))("blah", regex(`a`)); }(); } // bugzilla 11262 unittest { enum reg = ctRegex!(r",", "g"); auto str = "This,List"; str = str.replace(reg, "-"); assert(str == "This-List"); } // bugzilla 11775 unittest { assert(collectException(regex("a{1,0}"))); } // bugzilla 11839 unittest { assert(regex(`(?P\w+)`).namedCaptures.equal(["var1"])); assert(collectException(regex(`(?P<1>\w+)`))); 
assert(regex(`(?P\w+)`).namedCaptures.equal(["v1"])); assert(regex(`(?P<__>\w+)`).namedCaptures.equal(["__"])); assert(regex(`(?P<я>\w+)`).namedCaptures.equal(["я"])); } // bugzilla 12076 unittest { auto RE = ctRegex!(r"(?abc)`); assert(collectException("abc".matchFirst(r)["b"])); } // bugzilla 12691 unittest { assert(bmatch("e@", "^([a-z]|)*$").empty); assert(bmatch("e@", ctRegex!`^([a-z]|)*$`).empty); } //bugzilla 12713 unittest { assertThrown(regex("[[a-z]([a-z]|(([[a-z])))")); } //bugzilla 12747 unittest { assertThrown(regex(`^x(\1)`)); assertThrown(regex(`^(x(\1))`)); assertThrown(regex(`^((x)(?=\1))`)); }