diff --git a/posix.mak b/posix.mak index d0576574a..0e41d01a3 100644 --- a/posix.mak +++ b/posix.mak @@ -225,7 +225,7 @@ PACKAGE_std_experimental_allocator_building_blocks = \ PACKAGE_std_net = curl isemail PACKAGE_std_range = interfaces package primitives PACKAGE_std_regex = package $(addprefix internal/,generator ir parser \ - backtracking tests thompson kickstart) + backtracking tests tests2 thompson kickstart) # Modules in std (including those in packages) STD_MODULES=$(call P2MODULES,$(STD_PACKAGES)) diff --git a/std/regex/internal/tests.d b/std/regex/internal/tests.d index cce8edf2b..3153ebd0a 100644 --- a/std/regex/internal/tests.d +++ b/std/regex/internal/tests.d @@ -465,684 +465,3 @@ import std.uni : Escapables; // characters that need escaping run_tests!match(); //thompson VM } -@safe unittest -{ - auto cr = ctRegex!("abc"); - assert(bmatch("abc",cr).hit == "abc"); - auto cr2 = ctRegex!("ab*c"); - assert(bmatch("abbbbc",cr2).hit == "abbbbc"); -} -@safe unittest -{ - auto cr3 = ctRegex!("^abc$"); - assert(bmatch("abc",cr3).hit == "abc"); - auto cr4 = ctRegex!(`\b(a\B[a-z]b)\b`); - assert(array(match("azb",cr4).captures) == ["azb", "azb"]); -} - -@safe unittest -{ - auto cr5 = ctRegex!("(?:a{2,4}b{1,3}){1,2}"); - assert(bmatch("aaabaaaabbb", cr5).hit == "aaabaaaabbb"); - auto cr6 = ctRegex!("(?:a{2,4}b{1,3}){1,2}?"w); - assert(bmatch("aaabaaaabbb"w, cr6).hit == "aaab"w); -} - -@safe unittest -{ - auto cr7 = ctRegex!(`\r.*?$`,"sm"); - assert(bmatch("abc\r\nxy", cr7).hit == "\r\nxy"); - auto greed = ctRegex!(""); - assert(bmatch("texttext", greed).hit - == "text"); -} - -@safe unittest -{ - import std.algorithm.comparison : equal; - auto cr8 = ctRegex!("^(a)(b)?(c*)"); - auto m8 = bmatch("abcc",cr8); - assert(m8); - assert(m8.captures[1] == "a"); - assert(m8.captures[2] == "b"); - assert(m8.captures[3] == "cc"); - auto cr9 = ctRegex!("q(a|b)*q"); - auto m9 = match("xxqababqyy",cr9); - assert(m9); - assert(equal(bmatch("xxqababqyy",cr9).captures, ["qababq", "b"])); -} - -@safe unittest -{ - import std.algorithm.comparison : equal; - auto rtr = regex("a|b|c"); - static ctr = regex("a|b|c"); - assert(equal(rtr.ir,ctr.ir)); - //CTFE parser BUG is triggered by group - //in the middle of alternation (at least not first and not last) - static testCT = regex(`abc|(edf)|xyz`); - auto testRT = regex(`abc|(edf)|xyz`); - assert(equal(testCT.ir,testRT.ir)); -} - -@safe unittest -{ - import std.algorithm.comparison : equal; - import std.algorithm.iteration : map; - enum cx = ctRegex!"(A|B|C)"; - auto mx = match("B",cx); - assert(mx); - assert(equal(mx.captures, [ "B", "B"])); - enum cx2 = ctRegex!"(A|B)*"; - assert(match("BAAA",cx2)); - - enum cx3 = ctRegex!("a{3,4}","i"); - auto mx3 = match("AaA",cx3); - assert(mx3); - assert(mx3.captures[0] == "AaA"); - enum cx4 = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i"); - auto mx4 = match("aaaabc", cx4); - assert(mx4); - assert(mx4.captures[0] == "aaaab"); - auto cr8 = ctRegex!("(a)(b)?(c*)"); - auto m8 = bmatch("abcc",cr8); - assert(m8); - assert(m8.captures[1] == "a"); - assert(m8.captures[2] == "b"); - assert(m8.captures[3] == "cc"); - auto cr9 = ctRegex!(".*$", "gm"); - auto m9 = match("First\rSecond", cr9); - assert(m9); - assert(equal(map!"a.hit"(m9), ["First", "", "Second"])); -} - -@safe unittest -{ - import std.algorithm.comparison : equal; - import std.algorithm.iteration : map; -//global matching - void test_body(alias matchFn)() - { - string s = "a quick brown fox jumps over a lazy dog"; - auto r1 = regex("\\b[a-z]+\\b","g"); - string[] test; - foreach (m; matchFn(s, r1)) - test ~= m.hit; - assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"])); - auto free_reg = regex(` - - abc - \s+ - " - ( - [^"]+ - | \\ " - )+ - " - z - `, "x"); - auto m = match(`abc "quoted string with \" inside"z`,free_reg); - assert(m); - string mails = " hey@you.com no@spam.net "; - auto rm = regex(`@(?<=\S+@)\S+`,"g"); - assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"])); - auto m2 = matchFn("First line\nSecond line",regex(".*$","gm")); - assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"])); - auto m2a = matchFn("First line\nSecond line",regex(".+$","gm")); - assert(equal(map!"a[0]"(m2a), ["First line", "Second line"])); - auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm")); - assert(equal(map!"a[0]"(m2b), ["First line", "Second line"])); - debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!"); - } - test_body!bmatch(); - test_body!match(); -} - -//tests for accumulated std.regex issues and other regressions -@safe unittest -{ - import std.algorithm.comparison : equal; - import std.algorithm.iteration : map; - void test_body(alias matchFn)() - { - //issue 5857 - //matching goes out of control if ... in (...){x} has .*/.+ - auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures; - assert(c[0] == "axxxzayyyyyzd"); - assert(c[1] == "ayyyyyz"); - auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures; - assert(c2[0] == "axxxayyyyyd"); - assert(c2[1] == "ayyyyy"); - //issue 2108 - //greedy vs non-greedy - auto nogreed = regex(""); - assert(matchFn("texttext", nogreed).hit - == "text"); - auto greed = regex(""); - assert(matchFn("texttext", greed).hit - == "texttext"); - //issue 4574 - //empty successful match still advances the input - string[] pres, posts, hits; - foreach (m; matchFn("abcabc", regex("","g"))) - { - pres ~= m.pre; - posts ~= m.post; - assert(m.hit.empty); - - } - auto heads = [ - "abcabc", - "abcab", - "abca", - "abc", - "ab", - "a", - "" - ]; - auto tails = [ - "abcabc", - "bcabc", - "cabc", - "abc", - "bc", - "c", - "" - ]; - assert(pres == array(retro(heads))); - assert(posts == tails); - //issue 6076 - //regression on .* - auto re = regex("c.*|d"); - auto m = matchFn("mm", re); - assert(!m); - debug(std_regex_test) writeln("!!! FReD REGRESSION test done "~matchFn.stringof~" !!!"); - auto rprealloc = regex(`((.){5}.{1,10}){5}`); - auto arr = array(repeat('0',100)); - auto m2 = matchFn(arr, rprealloc); - assert(m2); - assert(collectException( - regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$") - ) is null); - foreach (ch; [Escapables]) - { - assert(match(to!string(ch),regex(`[\`~ch~`]`))); - assert(!match(to!string(ch),regex(`[^\`~ch~`]`))); - assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`))); - } - //bugzilla 7718 - string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'"; - auto reStrCmd = regex (`(".*")|('.*')`, "g"); - assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)), - [`"/GIT/Ruby Apps/sec"`, `'notimer'`])); - } - test_body!bmatch(); - test_body!match(); -} - -// tests for replace -@safe unittest -{ - void test(alias matchFn)() - { - import std.uni : toUpper; - - static foreach (i, v; AliasSeq!(string, wstring, dstring)) - {{ - auto baz(Cap)(Cap m) - if (is(Cap == Captures!(Cap.String))) - { - return toUpper(m.hit); - } - alias String = v; - assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c")) - == to!String("ack rapacity")); - assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c")) - == to!String("ack capacity")); - assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]")) - == to!String("[n]oon")); - assert(std.regex.replace!(matchFn)( - to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'") - ) == to!String(": test2 test1 :")); - auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."), - regex(to!String("[ar]"), "g")); - assert(s == "StRAp A Rocket engine on A chicken."); - }} - debug(std_regex_test) writeln("!!! Replace test done "~matchFn.stringof~" !!!"); - } - test!(bmatch)(); - test!(match)(); -} - -// tests for splitter -@safe unittest -{ - import std.algorithm.comparison : equal; - auto s1 = ", abc, de, fg, hi, "; - auto sp1 = splitter(s1, regex(", *")); - auto w1 = ["", "abc", "de", "fg", "hi", ""]; - assert(equal(sp1, w1)); - - auto s2 = ", abc, de, fg, hi"; - auto sp2 = splitter(s2, regex(", *")); - auto w2 = ["", "abc", "de", "fg", "hi"]; - - uint cnt; - foreach (e; sp2) - { - assert(w2[cnt++] == e); - } - assert(equal(sp2, w2)); -} - -@safe unittest -{ - char[] s1 = ", abc, de, fg, hi, ".dup; - auto sp2 = splitter(s1, regex(", *")); -} - -@safe unittest -{ - import std.algorithm.comparison : equal; - auto s1 = ", abc, de, fg, hi, "; - auto w1 = ["", "abc", "de", "fg", "hi", ""]; - assert(equal(split(s1, regex(", *")), w1[])); -} - -@safe unittest -{ // bugzilla 7141 - string pattern = `[a\--b]`; - assert(match("-", pattern)); - assert(match("b", pattern)); - string pattern2 = `[&-z]`; - assert(match("b", pattern2)); -} -@safe unittest -{//bugzilla 7111 - assert(match("", regex("^"))); -} -@safe unittest -{//bugzilla 7300 - assert(!match("a"d, "aa"d)); -} - -// bugzilla 7551 -@safe unittest -{ - auto r = regex("[]abc]*"); - assert("]ab".matchFirst(r).hit == "]ab"); - assertThrown(regex("[]")); - auto r2 = regex("[]abc--ab]*"); - assert("]ac".matchFirst(r2).hit == "]"); -} - -@safe unittest -{//bugzilla 7674 - assert("1234".replace(regex("^"), "$$") == "$1234"); - assert("hello?".replace(regex(r"\?", "g"), r"\?") == r"hello\?"); - assert("hello?".replace(regex(r"\?", "g"), r"\\?") != r"hello\?"); -} -@safe unittest -{// bugzilla 7679 - import std.algorithm.comparison : equal; - static foreach (S; AliasSeq!(string, wstring, dstring)) - {{ - enum re = ctRegex!(to!S(r"\.")); - auto str = to!S("a.b"); - assert(equal(std.regex.splitter(str, re), [to!S("a"), to!S("b")])); - assert(split(str, re) == [to!S("a"), to!S("b")]); - }} -} -@safe unittest -{//bugzilla 8203 - string data = " - NAME = XPAW01_STA:STATION - NAME = XPAW01_STA - "; - auto uniFileOld = data; - auto r = regex( - r"^NAME = (?P[a-zA-Z0-9_]+):*(?P[a-zA-Z0-9_]*)","gm"); - auto uniCapturesNew = match(uniFileOld, r); - for (int i = 0; i < 20; i++) - foreach (matchNew; uniCapturesNew) {} - //a second issue with same symptoms - auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`); - match("аллея Театральная", r2); -} -@safe unittest -{// bugzilla 8637 purity of enforce - auto m = match("hello world", regex("world")); - enforce(m); -} - -// bugzilla 8725 -@safe unittest -{ - static italic = regex( r"\* - (?!\s+) - (.*?) - (?!\s+) - \*", "gx" ); - string input = "this * is* interesting, *very* interesting"; - assert(replace(input, italic, "$1") == - "this * is* interesting, very interesting"); -} - -// bugzilla 8349 -@safe unittest -{ - enum peakRegexStr = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)"; - enum peakRegex = ctRegex!(peakRegexStr); - //note that the regex pattern itself is probably bogus - assert(match(r"\>wgEncode-blah-Tfbs.narrow", peakRegex)); -} - -// bugzilla 9211 -@safe unittest -{ - import std.algorithm.comparison : equal; - auto rx_1 = regex(r"^(\w)*(\d)"); - auto m = match("1234", rx_1); - assert(equal(m.front, ["1234", "3", "4"])); - auto rx_2 = regex(r"^([0-9])*(\d)"); - auto m2 = match("1234", rx_2); - assert(equal(m2.front, ["1234", "3", "4"])); -} - -// bugzilla 9280 -@safe unittest -{ - string tomatch = "a!b@c"; - static r = regex(r"^(?P.*?)!(?P.*?)@(?P.*?)$"); - auto nm = match(tomatch, r); - assert(nm); - auto c = nm.captures; - assert(c[1] == "a"); - assert(c["nick"] == "a"); -} - - -// bugzilla 9579 -@safe unittest -{ - char[] input = ['a', 'b', 'c']; - string format = "($1)"; - // used to give a compile error: - auto re = regex(`(a)`, "g"); - auto r = replace(input, re, format); - assert(r == "(a)bc"); -} - -// bugzilla 9634 -@safe unittest -{ - auto re = ctRegex!"(?:a+)"; - assert(match("aaaa", re).hit == "aaaa"); -} - -//bugzilla 10798 -@safe unittest -{ - auto cr = ctRegex!("[abcd--c]*"); - auto m = "abc".match(cr); - assert(m); - assert(m.hit == "ab"); -} - -// bugzilla 10913 -@system unittest -{ - @system static string foo(const(char)[] s) - { - return s.dup; - } - @safe static string bar(const(char)[] s) - { - return s.dup; - } - () @system { - replace!((a) => foo(a.hit))("blah", regex(`a`)); - }(); - () @safe { - replace!((a) => bar(a.hit))("blah", regex(`a`)); - }(); -} - -// bugzilla 11262 -@safe unittest -{ - enum reg = ctRegex!(r",", "g"); - auto str = "This,List"; - str = str.replace(reg, "-"); - assert(str == "This-List"); -} - -// bugzilla 11775 -@safe unittest -{ - assert(collectException(regex("a{1,0}"))); -} - -// bugzilla 11839 -@safe unittest -{ - import std.algorithm.comparison : equal; - assert(regex(`(?P\w+)`).namedCaptures.equal(["var1"])); - assert(collectException(regex(`(?P<1>\w+)`))); - assert(regex(`(?P\w+)`).namedCaptures.equal(["v1"])); - assert(regex(`(?P<__>\w+)`).namedCaptures.equal(["__"])); - assert(regex(`(?P<я>\w+)`).namedCaptures.equal(["я"])); -} - -// bugzilla 12076 -@safe unittest -{ - auto RE = ctRegex!(r"(?abc)`); - assert(collectException("abc".matchFirst(r)["b"])); -} - -// bugzilla 12691 -@safe unittest -{ - assert(bmatch("e@", "^([a-z]|)*$").empty); - assert(bmatch("e@", ctRegex!`^([a-z]|)*$`).empty); -} - -//bugzilla 12713 -@safe unittest -{ - assertThrown(regex("[[a-z]([a-z]|(([[a-z])))")); -} - -//bugzilla 12747 -@safe unittest -{ - assertThrown(regex(`^x(\1)`)); - assertThrown(regex(`^(x(\1))`)); - assertThrown(regex(`^((x)(?=\1))`)); -} - -// bugzilla 13532 -version(none) // TODO: revist once we have proper benchmark framework -@safe unittest -{ - import std.datetime.stopwatch : StopWatch, AutoStart; - import std.math : abs; - import std.conv : to; - enum re1 = ctRegex!`[0-9][0-9]`; - immutable static re2 = ctRegex!`[0-9][0-9]`; - immutable iterations = 1_000_000; - size_t result1 = 0, result2 = 0; - auto sw = StopWatch(AutoStart.yes); - foreach (_; 0 .. iterations) - { - result1 += matchFirst("12345678", re1).length; - } - const staticTime = sw.peek(); - sw.reset(); - foreach (_; 0 .. iterations) - { - result2 += matchFirst("12345678", re2).length; - } - const enumTime = sw.peek(); - assert(result1 == result2); - auto ratio = 1.0 * enumTime.total!"usecs" / staticTime.total!"usecs"; - // enum is faster or the diff is less < 30% - assert(ratio < 1.0 || abs(ratio - 1.0) < 0.75, - "enum regex to static regex ratio "~to!string(ratio)); -} - -// bugzilla 14504 -@safe unittest -{ - auto p = ctRegex!("a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?" ~ - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); -} - -// bugzilla 14529 -@safe unittest -{ - auto ctPat2 = regex(r"^[CDF]$", "i"); - foreach (v; ["C", "c", "D", "d", "F", "f"]) - assert(matchAll(v, ctPat2).front.hit == v); -} - -// bugzilla 14615 -@safe unittest -{ - import std.array : appender; - import std.regex : replaceFirst, replaceFirstInto, regex; - import std.stdio : writeln; - - auto example = "Hello, world!"; - auto pattern = regex("^Hello, (bug)"); // won't find this one - auto result = replaceFirst(example, pattern, "$1 Sponge Bob"); - assert(result == "Hello, world!"); // Ok. - - auto sink = appender!string; - replaceFirstInto(sink, example, pattern, "$1 Sponge Bob"); - assert(sink.data == "Hello, world!"); - replaceAllInto(sink, example, pattern, "$1 Sponge Bob"); - assert(sink.data == "Hello, world!Hello, world!"); -} - -// bugzilla 15573 -@safe unittest -{ - auto rx = regex("[c d]", "x"); - assert("a b".matchFirst(rx)); -} - -// bugzilla 15864 -@safe unittest -{ - regex(`((.+)`; - static titleRegex = ctRegex!titlePattern; - string input = "" ~ "<".repeat(100_000).join; - assert(input.matchFirst(titleRegex).empty); -} - -// bugzilla 17212 -@safe unittest -{ - auto r = regex(" [a] ", "x"); - assert("a".matchFirst(r)); -} - -// bugzilla 17157 -@safe unittest -{ - import std.algorithm.comparison : equal; - auto ctr = ctRegex!"(a)|(b)|(c)|(d)"; - auto r = regex("(a)|(b)|(c)|(d)", "g"); - auto s = "--a--b--c--d--"; - auto outcomes = [ - ["a", "a", "", "", ""], - ["b", "", "b", "", ""], - ["c", "", "", "c", ""], - ["d", "", "", "", "d"] - ]; - assert(equal!equal(s.matchAll(ctr), outcomes)); - assert(equal!equal(s.bmatch(r), outcomes)); -} - -// bugzilla 17667 -@safe unittest -{ - import std.algorithm.searching : canFind; - void willThrow(T, size_t line = __LINE__)(T arg, string msg) - { - auto e = collectException(regex(arg)); - assert(e.msg.canFind(msg), to!string(line) ~ ": " ~ e.msg); - } - willThrow([r".", r"[\(\{[\]\}\)]"], "no matching ']' found while parsing character class"); - willThrow([r"[\", r"123"], "no matching ']' found while parsing character class"); - willThrow([r"[a-", r"123"], "no matching ']' found while parsing character class"); - willThrow([r"[a-\", r"123"], "no matching ']' found while parsing character class"); - willThrow([r"\", r"123"], "invalid escape sequence"); -} - -// bugzilla 17668 -@safe unittest -{ - import std.algorithm.searching; - auto e = collectException!RegexException(regex(q"<[^]>")); - assert(e.msg.canFind("no operand for '^'"), e.msg); -} - -// bugzilla 17673 -@safe unittest -{ - string str = `<">`; - string[] regexps = ["abc", "\"|x"]; - auto regexp = regex(regexps); - auto c = matchFirst(str, regexp); - assert(c); - assert(c.whichPattern == 2); -} - diff --git a/std/regex/internal/tests2.d b/std/regex/internal/tests2.d new file mode 100644 index 000000000..61d2d88fb --- /dev/null +++ b/std/regex/internal/tests2.d @@ -0,0 +1,691 @@ +// Split-up due to DMD's enormous memory consumption + +module std.regex.internal.tests2; + +package(std.regex): + +import std.conv, std.exception, std.meta, std.range, + std.typecons, std.regex; + +import std.uni : Escapables; // characters that need escaping + +@safe unittest +{ + auto cr = ctRegex!("abc"); + assert(bmatch("abc",cr).hit == "abc"); + auto cr2 = ctRegex!("ab*c"); + assert(bmatch("abbbbc",cr2).hit == "abbbbc"); +} +@safe unittest +{ + auto cr3 = ctRegex!("^abc$"); + assert(bmatch("abc",cr3).hit == "abc"); + auto cr4 = ctRegex!(`\b(a\B[a-z]b)\b`); + assert(array(match("azb",cr4).captures) == ["azb", "azb"]); +} + +@safe unittest +{ + auto cr5 = ctRegex!("(?:a{2,4}b{1,3}){1,2}"); + assert(bmatch("aaabaaaabbb", cr5).hit == "aaabaaaabbb"); + auto cr6 = ctRegex!("(?:a{2,4}b{1,3}){1,2}?"w); + assert(bmatch("aaabaaaabbb"w, cr6).hit == "aaab"w); +} + +@safe unittest +{ + auto cr7 = ctRegex!(`\r.*?$`,"sm"); + assert(bmatch("abc\r\nxy", cr7).hit == "\r\nxy"); + auto greed = ctRegex!("<packet.*?/packet>"); + assert(bmatch("<packet>text</packet><packet>text</packet>", greed).hit + == "<packet>text</packet>"); +} + +@safe unittest +{ + import std.algorithm.comparison : equal; + auto cr8 = ctRegex!("^(a)(b)?(c*)"); + auto m8 = bmatch("abcc",cr8); + assert(m8); + assert(m8.captures[1] == "a"); + assert(m8.captures[2] == "b"); + assert(m8.captures[3] == "cc"); + auto cr9 = ctRegex!("q(a|b)*q"); + auto m9 = match("xxqababqyy",cr9); + assert(m9); + assert(equal(bmatch("xxqababqyy",cr9).captures, ["qababq", "b"])); +} + +@safe unittest +{ + import std.algorithm.comparison : equal; + auto rtr = regex("a|b|c"); + static ctr = regex("a|b|c"); + assert(equal(rtr.ir,ctr.ir)); + //CTFE parser BUG is triggered by group + //in the middle of alternation (at least not first and not last) + static testCT = regex(`abc|(edf)|xyz`); + auto testRT = regex(`abc|(edf)|xyz`); + assert(equal(testCT.ir,testRT.ir)); +} + +@safe unittest +{ + import std.algorithm.comparison : equal; + import std.algorithm.iteration : map; + enum cx = ctRegex!"(A|B|C)"; + auto mx = match("B",cx); + assert(mx); + assert(equal(mx.captures, [ "B", "B"])); + enum cx2 = ctRegex!"(A|B)*"; + assert(match("BAAA",cx2)); + + enum cx3 = ctRegex!("a{3,4}","i"); + auto mx3 = match("AaA",cx3); + assert(mx3); + assert(mx3.captures[0] == "AaA"); + enum cx4 = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i"); + auto mx4 = match("aaaabc", cx4); + assert(mx4); + assert(mx4.captures[0] == "aaaab"); + auto cr8 = ctRegex!("(a)(b)?(c*)"); + auto m8 = bmatch("abcc",cr8); + assert(m8); + assert(m8.captures[1] == "a"); + assert(m8.captures[2] == "b"); + assert(m8.captures[3] == "cc"); + auto cr9 = ctRegex!(".*$", "gm"); + auto m9 = match("First\rSecond", cr9); + assert(m9); + assert(equal(map!"a.hit"(m9), ["First", "", "Second"])); +} + +@safe unittest +{ + import std.algorithm.comparison : equal; + import std.algorithm.iteration : map; +//global matching + void test_body(alias matchFn)() + { + string s = "a quick brown fox jumps over a lazy dog"; + auto r1 = regex("\\b[a-z]+\\b","g"); + string[] test; + foreach (m; matchFn(s, r1)) + test ~= m.hit; + assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"])); + auto free_reg = regex(` + + abc + \s+ + " + ( + [^"]+ + | \\ " + )+ + " + z + `, "x"); + auto m = match(`abc "quoted string with \" inside"z`,free_reg); + assert(m); + string mails = " hey@you.com no@spam.net "; + auto rm = regex(`@(?<=\S+@)\S+`,"g"); + assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"])); + auto m2 = matchFn("First line\nSecond line",regex(".*$","gm")); + assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"])); + auto m2a = matchFn("First line\nSecond line",regex(".+$","gm")); + assert(equal(map!"a[0]"(m2a), ["First line", "Second line"])); + auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm")); + assert(equal(map!"a[0]"(m2b), ["First line", "Second line"])); + debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!"); + } + test_body!bmatch(); + test_body!match(); +} + +//tests for accumulated std.regex issues and other regressions +@safe unittest +{ + import std.algorithm.comparison : equal; + import std.algorithm.iteration : map; + void test_body(alias matchFn)() + { + //issue 5857 + //matching goes out of control if ... in (...){x} has .*/.+ + auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures; + assert(c[0] == "axxxzayyyyyzd"); + assert(c[1] == "ayyyyyz"); + auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures; + assert(c2[0] == "axxxayyyyyd"); + assert(c2[1] == "ayyyyy"); + //issue 2108 + //greedy vs non-greedy + auto nogreed = regex("<packet.*?/packet>"); + assert(matchFn("<packet>text</packet><packet>text</packet>", nogreed).hit + == "<packet>text</packet>"); + auto greed = regex("<packet.*/packet>"); + assert(matchFn("<packet>text</packet><packet>text</packet>", greed).hit + == "<packet>text</packet><packet>text</packet>"); + //issue 4574 + //empty successful match still advances the input + string[] pres, posts, hits; + foreach (m; matchFn("abcabc", regex("","g"))) + { + pres ~= m.pre; + posts ~= m.post; + assert(m.hit.empty); + + } + auto heads = [ + "abcabc", + "abcab", + "abca", + "abc", + "ab", + "a", + "" + ]; + auto tails = [ + "abcabc", + "bcabc", + "cabc", + "abc", + "bc", + "c", + "" + ]; + assert(pres == array(retro(heads))); + assert(posts == tails); + //issue 6076 + //regression on .* + auto re = regex("c.*|d"); + auto m = matchFn("mm", re); + assert(!m); + debug(std_regex_test) writeln("!!! FReD REGRESSION test done "~matchFn.stringof~" !!!"); + auto rprealloc = regex(`((.){5}.{1,10}){5}`); + auto arr = array(repeat('0',100)); + auto m2 = matchFn(arr, rprealloc); + assert(m2); + assert(collectException( + regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$") + ) is null); + foreach (ch; [Escapables]) + { + assert(match(to!string(ch),regex(`[\`~ch~`]`))); + assert(!match(to!string(ch),regex(`[^\`~ch~`]`))); + assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`))); + } + //bugzilla 7718 + string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'"; + auto reStrCmd = regex (`(".*")|('.*')`, "g"); + assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)), + [`"/GIT/Ruby Apps/sec"`, `'notimer'`])); + } + test_body!bmatch(); + test_body!match(); +} + +// tests for replace +@safe unittest +{ + void test(alias matchFn)() + { + import std.uni : toUpper; + + static foreach (i, v; AliasSeq!(string, wstring, dstring)) + {{ + auto baz(Cap)(Cap m) + if (is(Cap == Captures!(Cap.String))) + { + return toUpper(m.hit); + } + alias String = v; + assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c")) + == to!String("ack rapacity")); + assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c")) + == to!String("ack capacity")); + assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]")) + == to!String("[n]oon")); + assert(std.regex.replace!(matchFn)( + to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'") + ) == to!String(": test2 test1 :")); + auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."), + regex(to!String("[ar]"), "g")); + assert(s == "StRAp A Rocket engine on A chicken."); + }} + debug(std_regex_test) writeln("!!! Replace test done "~matchFn.stringof~" !!!"); + } + test!(bmatch)(); + test!(match)(); +} + +// tests for splitter +@safe unittest +{ + import std.algorithm.comparison : equal; + auto s1 = ", abc, de, fg, hi, "; + auto sp1 = splitter(s1, regex(", *")); + auto w1 = ["", "abc", "de", "fg", "hi", ""]; + assert(equal(sp1, w1)); + + auto s2 = ", abc, de, fg, hi"; + auto sp2 = splitter(s2, regex(", *")); + auto w2 = ["", "abc", "de", "fg", "hi"]; + + uint cnt; + foreach (e; sp2) + { + assert(w2[cnt++] == e); + } + assert(equal(sp2, w2)); +} + +@safe unittest +{ + char[] s1 = ", abc, de, fg, hi, ".dup; + auto sp2 = splitter(s1, regex(", *")); +} + +@safe unittest +{ + import std.algorithm.comparison : equal; + auto s1 = ", abc, de, fg, hi, "; + auto w1 = ["", "abc", "de", "fg", "hi", ""]; + assert(equal(split(s1, regex(", *")), w1[])); +} + +@safe unittest +{ // bugzilla 7141 + string pattern = `[a\--b]`; + assert(match("-", pattern)); + assert(match("b", pattern)); + string pattern2 = `[&-z]`; + assert(match("b", pattern2)); +} +@safe unittest +{//bugzilla 7111 + assert(match("", regex("^"))); +} +@safe unittest +{//bugzilla 7300 + assert(!match("a"d, "aa"d)); +} + +// bugzilla 7551 +@safe unittest +{ + auto r = regex("[]abc]*"); + assert("]ab".matchFirst(r).hit == "]ab"); + assertThrown(regex("[]")); + auto r2 = regex("[]abc--ab]*"); + assert("]ac".matchFirst(r2).hit == "]"); +} + +@safe unittest +{//bugzilla 7674 + assert("1234".replace(regex("^"), "$$") == "$1234"); + assert("hello?".replace(regex(r"\?", "g"), r"\?") == r"hello\?"); + assert("hello?".replace(regex(r"\?", "g"), r"\\?") != r"hello\?"); +} +@safe unittest +{// bugzilla 7679 + import std.algorithm.comparison : equal; + static foreach (S; AliasSeq!(string, wstring, dstring)) + {{ + enum re = ctRegex!(to!S(r"\.")); + auto str = to!S("a.b"); + assert(equal(std.regex.splitter(str, re), [to!S("a"), to!S("b")])); + assert(split(str, re) == [to!S("a"), to!S("b")]); + }} +} +@safe unittest +{//bugzilla 8203 + string data = " + NAME = XPAW01_STA:STATION + NAME = XPAW01_STA + "; + auto uniFileOld = data; + auto r = regex( + r"^NAME = (?P<comp>[a-zA-Z0-9_]+):*(?P<blk>[a-zA-Z0-9_]*)","gm"); + auto uniCapturesNew = match(uniFileOld, r); + for (int i = 0; i < 20; i++) + foreach (matchNew; uniCapturesNew) {} + //a second issue with same symptoms + auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`); + match("аллея Театральная", r2); +} +@safe unittest +{// bugzilla 8637 purity of enforce + auto m = match("hello world", regex("world")); + enforce(m); +} + +// bugzilla 8725 +@safe unittest +{ + static italic = regex( r"\* + (?!\s+) + (.*?) + (?!\s+) + \*", "gx" ); + string input = "this * is* interesting, *very* interesting"; + assert(replace(input, italic, "<i>$1</i>") == + "this * is* interesting, <i>very</i> interesting"); +} + +// bugzilla 8349 +@safe unittest +{ + enum peakRegexStr = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>"; + enum peakRegex = ctRegex!(peakRegexStr); + //note that the regex pattern itself is probably bogus + assert(match(r"\>wgEncode-blah-Tfbs.narrow</a>", peakRegex)); +} + +// bugzilla 9211 +@safe unittest +{ + import std.algorithm.comparison : equal; + auto rx_1 = regex(r"^(\w)*(\d)"); + auto m = match("1234", rx_1); + assert(equal(m.front, ["1234", "3", "4"])); + auto rx_2 = regex(r"^([0-9])*(\d)"); + auto m2 = match("1234", rx_2); + assert(equal(m2.front, ["1234", "3", "4"])); +} + +// bugzilla 9280 +@safe unittest +{ + string tomatch = "a!b@c"; + static r = regex(r"^(?P<nick>.*?)!(?P<ident>.*?)@(?P<host>.*?)$"); + auto nm = match(tomatch, r); + assert(nm); + auto c = nm.captures; + assert(c[1] == "a"); + assert(c["nick"] == "a"); +} + + +// bugzilla 9579 +@safe unittest +{ + char[] input = ['a', 'b', 'c']; + string format = "($1)"; + // used to give a compile error: + auto re = regex(`(a)`, "g"); + auto r = replace(input, re, format); + assert(r == "(a)bc"); +} + +// bugzilla 9634 +@safe unittest +{ + auto re = ctRegex!"(?:a+)"; + assert(match("aaaa", re).hit == "aaaa"); +} + +//bugzilla 10798 +@safe unittest +{ + auto cr = ctRegex!("[abcd--c]*"); + auto m = "abc".match(cr); + assert(m); + assert(m.hit == "ab"); +} + +// bugzilla 10913 +@system unittest +{ + @system static string foo(const(char)[] s) + { + return s.dup; + } + @safe static string bar(const(char)[] s) + { + return s.dup; + } + () @system { + replace!((a) => foo(a.hit))("blah", regex(`a`)); + }(); + () @safe { + replace!((a) => bar(a.hit))("blah", regex(`a`)); + }(); +} + +// bugzilla 11262 +@safe unittest +{ + enum reg = ctRegex!(r",", "g"); + auto str = "This,List"; + str = str.replace(reg, "-"); + assert(str == "This-List"); +} + +// bugzilla 11775 +@safe unittest +{ + assert(collectException(regex("a{1,0}"))); +} + +// bugzilla 11839 +@safe unittest +{ + import std.algorithm.comparison : equal; + assert(regex(`(?P<var1>\w+)`).namedCaptures.equal(["var1"])); + assert(collectException(regex(`(?P<1>\w+)`))); + assert(regex(`(?P<v1>\w+)`).namedCaptures.equal(["v1"])); + assert(regex(`(?P<__>\w+)`).namedCaptures.equal(["__"])); + assert(regex(`(?P<я>\w+)`).namedCaptures.equal(["я"])); +} + +// bugzilla 12076 +@safe unittest +{ + auto RE = ctRegex!(r"(?<!x[a-z]+)\s([a-z]+)"); + string s = "one two"; + auto m = match(s, RE); +} + +// bugzilla 12105 +@safe unittest +{ + auto r = ctRegex!`.*?(?!a)`; + assert("aaab".matchFirst(r).hit == "aaa"); + auto r2 = ctRegex!`.*(?!a)`; + assert("aaab".matchFirst(r2).hit == "aaab"); +} + +//bugzilla 11784 +@safe unittest +{ + assert("abcdefghijklmnopqrstuvwxyz" + .matchFirst("[a-z&&[^aeiuo]]").hit == "b"); +} + +//bugzilla 12366 +@safe unittest +{ + auto re = ctRegex!(`^((?=(xx+?)\2+$)((?=\2+$)(?=(x+)(\4+$))\5){2})*x?$`); + assert("xxxxxxxx".match(re).empty); + assert(!"xxxx".match(re).empty); +} + +// bugzilla 12582 +@safe unittest +{ + auto r = regex(`(?P<a>abc)`); + assert(collectException("abc".matchFirst(r)["b"])); +} + +// bugzilla 12691 +@safe unittest +{ + assert(bmatch("e@", "^([a-z]|)*$").empty); + assert(bmatch("e@", ctRegex!`^([a-z]|)*$`).empty); +} + +//bugzilla 12713 +@safe unittest +{ + assertThrown(regex("[[a-z]([a-z]|(([[a-z])))")); +} + +//bugzilla 12747 +@safe unittest +{ + assertThrown(regex(`^x(\1)`)); + assertThrown(regex(`^(x(\1))`)); + assertThrown(regex(`^((x)(?=\1))`)); +} + +// bugzilla 13532 +version(none) // TODO: revist once we have proper benchmark framework +@safe unittest +{ + import std.datetime.stopwatch : StopWatch, AutoStart; + import std.math : abs; + import std.conv : to; + enum re1 = ctRegex!`[0-9][0-9]`; + immutable static re2 = ctRegex!`[0-9][0-9]`; + immutable iterations = 1_000_000; + size_t result1 = 0, result2 = 0; + auto sw = StopWatch(AutoStart.yes); + foreach (_; 0 .. iterations) + { + result1 += matchFirst("12345678", re1).length; + } + const staticTime = sw.peek(); + sw.reset(); + foreach (_; 0 .. iterations) + { + result2 += matchFirst("12345678", re2).length; + } + const enumTime = sw.peek(); + assert(result1 == result2); + auto ratio = 1.0 * enumTime.total!"usecs" / staticTime.total!"usecs"; + // enum is faster or the diff is less < 30% + assert(ratio < 1.0 || abs(ratio - 1.0) < 0.75, + "enum regex to static regex ratio "~to!string(ratio)); +} + +// bugzilla 14504 +@safe unittest +{ + auto p = ctRegex!("a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?" ~ + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); +} + +// bugzilla 14529 +@safe unittest +{ + auto ctPat2 = regex(r"^[CDF]$", "i"); + foreach (v; ["C", "c", "D", "d", "F", "f"]) + assert(matchAll(v, ctPat2).front.hit == v); +} + +// bugzilla 14615 +@safe unittest +{ + import std.array : appender; + import std.regex : replaceFirst, replaceFirstInto, regex; + import std.stdio : writeln; + + auto example = "Hello, world!"; + auto pattern = regex("^Hello, (bug)"); // won't find this one + auto result = replaceFirst(example, pattern, "$1 Sponge Bob"); + assert(result == "Hello, world!"); // Ok. + + auto sink = appender!string; + replaceFirstInto(sink, example, pattern, "$1 Sponge Bob"); + assert(sink.data == "Hello, world!"); + replaceAllInto(sink, example, pattern, "$1 Sponge Bob"); + assert(sink.data == "Hello, world!Hello, world!"); +} + +// bugzilla 15573 +@safe unittest +{ + auto rx = regex("[c d]", "x"); + assert("a b".matchFirst(rx)); +} + +// bugzilla 15864 +@safe unittest +{ + regex(`(<a (?:(?:\w+=\"[^"]*\")?\s*)*href="\.\.?)"`); +} + +@safe unittest +{ + auto r = regex("(?# comment)abc(?# comment2)"); + assert("abc".matchFirst(r)); + assertThrown(regex("(?#...")); +} + +// bugzilla 17075 +@safe unittest +{ + enum titlePattern = `<title>(.+)`; + static titleRegex = ctRegex!titlePattern; + string input = "" ~ "<".repeat(100_000).join; + assert(input.matchFirst(titleRegex).empty); +} + +// bugzilla 17212 +@safe unittest +{ + auto r = regex(" [a] ", "x"); + assert("a".matchFirst(r)); +} + +// bugzilla 17157 +@safe unittest +{ + import std.algorithm.comparison : equal; + auto ctr = ctRegex!"(a)|(b)|(c)|(d)"; + auto r = regex("(a)|(b)|(c)|(d)", "g"); + auto s = "--a--b--c--d--"; + auto outcomes = [ + ["a", "a", "", "", ""], + ["b", "", "b", "", ""], + ["c", "", "", "c", ""], + ["d", "", "", "", "d"] + ]; + assert(equal!equal(s.matchAll(ctr), outcomes)); + assert(equal!equal(s.bmatch(r), outcomes)); +} + +// bugzilla 17667 +@safe unittest +{ + import std.algorithm.searching : canFind; + void willThrow(T, size_t line = __LINE__)(T arg, string msg) + { + auto e = collectException(regex(arg)); + assert(e.msg.canFind(msg), to!string(line) ~ ": " ~ e.msg); + } + willThrow([r".", r"[\(\{[\]\}\)]"], "no matching ']' found while parsing character class"); + willThrow([r"[\", r"123"], "no matching ']' found while parsing character class"); + willThrow([r"[a-", r"123"], "no matching ']' found while parsing character class"); + willThrow([r"[a-\", r"123"], "no matching ']' found while parsing character class"); + willThrow([r"\", r"123"], "invalid escape sequence"); +} + +// bugzilla 17668 +@safe unittest +{ + import std.algorithm.searching; + auto e = collectException!RegexException(regex(q"<[^]>")); + assert(e.msg.canFind("no operand for '^'"), e.msg); +} + +// bugzilla 17673 +@safe unittest +{ + string str = `<">`; + string[] regexps = ["abc", "\"|x"]; + auto regexp = regex(regexps); + auto c = matchFirst(str, regexp); + assert(c); + assert(c.whichPattern == 2); +} diff --git a/win32.mak b/win32.mak index 1d6df70e8..3b07e20ca 100644 --- a/win32.mak +++ b/win32.mak @@ -216,6 +216,7 @@ SRC_STD_REGEX= \ std\regex\package.d \ std\regex\internal\parser.d \ std\regex\internal\tests.d \ + std\regex\internal\tests2.d \ std\regex\internal\backtracking.d \ std\regex\internal\thompson.d \ std\regex\internal\kickstart.d \ @@ -516,6 +517,7 @@ cov : $(SRC_TO_COMPILE) $(LIB) $(DMD) -conf= -cov=90 $(UDFLAGS) -main -run std\conv.d $(DMD) -conf= -cov=0 $(UDFLAGS) -main -run std\zip.d $(DMD) -conf= -cov=77 $(UDFLAGS) -main -run std\regex\tests.d + $(DMD) -conf= -cov=77 $(UDFLAGS) -main -run std\regex\tests2.d $(DMD) -conf= -cov=92 $(UDFLAGS) -main -run std\json.d $(DMD) -conf= -cov=87 $(UDFLAGS) -main -run std\parallelism.d $(DMD) -conf= -cov=50 $(UDFLAGS) -main -run std\mathspecial.d diff --git a/win64.mak b/win64.mak index 4b2d27360..b044d9839 100644 --- a/win64.mak +++ b/win64.mak @@ -241,6 +241,7 @@ SRC_STD_REGEX= \ std\regex\package.d \ std\regex\internal\parser.d \ std\regex\internal\tests.d \ + std\regex\internal\tests2.d \ std\regex\internal\backtracking.d \ std\regex\internal\thompson.d \ std\regex\internal\kickstart.d \