phobos/std/regex/internal/tests.d
Ilya Yaroshenko c8d9afedea clean scope imports
imports of `std.range, std.algorithm, std.array, std.string,
std.format, std.uni` are affected.
2014-11-21 00:08:35 +03:00

939 lines
40 KiB
D
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
Regualar expressions package test suite.
*/
module std.regex.internal.tests;
package(std.regex):
import std.algorithm, std.conv, std.exception, std.range, std.typecons,
std.typetuple, std.regex;
import std.regex.internal.parser : Escapables; // characters that need escaping
alias Sequence(int B, int E) = staticIota!(B, E);
unittest
{//sanity checks
regex("(a|b)*");
regex(`(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*(.*)\s*#`);
regex("abc|edf|ighrg");
auto r1 = regex("abc");
auto r2 = regex("(gylba)");
assert(match("abcdef", r1).hit == "abc");
assert(!match("wida",r2));
assert(bmatch("abcdef", r1).hit == "abc");
assert(!bmatch("wida", r2));
assert(match("abc", "abc".dup));
assert(bmatch("abc", "abc".dup));
Regex!char rc;
assert(rc.empty);
rc = regex("test");
assert(!rc.empty);
}
/* The test vectors in this file are altered from Henry Spencer's regexp
test code. His copyright notice is:
Copyright (c) 1986 by University of Toronto.
Written by Henry Spencer. Not derived from licensed software.
Permission is granted to anyone to use this software for any
purpose on any computer system, and to redistribute it freely,
subject to the following restrictions:
1. The author is not responsible for the consequences of use of
this software, no matter how awful, even if they arise
from defects in it.
2. The origin of this software must not be misrepresented, either
by explicit claim or by omission.
3. Altered versions must be plainly marked as such, and must not
be misrepresented as being the original software.
*/
unittest
{
struct TestVectors
{
string pattern;
string input;
string result;
string format;
string replace;
string flags;
}
enum TestVectors[] tv = [
TestVectors( "a\\b", "a", "y", "$&", "a" ),
TestVectors( "(a)b\\1", "abaab","y", "$&", "aba" ),
TestVectors( "()b\\1", "aaab", "y", "$&", "b" ),
TestVectors( "abc", "abc", "y", "$&", "abc" ),
TestVectors( "abc", "xbc", "n", "-", "-" ),
TestVectors( "abc", "axc", "n", "-", "-" ),
TestVectors( "abc", "abx", "n", "-", "-" ),
TestVectors( "abc", "xabcy","y", "$&", "abc" ),
TestVectors( "abc", "ababc","y", "$&", "abc" ),
TestVectors( "ab*c", "abc", "y", "$&", "abc" ),
TestVectors( "ab*bc", "abc", "y", "$&", "abc" ),
TestVectors( "ab*bc", "abbc", "y", "$&", "abbc" ),
TestVectors( "ab*bc", "abbbbc","y", "$&", "abbbbc" ),
TestVectors( "ab+bc", "abbc", "y", "$&", "abbc" ),
TestVectors( "ab+bc", "abc", "n", "-", "-" ),
TestVectors( "ab+bc", "abq", "n", "-", "-" ),
TestVectors( "ab+bc", "abbbbc","y", "$&", "abbbbc" ),
TestVectors( "ab?bc", "abbc", "y", "$&", "abbc" ),
TestVectors( "ab?bc", "abc", "y", "$&", "abc" ),
TestVectors( "ab?bc", "abbbbc","n", "-", "-" ),
TestVectors( "ab?c", "abc", "y", "$&", "abc" ),
TestVectors( "^abc$", "abc", "y", "$&", "abc" ),
TestVectors( "^abc$", "abcc", "n", "-", "-" ),
TestVectors( "^abc", "abcc", "y", "$&", "abc" ),
TestVectors( "^abc$", "aabc", "n", "-", "-" ),
TestVectors( "abc$", "aabc", "y", "$&", "abc" ),
TestVectors( "^", "abc", "y", "$&", "" ),
TestVectors( "$", "abc", "y", "$&", "" ),
TestVectors( "a.c", "abc", "y", "$&", "abc" ),
TestVectors( "a.c", "axc", "y", "$&", "axc" ),
TestVectors( "a.*c", "axyzc","y", "$&", "axyzc" ),
TestVectors( "a.*c", "axyzd","n", "-", "-" ),
TestVectors( "a[bc]d", "abc", "n", "-", "-" ),
TestVectors( "a[bc]d", "abd", "y", "$&", "abd" ),
TestVectors( "a[b-d]e", "abd", "n", "-", "-" ),
TestVectors( "a[b-d]e", "ace", "y", "$&", "ace" ),
TestVectors( "a[b-d]", "aac", "y", "$&", "ac" ),
TestVectors( "a[-b]", "a-", "y", "$&", "a-" ),
TestVectors( "a[b-]", "a-", "y", "$&", "a-" ),
TestVectors( "a[b-a]", "-", "c", "-", "-" ),
TestVectors( "a[]b", "-", "c", "-", "-" ),
TestVectors( "a[", "-", "c", "-", "-" ),
TestVectors( "a]", "a]", "y", "$&", "a]" ),
TestVectors( "a[\\]]b", "a]b", "y", "$&", "a]b" ),
TestVectors( "a[^bc]d", "aed", "y", "$&", "aed" ),
TestVectors( "a[^bc]d", "abd", "n", "-", "-" ),
TestVectors( "a[^-b]c", "adc", "y", "$&", "adc" ),
TestVectors( "a[^-b]c", "a-c", "n", "-", "-" ),
TestVectors( "a[^\\]b]c", "adc", "y", "$&", "adc" ),
TestVectors( "ab|cd", "abc", "y", "$&", "ab" ),
TestVectors( "ab|cd", "abcd", "y", "$&", "ab" ),
TestVectors( "()ef", "def", "y", "$&-$1", "ef-" ),
TestVectors( "()*", "-", "y", "-", "-" ),
TestVectors( "*a", "-", "c", "-", "-" ),
TestVectors( "^*", "-", "y", "-", "-" ),
TestVectors( "$*", "-", "y", "-", "-" ),
TestVectors( "(*)b", "-", "c", "-", "-" ),
TestVectors( "$b", "b", "n", "-", "-" ),
TestVectors( "a\\", "-", "c", "-", "-" ),
TestVectors( "a\\(b", "a(b", "y", "$&-$1", "a(b-" ),
TestVectors( "a\\(*b", "ab", "y", "$&", "ab" ),
TestVectors( "a\\(*b", "a((b", "y", "$&", "a((b" ),
TestVectors( "a\\\\b", "a\\b", "y", "$&", "a\\b" ),
TestVectors( "abc)", "-", "c", "-", "-" ),
TestVectors( "(abc", "-", "c", "-", "-" ),
TestVectors( "((a))", "abc", "y", "$&-$1-$2", "a-a-a" ),
TestVectors( "(a)b(c)", "abc", "y", "$&-$1-$2", "abc-a-c" ),
TestVectors( "a+b+c", "aabbabc","y", "$&", "abc" ),
TestVectors( "a**", "-", "c", "-", "-" ),
TestVectors( "a*?a", "aa", "y", "$&", "a" ),
TestVectors( "(a*)*", "aaa", "y", "-", "-" ),
TestVectors( "(a*)+", "aaa", "y", "-", "-" ),
TestVectors( "(a|)*", "-", "y", "-", "-" ),
TestVectors( "(a*|b)*", "aabb", "y", "-", "-" ),
TestVectors( "(a|b)*", "ab", "y", "$&-$1", "ab-b" ),
TestVectors( "(a+|b)*", "ab", "y", "$&-$1", "ab-b" ),
TestVectors( "(a+|b)+", "ab", "y", "$&-$1", "ab-b" ),
TestVectors( "(a+|b)?", "ab", "y", "$&-$1", "a-a" ),
TestVectors( "[^ab]*", "cde", "y", "$&", "cde" ),
TestVectors( "(^)*", "-", "y", "-", "-" ),
TestVectors( "(ab|)*", "-", "y", "-", "-" ),
TestVectors( ")(", "-", "c", "-", "-" ),
TestVectors( "", "abc", "y", "$&", "" ),
TestVectors( "abc", "", "n", "-", "-" ),
TestVectors( "a*", "", "y", "$&", "" ),
TestVectors( "([abc])*d", "abbbcd", "y", "$&-$1", "abbbcd-c" ),
TestVectors( "([abc])*bcd", "abcd", "y", "$&-$1", "abcd-a" ),
TestVectors( "a|b|c|d|e", "e", "y", "$&", "e" ),
TestVectors( "(a|b|c|d|e)f", "ef", "y", "$&-$1", "ef-e" ),
TestVectors( "((a*|b))*", "aabb", "y", "-", "-" ),
TestVectors( "abcd*efg", "abcdefg", "y", "$&", "abcdefg" ),
TestVectors( "ab*", "xabyabbbz", "y", "$&", "ab" ),
TestVectors( "ab*", "xayabbbz", "y", "$&", "a" ),
TestVectors( "(ab|cd)e", "abcde", "y", "$&-$1", "cde-cd" ),
TestVectors( "[abhgefdc]ij", "hij", "y", "$&", "hij" ),
TestVectors( "^(ab|cd)e", "abcde", "n", "x$1y", "xy" ),
TestVectors( "(abc|)ef", "abcdef", "y", "$&-$1", "ef-" ),
TestVectors( "(a|b)c*d", "abcd", "y", "$&-$1", "bcd-b" ),
TestVectors( "(ab|ab*)bc", "abc", "y", "$&-$1", "abc-a" ),
TestVectors( "a([bc]*)c*", "abc", "y", "$&-$1", "abc-bc" ),
TestVectors( "a([bc]*)(c*d)", "abcd", "y", "$&-$1-$2", "abcd-bc-d" ),
TestVectors( "a([bc]+)(c*d)", "abcd", "y", "$&-$1-$2", "abcd-bc-d" ),
TestVectors( "a([bc]*)(c+d)", "abcd", "y", "$&-$1-$2", "abcd-b-cd" ),
TestVectors( "a[bcd]*dcdcde", "adcdcde", "y", "$&", "adcdcde" ),
TestVectors( "a[bcd]+dcdcde", "adcdcde", "n", "-", "-" ),
TestVectors( "(ab|a)b*c", "abc", "y", "$&-$1", "abc-ab" ),
TestVectors( "((a)(b)c)(d)", "abcd", "y", "$1-$2-$3-$4", "abc-a-b-d" ),
TestVectors( "[a-zA-Z_][a-zA-Z0-9_]*", "alpha", "y", "$&", "alpha" ),
TestVectors( "^a(bc+|b[eh])g|.h$", "abh", "y", "$&-$1", "bh-" ),
TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "effgz", "y", "$&-$1-$2", "effgz-effgz-" ),
TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "ij", "y", "$&-$1-$2", "ij-ij-j" ),
TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "effg", "n", "-", "-" ),
TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "bcdd", "n", "-", "-" ),
TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "reffgz", "y", "$&-$1-$2", "effgz-effgz-" ),
TestVectors( "(((((((((a)))))))))", "a", "y", "$&", "a" ),
TestVectors( "multiple words of text", "uh-uh", "n", "-", "-" ),
TestVectors( "multiple words", "multiple words, yeah", "y", "$&", "multiple words" ),
TestVectors( "(.*)c(.*)", "abcde", "y", "$&-$1-$2", "abcde-ab-de" ),
TestVectors( "\\((.*), (.*)\\)", "(a, b)", "y", "($2, $1)", "(b, a)" ),
TestVectors( "abcd", "abcd", "y", "$&-&-$$$&", "abcd-&-$abcd" ),
TestVectors( "a(bc)d", "abcd", "y", "$1-$$1-$$$1", "bc-$1-$bc" ),
TestVectors( "[k]", "ab", "n", "-", "-" ),
TestVectors( "[ -~]*", "abc", "y", "$&", "abc" ),
TestVectors( "[ -~ -~]*", "abc", "y", "$&", "abc" ),
TestVectors( "[ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
TestVectors( "[ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
TestVectors( "[ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
TestVectors( "[ -~ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
TestVectors( "[ -~ -~ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
TestVectors( "a{2}", "candy", "n", "", "" ),
TestVectors( "a{2}", "caandy", "y", "$&", "aa" ),
TestVectors( "a{2}", "caaandy", "y", "$&", "aa" ),
TestVectors( "a{2,}", "candy", "n", "", "" ),
TestVectors( "a{2,}", "caandy", "y", "$&", "aa" ),
TestVectors( "a{2,}", "caaaaaandy", "y", "$&", "aaaaaa" ),
TestVectors( "a{1,3}", "cndy", "n", "", "" ),
TestVectors( "a{1,3}", "candy", "y", "$&", "a" ),
TestVectors( "a{1,3}", "caandy", "y", "$&", "aa" ),
TestVectors( "a{1,3}", "caaaaaandy", "y", "$&", "aaa" ),
TestVectors( "e?le?", "angel", "y", "$&", "el" ),
TestVectors( "e?le?", "angle", "y", "$&", "le" ),
TestVectors( "\\bn\\w", "noonday", "y", "$&", "no" ),
TestVectors( "\\wy\\b", "possibly yesterday", "y", "$&", "ly" ),
TestVectors( "\\w\\Bn", "noonday", "y", "$&", "on" ),
TestVectors( "y\\B\\w", "possibly yesterday", "y", "$&", "ye" ),
TestVectors( "\\cJ", "abc\ndef", "y", "$&", "\n" ),
TestVectors( "\\d", "B2 is", "y", "$&", "2" ),
TestVectors( "\\D", "B2 is", "y", "$&", "B" ),
TestVectors( "\\s\\w*", "foo bar", "y", "$&", " bar" ),
TestVectors( "\\S\\w*", "foo bar", "y", "$&", "foo" ),
TestVectors( "abc", "ababc", "y", "$&", "abc" ),
TestVectors( "apple(,)\\sorange\\1", "apple, orange, cherry, peach", "y", "$&", "apple, orange," ),
TestVectors( "(\\w+)\\s(\\w+)", "John Smith", "y", "$2, $1", "Smith, John" ),
TestVectors( "\\n\\f\\r\\t\\v", "abc\n\f\r\t\vdef", "y", "$&", "\n\f\r\t\v" ),
TestVectors( ".*c", "abcde", "y", "$&", "abc" ),
TestVectors( "^\\w+((;|=)\\w+)+$", "some=host=tld", "y", "$&-$1-$2", "some=host=tld-=tld-=" ),
TestVectors( "^\\w+((\\.|-)\\w+)+$", "some.host.tld", "y", "$&-$1-$2", "some.host.tld-.tld-." ),
TestVectors( "q(a|b)*q", "xxqababqyy", "y", "$&-$1", "qababq-b" ),
TestVectors( "^(a)(b){0,1}(c*)", "abcc", "y", "$1 $2 $3", "a b cc" ),
TestVectors( "^(a)((b){0,1})(c*)", "abcc", "y", "$1 $2 $3", "a b b" ),
TestVectors( "^(a)(b)?(c*)", "abcc", "y", "$1 $2 $3", "a b cc" ),
TestVectors( "^(a)((b)?)(c*)", "abcc", "y", "$1 $2 $3", "a b b" ),
TestVectors( "^(a)(b){0,1}(c*)", "acc", "y", "$1 $2 $3", "a cc" ),
TestVectors( "^(a)((b){0,1})(c*)", "acc", "y", "$1 $2 $3", "a " ),
TestVectors( "^(a)(b)?(c*)", "acc", "y", "$1 $2 $3", "a cc" ),
TestVectors( "^(a)((b)?)(c*)", "acc", "y", "$1 $2 $3", "a " ),
TestVectors( "(?:ab){3}", "_abababc","y", "$&-$1", "ababab-" ),
TestVectors( "(?:a(?:x)?)+", "aaxaxx", "y", "$&-$1-$2", "aaxax--" ),
TestVectors( `\W\w\W`, "aa b!ca", "y", "$&", " b!"),
//more repetitions:
TestVectors( "(?:a{2,4}b{1,3}){1,2}", "aaabaaaabbb", "y", "$&", "aaabaaaabbb" ),
TestVectors( "(?:a{2,4}b{1,3}){1,2}?", "aaabaaaabbb", "y", "$&", "aaab" ),
//groups:
TestVectors( "(abc)|(edf)|(xyz)", "xyz", "y", "$1-$2-$3","--xyz"),
TestVectors( "(?P<q>\\d+)/(?P<d>\\d+)", "2/3", "y", "${d}/${q}", "3/2"),
//set operations:
TestVectors( "[a-z--d-f]", " dfa", "y", "$&", "a"),
TestVectors( "[abc[pq--acq]]{2}", "bqpaca", "y", "$&", "pa"),
TestVectors( "[a-z9&&abc0-9]{3}", "z90a0abc", "y", "$&", "abc"),
TestVectors( "[0-9a-f~~0-5a-z]{2}", "g0a58x", "y", "$&", "8x"),
TestVectors( "[abc[pq]xyz[rs]]{4}", "cqxr", "y", "$&", "cqxr"),
TestVectors( "[abcdf--[ab&&[bcd]][acd]]", "abcdefgh", "y", "$&", "f"),
//unicode blocks & properties:
TestVectors( `\P{Inlatin1suppl ement}`, "\u00c2!", "y", "$&", "!"),
TestVectors( `\p{InLatin-1 Supplement}\p{in-mathematical-operators}\P{Inlatin1suppl ement}`, "\u00c2\u2200\u00c3\u2203.", "y", "$&", "\u00c3\u2203."),
TestVectors( `[-+*/\p{in-mathematical-operators}]{2}`, "a+\u2212", "y", "$&", "+\u2212"),
TestVectors( `\p{Ll}+`, "XabcD", "y", "$&", "abc"),
TestVectors( `\p{Lu}+`, "абвГДЕ", "y", "$&", "ГДЕ"),
TestVectors( `^\p{Currency Symbol}\p{Sc}`, "$₤", "y", "$&", "$₤"),
TestVectors( `\p{Common}\p{Thai}`, "!ฆ", "y", "$&", "!ฆ"),
TestVectors( `[\d\s]*\D`, "12 \t3\U00001680\u0F20_2", "y", "$&", "12 \t3\U00001680\u0F20_"),
TestVectors( `[c-wф]фф`, "ффф", "y", "$&", "ффф"),
//case insensitive:
TestVectors( `^abcdEf$`, "AbCdEF", "y", "$&", "AbCdEF", "i"),
TestVectors( `Русский язык`, "рУсскИй ЯзЫк", "y", "$&", "рУсскИй ЯзЫк", "i"),
TestVectors( `ⒶⒷⓒ` , "ⓐⓑⒸ", "y", "$&", "ⓐⓑⒸ", "i"),
TestVectors( "\U00010400{2}", "\U00010428\U00010400 ", "y", "$&", "\U00010428\U00010400", "i"),
TestVectors( `[adzУ-Я]{4}`, "DzюЯ", "y", "$&", "DzюЯ", "i"),
TestVectors( `\p{L}\p{Lu}{10}`, "абвгдеЖЗИКЛ", "y", "$&", "абвгдеЖЗИКЛ", "i"),
TestVectors( `(?:Dåb){3}`, "DåbDÅBdÅb", "y", "$&", "DåbDÅBdÅb", "i"),
//escapes:
TestVectors( `\u0041\u005a\U00000065\u0001`, "AZe\u0001", "y", "$&", "AZe\u0001"),
TestVectors( `\u`, "", "c", "-", "-"),
TestVectors( `\U`, "", "c", "-", "-"),
TestVectors( `\u003`, "", "c", "-", "-"),
TestVectors( `[\x00-\x7f]{4}`, "\x00\x09ab", "y", "$&", "\x00\x09ab"),
TestVectors( `[\cJ\cK\cA-\cD]{3}\cQ`, "\x01\x0B\x0A\x11", "y", "$&", "\x01\x0B\x0A\x11"),
TestVectors( `\r\n\v\t\f\\`, "\r\n\v\t\f\\", "y", "$&", "\r\n\v\t\f\\"),
TestVectors( `[\u0003\u0001]{2}`, "\u0001\u0003", "y", "$&", "\u0001\u0003"),
TestVectors( `^[\u0020-\u0080\u0001\n-\r]{8}`, "abc\u0001\v\f\r\n", "y", "$&", "abc\u0001\v\f\r\n"),
TestVectors( `\w+\S\w+`, "ab7!44c", "y", "$&", "ab7!44c"),
TestVectors( `\b\w+\b`, " abde4 ", "y", "$&", "abde4"),
TestVectors( `\b\w+\b`, " abde4", "y", "$&", "abde4"),
TestVectors( `\b\w+\b`, "abde4 ", "y", "$&", "abde4"),
TestVectors( `\pL\pS`, "a\u02DA", "y", "$&", "a\u02DA"),
TestVectors( `\pX`, "", "c", "-", "-"),
// ^, $, \b, \B, multiline :
TestVectors( `\r.*?$`, "abc\r\nxy", "y", "$&", "\r\nxy", "sm"),
TestVectors( `^a$^b$`, "a\r\nb\n", "n", "$&", "-", "m"),
TestVectors( `^a$\r\n^b$`,"a\r\nb\n", "y", "$&", "a\r\nb", "m"),
TestVectors( `^$`, "\r\n", "y", "$&", "", "m"),
TestVectors( `^a$\nx$`, "a\nx\u2028","y", "$&", "a\nx", "m"),
TestVectors( `^a$\nx$`, "a\nx\u2029","y", "$&", "a\nx", "m"),
TestVectors( `^a$\nx$`, "a\nx\u0085","y", "$&", "a\nx","m"),
TestVectors( `^x$`, "\u2028x", "y", "$&", "x", "m"),
TestVectors( `^x$`, "\u2029x", "y", "$&", "x", "m"),
TestVectors( `^x$`, "\u0085x", "y", "$&", "x", "m"),
TestVectors( `\b^.`, "ab", "y", "$&", "a"),
TestVectors( `\B^.`, "ab", "n", "-", "-"),
TestVectors( `^ab\Bc\B`, "\r\nabcd", "y", "$&", "abc", "m"),
TestVectors( `^.*$`, "12345678", "y", "$&", "12345678"),
// luckily obtained regression on incremental matching in backtracker
TestVectors( `^(?:(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*([^ ]*)\s*#|# (?:\w|_)+=((?:\w|_)+))`,
"0020 ; White_Space # ", "y", "$1-$2-$3", "--0020"),
//lookahead
TestVectors( "(foo.)(?=(bar))", "foobar foodbar", "y", "$&-$1-$2", "food-food-bar" ),
TestVectors( `\b(\d+)[a-z](?=\1)`, "123a123", "y", "$&-$1", "123a-123" ),
TestVectors( `\$(?!\d{3})\w+`, "$123 $abc", "y", "$&", "$abc"),
TestVectors( `(abc)(?=(ed(f))\3)`, "abcedff", "y", "-", "-"),
TestVectors( `\b[A-Za-z0-9.]+(?=(@(?!gmail)))`, "a@gmail,x@com", "y", "$&-$1", "x-@"),
TestVectors( `x()(abc)(?=(d)(e)(f)\2)`, "xabcdefabc", "y", "$&", "xabc"),
TestVectors( `x()(abc)(?=(d)(e)(f)()\3\4\5)`, "xabcdefdef", "y", "$&", "xabc"),
//lookback
TestVectors( `(?<=(ab))\d`, "12ba3ab4", "y", "$&-$1", "4-ab", "i"),
TestVectors( `\w(?<!\d)\w`, "123ab24", "y", "$&", "ab"),
TestVectors( `(?<=Dåb)x\w`, "DåbDÅBxdÅb", "y", "$&", "xd", "i"),
TestVectors( `(?<=(ab*c))x`, "abbbbcxac", "y", "$&-$1", "x-abbbbc"),
TestVectors( `(?<=(ab*?c))x`, "abbbbcxac", "y", "$&-$1", "x-abbbbc"),
TestVectors( `(?<=(a.*?c))x`, "ababbcxac", "y", "$&-$1", "x-abbc"),
TestVectors( `(?<=(a{2,4}b{1,3}))x`, "yyaaaabx", "y", "$&-$1", "x-aaaab"),
TestVectors( `(?<=((?:a{2,4}b{1,3}){1,2}))x`, "aabbbaaaabx", "y", "$&-$1", "x-aabbbaaaab"),
TestVectors( `(?<=((?:a{2,4}b{1,3}){1,2}?))x`, "aabbbaaaabx", "y", "$&-$1", "x-aaaab"),
TestVectors( `(?<=(abc|def|aef))x`, "abcx", "y", "$&-$1", "x-abc"),
TestVectors( `(?<=(abc|def|aef))x`, "aefx", "y", "$&-$1", "x-aef"),
TestVectors( `(?<=(abc|dabc))(x)`, "dabcx", "y", "$&-$1-$2", "x-abc-x"),
TestVectors( `(?<=(|abc))x`, "dabcx", "y", "$&-$1", "x-"),
TestVectors( `(?<=((ab|da)*))x`, "abdaabx", "y", "$&-$2-$1", "x-ab-abdaab"),
TestVectors( `a(?<=(ba(?<=(aba)(?<=aaba))))`, "aabaa", "y", "$&-$1-$2", "a-ba-aba"),
TestVectors( `.(?<!b).`, "bax", "y", "$&", "ax"),
TestVectors( `(?<=b(?<!ab)).`, "abbx", "y", "$&", "x"),
TestVectors( `(?<=\.|[!?]+)X`, "Hey?!X", "y", "$&", "X"),
TestVectors( `(?<=\.|[!?]+)a{3}`, ".Nope.aaaX", "y", "$&", "aaa"),
//mixed lookaround
TestVectors( `a(?<=a(?=b))b`, "ab", "y", "$&", "ab"),
TestVectors( `a(?<=a(?!b))c`, "ac", "y", "$&", "ac"),
];
string produceExpected(M,String)(auto ref M m, String fmt)
{
auto app = appender!(String)();
replaceFmt(fmt, m.captures, app, true);
return app.data;
}
void run_tests(alias matchFn)()
{
int i;
foreach(Char; TypeTuple!( char, wchar, dchar))
{
alias String = immutable(Char)[];
String produceExpected(M,Range)(auto ref M m, Range fmt)
{
auto app = appender!(String)();
replaceFmt(fmt, m.captures, app, true);
return app.data;
}
Regex!(Char) r;
foreach(a, tvd; tv)
{
uint c = tvd.result[0];
debug(std_regex_test) writeln(" Test #", a, " pattern: ", tvd.pattern, " with Char = ", Char.stringof);
try
{
i = 1;
r = regex(to!(String)(tvd.pattern), tvd.flags);
}
catch (RegexException e)
{
i = 0;
debug(std_regex_test) writeln(e.msg);
}
assert((c == 'c') ? !i : i, "failed to compile pattern "~tvd.pattern);
if(c != 'c')
{
auto m = matchFn(to!(String)(tvd.input), r);
i = !m.empty;
assert((c == 'y') ? i : !i, text(matchFn.stringof ~": failed to match pattern #", a ,": ", tvd.pattern));
if(c == 'y')
{
auto result = produceExpected(m, to!(String)(tvd.format));
assert(result == to!String(tvd.replace),
text(matchFn.stringof ~": mismatch pattern #", a, ": ", tvd.pattern," expected: ",
tvd.replace, " vs ", result));
}
}
}
}
debug(std_regex_test) writeln("!!! FReD bulk test done "~matchFn.stringof~" !!!");
}
void ct_tests()
{
version(std_regex_ct1)
{
pragma(msg, "Testing 1st part of ctRegex");
alias Tests = Sequence!(0, 155);
}
else version(std_regex_ct2)
{
pragma(msg, "Testing 2nd part of ctRegex");
alias Tests = Sequence!(155, 174);
}
//FIXME: #174-178 contains CTFE parser bug
else version(std_regex_ct3)
{
pragma(msg, "Testing 3rd part of ctRegex");
alias Tests = Sequence!(178, 220);
}
else version(std_regex_ct4)
{
pragma(msg, "Testing 4th part of ctRegex");
alias Tests = Sequence!(220, tv.length);
}
else
alias Tests = TypeTuple!(Sequence!(0, 30), Sequence!(235, tv.length-5));
foreach(a, v; Tests)
{
enum tvd = tv[v];
static if(tvd.result == "c")
{
static assert(!__traits(compiles, (){
enum r = regex(tvd.pattern, tvd.flags);
}), "errornously compiles regex pattern: " ~ tvd.pattern);
}
else
{
//BUG: tv[v] is fine but tvd is not known at compile time?!
auto r = ctRegex!(tv[v].pattern, tv[v].flags);
auto nr = regex(tvd.pattern, tvd.flags);
assert(equal(r.ir, nr.ir),
text("!C-T regex! failed to compile pattern #", a ,": ", tvd.pattern));
auto m = match(tvd.input, r);
auto c = tvd.result[0];
bool ok = (c == 'y') ^ m.empty;
assert(ok, text("ctRegex: failed to match pattern #",
a ,": ", tvd.pattern));
if(c == 'y')
{
import std.stdio;
auto result = produceExpected(m, tvd.format);
if(result != tvd.replace)
writeln("ctRegex mismatch pattern #", a, ": ", tvd.pattern," expected: ",
tvd.replace, " vs ", result);
}
}
}
debug(std_regex_test) writeln("!!! FReD C-T test done !!!");
}
ct_tests();
run_tests!bmatch(); //backtracker
run_tests!match(); //thompson VM
}
unittest
{
auto cr = ctRegex!("abc");
assert(bmatch("abc",cr).hit == "abc");
auto cr2 = ctRegex!("ab*c");
assert(bmatch("abbbbc",cr2).hit == "abbbbc");
auto cr3 = ctRegex!("^abc$");
assert(bmatch("abc",cr3).hit == "abc");
auto cr4 = ctRegex!(`\b(a\B[a-z]b)\b`);
assert(array(match("azb",cr4).captures) == ["azb", "azb"]);
auto cr5 = ctRegex!("(?:a{2,4}b{1,3}){1,2}");
assert(bmatch("aaabaaaabbb", cr5).hit == "aaabaaaabbb");
auto cr6 = ctRegex!("(?:a{2,4}b{1,3}){1,2}?"w);
assert(bmatch("aaabaaaabbb"w, cr6).hit == "aaab"w);
auto cr7 = ctRegex!(`\r.*?$`,"sm");
assert(bmatch("abc\r\nxy", cr7).hit == "\r\nxy");
auto greed = ctRegex!("<packet.*?/packet>");
assert(bmatch("<packet>text</packet><packet>text</packet>", greed).hit
== "<packet>text</packet>");
auto cr8 = ctRegex!("^(a)(b)?(c*)");
auto m8 = bmatch("abcc",cr8);
assert(m8);
assert(m8.captures[1] == "a");
assert(m8.captures[2] == "b");
assert(m8.captures[3] == "cc");
auto cr9 = ctRegex!("q(a|b)*q");
auto m9 = match("xxqababqyy",cr9);
assert(m9);
assert(equal(bmatch("xxqababqyy",cr9).captures, ["qababq", "b"]));
auto rtr = regex("a|b|c");
enum ctr = regex("a|b|c");
assert(equal(rtr.ir,ctr.ir));
//CTFE parser BUG is triggered by group
//in the middle of alternation (at least not first and not last)
enum testCT = regex(`abc|(edf)|xyz`);
auto testRT = regex(`abc|(edf)|xyz`);
assert(equal(testCT.ir,testRT.ir));
}
unittest
{
enum cx = ctRegex!"(A|B|C)";
auto mx = match("B",cx);
assert(mx);
assert(equal(mx.captures, [ "B", "B"]));
enum cx2 = ctRegex!"(A|B)*";
assert(match("BAAA",cx2));
enum cx3 = ctRegex!("a{3,4}","i");
auto mx3 = match("AaA",cx3);
assert(mx3);
assert(mx3.captures[0] == "AaA");
enum cx4 = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i");
auto mx4 = match("aaaabc", cx4);
assert(mx4);
assert(mx4.captures[0] == "aaaab");
auto cr8 = ctRegex!("(a)(b)?(c*)");
auto m8 = bmatch("abcc",cr8);
assert(m8);
assert(m8.captures[1] == "a");
assert(m8.captures[2] == "b");
assert(m8.captures[3] == "cc");
auto cr9 = ctRegex!(".*$", "gm");
auto m9 = match("First\rSecond", cr9);
assert(m9);
assert(equal(map!"a.hit"(m9), ["First", "", "Second"]));
}
unittest
{
//global matching
void test_body(alias matchFn)()
{
string s = "a quick brown fox jumps over a lazy dog";
auto r1 = regex("\\b[a-z]+\\b","g");
string[] test;
foreach(m; matchFn(s, r1))
test ~= m.hit;
assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"]));
auto free_reg = regex(`
abc
\s+
"
(
[^"]+
| \\ "
)+
"
z
`, "x");
auto m = match(`abc "quoted string with \" inside"z`,free_reg);
assert(m);
string mails = " hey@you.com no@spam.net ";
auto rm = regex(`@(?<=\S+@)\S+`,"g");
assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"]));
auto m2 = matchFn("First line\nSecond line",regex(".*$","gm"));
assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"]));
auto m2a = matchFn("First line\nSecond line",regex(".+$","gm"));
assert(equal(map!"a[0]"(m2a), ["First line", "Second line"]));
auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm"));
assert(equal(map!"a[0]"(m2b), ["First line", "Second line"]));
debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!");
}
test_body!bmatch();
test_body!match();
}
//tests for accumulated std.regex issues and other regressions
unittest
{
void test_body(alias matchFn)()
{
//issue 5857
//matching goes out of control if ... in (...){x} has .*/.+
auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures;
assert(c[0] == "axxxzayyyyyzd");
assert(c[1] == "ayyyyyz");
auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures;
assert(c2[0] == "axxxayyyyyd");
assert(c2[1] == "ayyyyy");
//issue 2108
//greedy vs non-greedy
auto nogreed = regex("<packet.*?/packet>");
assert(matchFn("<packet>text</packet><packet>text</packet>", nogreed).hit
== "<packet>text</packet>");
auto greed = regex("<packet.*/packet>");
assert(matchFn("<packet>text</packet><packet>text</packet>", greed).hit
== "<packet>text</packet><packet>text</packet>");
//issue 4574
//empty successful match still advances the input
string[] pres, posts, hits;
foreach(m; matchFn("abcabc", regex("","g"))) {
pres ~= m.pre;
posts ~= m.post;
assert(m.hit.empty);
}
auto heads = [
"abcabc",
"abcab",
"abca",
"abc",
"ab",
"a",
""
];
auto tails = [
"abcabc",
"bcabc",
"cabc",
"abc",
"bc",
"c",
""
];
assert(pres == array(retro(heads)));
assert(posts == tails);
//issue 6076
//regression on .*
auto re = regex("c.*|d");
auto m = matchFn("mm", re);
assert(!m);
debug(std_regex_test) writeln("!!! FReD REGRESSION test done "~matchFn.stringof~" !!!");
auto rprealloc = regex(`((.){5}.{1,10}){5}`);
auto arr = array(repeat('0',100));
auto m2 = matchFn(arr, rprealloc);
assert(m2);
assert(collectException(
regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$")
) is null);
foreach(ch; [Escapables])
{
assert(match(to!string(ch),regex(`[\`~ch~`]`)));
assert(!match(to!string(ch),regex(`[^\`~ch~`]`)));
assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`)));
}
//bugzilla 7718
string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'";
auto reStrCmd = regex (`(".*")|('.*')`, "g");
assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)),
[`"/GIT/Ruby Apps/sec"`, `'notimer'`]));
}
test_body!bmatch();
test_body!match();
}
// tests for replace
unittest
{
void test(alias matchFn)()
{
import std.uni : toUpper;
foreach(i, v; TypeTuple!(string, wstring, dstring))
{
auto baz(Cap)(Cap m)
if (is(Cap == Captures!(Cap.String)))
{
return toUpper(m.hit);
}
alias String = v;
assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c"))
== to!String("ack rapacity"));
assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c"))
== to!String("ack capacity"));
assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]"))
== to!String("[n]oon"));
assert(std.regex.replace!(matchFn)(to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'"))
== to!String(": test2 test1 :"));
auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."),
regex(to!String("[ar]"), "g"));
assert(s == "StRAp A Rocket engine on A chicken.");
}
debug(std_regex_test) writeln("!!! Replace test done "~matchFn.stringof~" !!!");
}
test!(bmatch)();
test!(match)();
}
// tests for splitter
unittest
{
auto s1 = ", abc, de, fg, hi, ";
auto sp1 = splitter(s1, regex(", *"));
auto w1 = ["", "abc", "de", "fg", "hi", ""];
assert(equal(sp1, w1));
auto s2 = ", abc, de, fg, hi";
auto sp2 = splitter(s2, regex(", *"));
auto w2 = ["", "abc", "de", "fg", "hi"];
uint cnt;
foreach(e; sp2) {
assert(w2[cnt++] == e);
}
assert(equal(sp2, w2));
}
unittest
{
char[] s1 = ", abc, de, fg, hi, ".dup;
auto sp2 = splitter(s1, regex(", *"));
}
unittest
{
auto s1 = ", abc, de, fg, hi, ";
auto w1 = ["", "abc", "de", "fg", "hi", ""];
assert(equal(split(s1, regex(", *")), w1[]));
}
unittest
{ // bugzilla 7141
string pattern = `[a\--b]`;
assert(match("-", pattern));
assert(match("b", pattern));
string pattern2 = `[&-z]`;
assert(match("b", pattern2));
}
unittest
{//bugzilla 7111
assert(match("", regex("^")));
}
unittest
{//bugzilla 7300
assert(!match("a"d, "aa"d));
}
unittest
{//bugzilla 7674
assert("1234".replace(regex("^"), "$$") == "$1234");
assert("hello?".replace(regex(r"\?", "g"), r"\?") == r"hello\?");
assert("hello?".replace(regex(r"\?", "g"), r"\\?") != r"hello\?");
}
unittest
{// bugzilla 7679
foreach(S; TypeTuple!(string, wstring, dstring))
{
enum re = ctRegex!(to!S(r"\."));
auto str = to!S("a.b");
assert(equal(std.regex.splitter(str, re), [to!S("a"), to!S("b")]));
assert(split(str, re) == [to!S("a"), to!S("b")]);
}
}
unittest
{//bugzilla 8203
string data = "
NAME = XPAW01_STA:STATION
NAME = XPAW01_STA
";
auto uniFileOld = data;
auto r = regex(
r"^NAME = (?P<comp>[a-zA-Z0-9_]+):*(?P<blk>[a-zA-Z0-9_]*)","gm");
auto uniCapturesNew = match(uniFileOld, r);
for(int i = 0; i < 20; i++)
foreach (matchNew; uniCapturesNew) {}
//a second issue with same symptoms
auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`);
match("аллея Театральная", r2);
}
unittest
{// bugzilla 8637 purity of enforce
auto m = match("hello world", regex("world"));
enforce(m);
}
// bugzilla 8725
unittest
{
static italic = regex( r"\*
(?!\s+)
(.*?)
(?!\s+)
\*", "gx" );
string input = "this * is* interesting, *very* interesting";
assert(replace(input, italic, "<i>$1</i>") ==
"this * is* interesting, <i>very</i> interesting");
}
// bugzilla 8349
unittest
{
enum peakRegexStr = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>";
enum peakRegex = ctRegex!(peakRegexStr);
//note that the regex pattern itself is probably bogus
assert(match(r"\>wgEncode-blah-Tfbs.narrow</a>", peakRegex));
}
// bugzilla 9211
unittest
{
auto rx_1 = regex(r"^(\w)*(\d)");
auto m = match("1234", rx_1);
assert(equal(m.front, ["1234", "3", "4"]));
auto rx_2 = regex(r"^([0-9])*(\d)");
auto m2 = match("1234", rx_2);
assert(equal(m2.front, ["1234", "3", "4"]));
}
// bugzilla 9280
unittest
{
string tomatch = "a!b@c";
static r = regex(r"^(?P<nick>.*?)!(?P<ident>.*?)@(?P<host>.*?)$");
auto nm = match(tomatch, r);
assert(nm);
auto c = nm.captures;
assert(c[1] == "a");
assert(c["nick"] == "a");
}
// bugzilla 9579
unittest
{
char[] input = ['a', 'b', 'c'];
string format = "($1)";
// used to give a compile error:
auto re = regex(`(a)`, "g");
auto r = replace(input, re, format);
assert(r == "(a)bc");
}
// bugzilla 9634
unittest
{
auto re = ctRegex!"(?:a+)";
assert(match("aaaa", re).hit == "aaaa");
}
//bugzilla 10798
unittest
{
auto cr = ctRegex!("[abcd--c]*");
auto m = "abc".match(cr);
assert(m);
assert(m.hit == "ab");
}
// bugzilla 10913
unittest
{
@system static string foo(const(char)[] s)
{
return s.dup;
}
@safe static string bar(const(char)[] s)
{
return s.dup;
}
() @system {
replace!((a) => foo(a.hit))("blah", regex(`a`));
}();
() @safe {
replace!((a) => bar(a.hit))("blah", regex(`a`));
}();
}
// bugzilla 11262
unittest
{
enum reg = ctRegex!(r",", "g");
auto str = "This,List";
str = str.replace(reg, "-");
assert(str == "This-List");
}
// bugzilla 11775
unittest
{
assert(collectException(regex("a{1,0}")));
}
// bugzilla 11839
unittest
{
assert(regex(`(?P<var1>\w+)`).namedCaptures.equal(["var1"]));
assert(collectException(regex(`(?P<1>\w+)`)));
assert(regex(`(?P<v1>\w+)`).namedCaptures.equal(["v1"]));
assert(regex(`(?P<__>\w+)`).namedCaptures.equal(["__"]));
assert(regex(`(?P<я>\w+)`).namedCaptures.equal(["я"]));
}
// bugzilla 12076
unittest
{
auto RE = ctRegex!(r"(?<!x[a-z]+)\s([a-z]+)");
string s = "one two";
auto m = match(s, RE);
}
// bugzilla 12105
unittest
{
auto r = ctRegex!`.*?(?!a)`;
assert("aaab".matchFirst(r).hit == "aaa");
auto r2 = ctRegex!`.*(?!a)`;
assert("aaab".matchFirst(r2).hit == "aaab");
}
//bugzilla 11784
unittest
{
assert("abcdefghijklmnopqrstuvwxyz"
.matchFirst("[a-z&&[^aeiuo]]").hit == "b");
}
//bugzilla 12366
unittest
{
auto re = ctRegex!(`^((?=(xx+?)\2+$)((?=\2+$)(?=(x+)(\4+$))\5){2})*x?$`);
assert("xxxxxxxx".match(re).empty);
assert(!"xxxx".match(re).empty);
}
// bugzilla 12582
unittest
{
auto r = regex(`(?P<a>abc)`);
assert(collectException("abc".matchFirst(r)["b"]));
}
// bugzilla 12691
unittest
{
assert(bmatch("e@", "^([a-z]|)*$").empty);
assert(bmatch("e@", ctRegex!`^([a-z]|)*$`).empty);
}
//bugzilla 12713
unittest
{
assertThrown(regex("[[a-z]([a-z]|(([[a-z])))"));
}
//bugzilla 12747
unittest
{
assertThrown(regex(`^x(\1)`));
assertThrown(regex(`^(x(\1))`));
assertThrown(regex(`^((x)(?=\1))`));
}