mirror of
https://github.com/dlang/phobos.git
synced 2025-04-26 21:22:20 +03:00
1025 lines
42 KiB
D
1025 lines
42 KiB
D
/*
|
||
Regular expressions package test suite.
|
||
*/
|
||
module std.regex.internal.tests;
|
||
|
||
package(std.regex):
|
||
|
||
import std.algorithm, std.conv, std.exception, std.meta, std.range,
|
||
std.typecons, std.regex;
|
||
|
||
import std.regex.internal.parser : Escapables; // characters that need escaping
|
||
|
||
// Shorthand for staticIota!(B, E); ct_tests below uses it to select which
// indices of the test-vector table get instantiated at compile time.
// NOTE(review): the call sites suggest a half-open range [B, E) - confirm against staticIota.
alias Sequence(int B, int E) = staticIota!(B, E);
|
||
|
||
unittest
{//sanity checks
    // These patterns only have to compile; the resulting engines are discarded.
    regex("(a|b)*");
    regex(`(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*(.*)\s*#`);
    regex("abc|edf|ighrg");

    auto plain = regex("abc");
    auto grouped = regex("(gylba)");

    // Same two checks through match and then through bmatch.
    assert("abcdef".match(plain).hit == "abc");
    assert(!"wida".match(grouped));
    assert("abcdef".bmatch(plain).hit == "abc");
    assert(!"wida".bmatch(grouped));

    // A mutable char[] is accepted in the pattern position.
    assert("abc".match("abc".dup));
    assert("abc".bmatch("abc".dup));

    // A default-initialized Regex reports empty until a pattern is assigned.
    Regex!char pending;
    assert(pending.empty);
    pending = regex("test");
    assert(!pending.empty);
}
|
||
|
||
/* The test vectors in this file are altered from Henry Spencer's regexp
|
||
test code. His copyright notice is:
|
||
|
||
Copyright (c) 1986 by University of Toronto.
|
||
Written by Henry Spencer. Not derived from licensed software.
|
||
|
||
Permission is granted to anyone to use this software for any
|
||
purpose on any computer system, and to redistribute it freely,
|
||
subject to the following restrictions:
|
||
|
||
1. The author is not responsible for the consequences of use of
|
||
this software, no matter how awful, even if they arise
|
||
from defects in it.
|
||
|
||
2. The origin of this software must not be misrepresented, either
|
||
by explicit claim or by omission.
|
||
|
||
3. Altered versions must be plainly marked as such, and must not
|
||
be misrepresented as being the original software.
|
||
|
||
|
||
*/
|
||
|
||
unittest
|
||
{
|
||
// One row of the regression table. Field order matters: rows are built positionally.
struct TestVectors
{
    string pattern, input; // regex source and the text it is run against
    string result;         // first char: 'y' = must match, 'n' = must not match, 'c' = must fail to compile
    string format, replace; // replacement format and the text it must expand to on a match
    string flags;          // regex flags handed to regex(); left at its default when omitted
}
|
||
|
||
static immutable TestVectors[] tv = [
|
||
TestVectors( "a\\b", "a", "y", "$&", "a" ),
|
||
TestVectors( "(a)b\\1", "abaab","y", "$&", "aba" ),
|
||
TestVectors( "()b\\1", "aaab", "y", "$&", "b" ),
|
||
TestVectors( "abc", "abc", "y", "$&", "abc" ),
|
||
TestVectors( "abc", "xbc", "n", "-", "-" ),
|
||
TestVectors( "abc", "axc", "n", "-", "-" ),
|
||
TestVectors( "abc", "abx", "n", "-", "-" ),
|
||
TestVectors( "abc", "xabcy","y", "$&", "abc" ),
|
||
TestVectors( "abc", "ababc","y", "$&", "abc" ),
|
||
TestVectors( "ab*c", "abc", "y", "$&", "abc" ),
|
||
TestVectors( "ab*bc", "abc", "y", "$&", "abc" ),
|
||
TestVectors( "ab*bc", "abbc", "y", "$&", "abbc" ),
|
||
TestVectors( "ab*bc", "abbbbc","y", "$&", "abbbbc" ),
|
||
TestVectors( "ab+bc", "abbc", "y", "$&", "abbc" ),
|
||
TestVectors( "ab+bc", "abc", "n", "-", "-" ),
|
||
TestVectors( "ab+bc", "abq", "n", "-", "-" ),
|
||
TestVectors( "ab+bc", "abbbbc","y", "$&", "abbbbc" ),
|
||
TestVectors( "ab?bc", "abbc", "y", "$&", "abbc" ),
|
||
TestVectors( "ab?bc", "abc", "y", "$&", "abc" ),
|
||
TestVectors( "ab?bc", "abbbbc","n", "-", "-" ),
|
||
TestVectors( "ab?c", "abc", "y", "$&", "abc" ),
|
||
TestVectors( "^abc$", "abc", "y", "$&", "abc" ),
|
||
TestVectors( "^abc$", "abcc", "n", "-", "-" ),
|
||
TestVectors( "^abc", "abcc", "y", "$&", "abc" ),
|
||
TestVectors( "^abc$", "aabc", "n", "-", "-" ),
|
||
TestVectors( "abc$", "aabc", "y", "$&", "abc" ),
|
||
TestVectors( "^", "abc", "y", "$&", "" ),
|
||
TestVectors( "$", "abc", "y", "$&", "" ),
|
||
TestVectors( "a.c", "abc", "y", "$&", "abc" ),
|
||
TestVectors( "a.c", "axc", "y", "$&", "axc" ),
|
||
TestVectors( "a.*c", "axyzc","y", "$&", "axyzc" ),
|
||
TestVectors( "a.*c", "axyzd","n", "-", "-" ),
|
||
TestVectors( "a[bc]d", "abc", "n", "-", "-" ),
|
||
TestVectors( "a[bc]d", "abd", "y", "$&", "abd" ),
|
||
TestVectors( "a[b-d]e", "abd", "n", "-", "-" ),
|
||
TestVectors( "a[b-d]e", "ace", "y", "$&", "ace" ),
|
||
TestVectors( "a[b-d]", "aac", "y", "$&", "ac" ),
|
||
TestVectors( "a[-b]", "a-", "y", "$&", "a-" ),
|
||
TestVectors( "a[b-]", "a-", "y", "$&", "a-" ),
|
||
TestVectors( "a[b-a]", "-", "c", "-", "-" ),
|
||
TestVectors( "a[]b", "-", "c", "-", "-" ),
|
||
TestVectors( "a[", "-", "c", "-", "-" ),
|
||
TestVectors( "a]", "a]", "y", "$&", "a]" ),
|
||
TestVectors( "a[\\]]b", "a]b", "y", "$&", "a]b" ),
|
||
TestVectors( "a[^bc]d", "aed", "y", "$&", "aed" ),
|
||
TestVectors( "a[^bc]d", "abd", "n", "-", "-" ),
|
||
TestVectors( "a[^-b]c", "adc", "y", "$&", "adc" ),
|
||
TestVectors( "a[^-b]c", "a-c", "n", "-", "-" ),
|
||
TestVectors( "a[^\\]b]c", "adc", "y", "$&", "adc" ),
|
||
TestVectors( "ab|cd", "abc", "y", "$&", "ab" ),
|
||
TestVectors( "ab|cd", "abcd", "y", "$&", "ab" ),
|
||
TestVectors( "()ef", "def", "y", "$&-$1", "ef-" ),
|
||
TestVectors( "()*", "-", "y", "-", "-" ),
|
||
TestVectors( "*a", "-", "c", "-", "-" ),
|
||
TestVectors( "^*", "-", "y", "-", "-" ),
|
||
TestVectors( "$*", "-", "y", "-", "-" ),
|
||
TestVectors( "(*)b", "-", "c", "-", "-" ),
|
||
TestVectors( "$b", "b", "n", "-", "-" ),
|
||
TestVectors( "a\\", "-", "c", "-", "-" ),
|
||
TestVectors( "a\\(b", "a(b", "y", "$&-$1", "a(b-" ),
|
||
TestVectors( "a\\(*b", "ab", "y", "$&", "ab" ),
|
||
TestVectors( "a\\(*b", "a((b", "y", "$&", "a((b" ),
|
||
TestVectors( "a\\\\b", "a\\b", "y", "$&", "a\\b" ),
|
||
TestVectors( "abc)", "-", "c", "-", "-" ),
|
||
TestVectors( "(abc", "-", "c", "-", "-" ),
|
||
TestVectors( "((a))", "abc", "y", "$&-$1-$2", "a-a-a" ),
|
||
TestVectors( "(a)b(c)", "abc", "y", "$&-$1-$2", "abc-a-c" ),
|
||
TestVectors( "a+b+c", "aabbabc","y", "$&", "abc" ),
|
||
TestVectors( "a**", "-", "c", "-", "-" ),
|
||
TestVectors( "a*?a", "aa", "y", "$&", "a" ),
|
||
TestVectors( "(a*)*", "aaa", "y", "-", "-" ),
|
||
TestVectors( "(a*)+", "aaa", "y", "-", "-" ),
|
||
TestVectors( "(a|)*", "-", "y", "-", "-" ),
|
||
TestVectors( "(a*|b)*", "aabb", "y", "-", "-" ),
|
||
TestVectors( "(a|b)*", "ab", "y", "$&-$1", "ab-b" ),
|
||
TestVectors( "(a+|b)*", "ab", "y", "$&-$1", "ab-b" ),
|
||
TestVectors( "(a+|b)+", "ab", "y", "$&-$1", "ab-b" ),
|
||
TestVectors( "(a+|b)?", "ab", "y", "$&-$1", "a-a" ),
|
||
TestVectors( "[^ab]*", "cde", "y", "$&", "cde" ),
|
||
TestVectors( "(^)*", "-", "y", "-", "-" ),
|
||
TestVectors( "(ab|)*", "-", "y", "-", "-" ),
|
||
TestVectors( ")(", "-", "c", "-", "-" ),
|
||
TestVectors( "", "abc", "y", "$&", "" ),
|
||
TestVectors( "abc", "", "n", "-", "-" ),
|
||
TestVectors( "a*", "", "y", "$&", "" ),
|
||
TestVectors( "([abc])*d", "abbbcd", "y", "$&-$1", "abbbcd-c" ),
|
||
TestVectors( "([abc])*bcd", "abcd", "y", "$&-$1", "abcd-a" ),
|
||
TestVectors( "a|b|c|d|e", "e", "y", "$&", "e" ),
|
||
TestVectors( "(a|b|c|d|e)f", "ef", "y", "$&-$1", "ef-e" ),
|
||
TestVectors( "((a*|b))*", "aabb", "y", "-", "-" ),
|
||
TestVectors( "abcd*efg", "abcdefg", "y", "$&", "abcdefg" ),
|
||
TestVectors( "ab*", "xabyabbbz", "y", "$&", "ab" ),
|
||
TestVectors( "ab*", "xayabbbz", "y", "$&", "a" ),
|
||
TestVectors( "(ab|cd)e", "abcde", "y", "$&-$1", "cde-cd" ),
|
||
TestVectors( "[abhgefdc]ij", "hij", "y", "$&", "hij" ),
|
||
TestVectors( "^(ab|cd)e", "abcde", "n", "x$1y", "xy" ),
|
||
TestVectors( "(abc|)ef", "abcdef", "y", "$&-$1", "ef-" ),
|
||
TestVectors( "(a|b)c*d", "abcd", "y", "$&-$1", "bcd-b" ),
|
||
TestVectors( "(ab|ab*)bc", "abc", "y", "$&-$1", "abc-a" ),
|
||
TestVectors( "a([bc]*)c*", "abc", "y", "$&-$1", "abc-bc" ),
|
||
TestVectors( "a([bc]*)(c*d)", "abcd", "y", "$&-$1-$2", "abcd-bc-d" ),
|
||
TestVectors( "a([bc]+)(c*d)", "abcd", "y", "$&-$1-$2", "abcd-bc-d" ),
|
||
TestVectors( "a([bc]*)(c+d)", "abcd", "y", "$&-$1-$2", "abcd-b-cd" ),
|
||
TestVectors( "a[bcd]*dcdcde", "adcdcde", "y", "$&", "adcdcde" ),
|
||
TestVectors( "a[bcd]+dcdcde", "adcdcde", "n", "-", "-" ),
|
||
TestVectors( "(ab|a)b*c", "abc", "y", "$&-$1", "abc-ab" ),
|
||
TestVectors( "((a)(b)c)(d)", "abcd", "y", "$1-$2-$3-$4", "abc-a-b-d" ),
|
||
TestVectors( "[a-zA-Z_][a-zA-Z0-9_]*", "alpha", "y", "$&", "alpha" ),
|
||
TestVectors( "^a(bc+|b[eh])g|.h$", "abh", "y", "$&-$1", "bh-" ),
|
||
TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "effgz", "y", "$&-$1-$2", "effgz-effgz-" ),
|
||
TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "ij", "y", "$&-$1-$2", "ij-ij-j" ),
|
||
TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "effg", "n", "-", "-" ),
|
||
TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "bcdd", "n", "-", "-" ),
|
||
TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "reffgz", "y", "$&-$1-$2", "effgz-effgz-" ),
|
||
TestVectors( "(((((((((a)))))))))", "a", "y", "$&", "a" ),
|
||
TestVectors( "multiple words of text", "uh-uh", "n", "-", "-" ),
|
||
TestVectors( "multiple words", "multiple words, yeah", "y", "$&", "multiple words" ),
|
||
TestVectors( "(.*)c(.*)", "abcde", "y", "$&-$1-$2", "abcde-ab-de" ),
|
||
TestVectors( "\\((.*), (.*)\\)", "(a, b)", "y", "($2, $1)", "(b, a)" ),
|
||
TestVectors( "abcd", "abcd", "y", "$&-&-$$$&", "abcd-&-$abcd" ),
|
||
TestVectors( "a(bc)d", "abcd", "y", "$1-$$1-$$$1", "bc-$1-$bc" ),
|
||
TestVectors( "[k]", "ab", "n", "-", "-" ),
|
||
TestVectors( "[ -~]*", "abc", "y", "$&", "abc" ),
|
||
TestVectors( "[ -~ -~]*", "abc", "y", "$&", "abc" ),
|
||
TestVectors( "[ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
|
||
TestVectors( "[ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
|
||
TestVectors( "[ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
|
||
TestVectors( "[ -~ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
|
||
TestVectors( "[ -~ -~ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
|
||
TestVectors( "a{2}", "candy", "n", "", "" ),
|
||
TestVectors( "a{2}", "caandy", "y", "$&", "aa" ),
|
||
TestVectors( "a{2}", "caaandy", "y", "$&", "aa" ),
|
||
TestVectors( "a{2,}", "candy", "n", "", "" ),
|
||
TestVectors( "a{2,}", "caandy", "y", "$&", "aa" ),
|
||
TestVectors( "a{2,}", "caaaaaandy", "y", "$&", "aaaaaa" ),
|
||
TestVectors( "a{1,3}", "cndy", "n", "", "" ),
|
||
TestVectors( "a{1,3}", "candy", "y", "$&", "a" ),
|
||
TestVectors( "a{1,3}", "caandy", "y", "$&", "aa" ),
|
||
TestVectors( "a{1,3}", "caaaaaandy", "y", "$&", "aaa" ),
|
||
TestVectors( "e?le?", "angel", "y", "$&", "el" ),
|
||
TestVectors( "e?le?", "angle", "y", "$&", "le" ),
|
||
TestVectors( "\\bn\\w", "noonday", "y", "$&", "no" ),
|
||
TestVectors( "\\wy\\b", "possibly yesterday", "y", "$&", "ly" ),
|
||
TestVectors( "\\w\\Bn", "noonday", "y", "$&", "on" ),
|
||
TestVectors( "y\\B\\w", "possibly yesterday", "y", "$&", "ye" ),
|
||
TestVectors( "\\cJ", "abc\ndef", "y", "$&", "\n" ),
|
||
TestVectors( "\\d", "B2 is", "y", "$&", "2" ),
|
||
TestVectors( "\\D", "B2 is", "y", "$&", "B" ),
|
||
TestVectors( "\\s\\w*", "foo bar", "y", "$&", " bar" ),
|
||
TestVectors( "\\S\\w*", "foo bar", "y", "$&", "foo" ),
|
||
TestVectors( "abc", "ababc", "y", "$&", "abc" ),
|
||
TestVectors( "apple(,)\\sorange\\1", "apple, orange, cherry, peach", "y", "$&", "apple, orange," ),
|
||
TestVectors( "(\\w+)\\s(\\w+)", "John Smith", "y", "$2, $1", "Smith, John" ),
|
||
TestVectors( "\\n\\f\\r\\t\\v", "abc\n\f\r\t\vdef", "y", "$&", "\n\f\r\t\v" ),
|
||
TestVectors( ".*c", "abcde", "y", "$&", "abc" ),
|
||
TestVectors( "^\\w+((;|=)\\w+)+$", "some=host=tld", "y", "$&-$1-$2", "some=host=tld-=tld-=" ),
|
||
TestVectors( "^\\w+((\\.|-)\\w+)+$", "some.host.tld", "y", "$&-$1-$2", "some.host.tld-.tld-." ),
|
||
TestVectors( "q(a|b)*q", "xxqababqyy", "y", "$&-$1", "qababq-b" ),
|
||
TestVectors( "^(a)(b){0,1}(c*)", "abcc", "y", "$1 $2 $3", "a b cc" ),
|
||
TestVectors( "^(a)((b){0,1})(c*)", "abcc", "y", "$1 $2 $3", "a b b" ),
|
||
TestVectors( "^(a)(b)?(c*)", "abcc", "y", "$1 $2 $3", "a b cc" ),
|
||
TestVectors( "^(a)((b)?)(c*)", "abcc", "y", "$1 $2 $3", "a b b" ),
|
||
TestVectors( "^(a)(b){0,1}(c*)", "acc", "y", "$1 $2 $3", "a cc" ),
|
||
TestVectors( "^(a)((b){0,1})(c*)", "acc", "y", "$1 $2 $3", "a " ),
|
||
TestVectors( "^(a)(b)?(c*)", "acc", "y", "$1 $2 $3", "a cc" ),
|
||
TestVectors( "^(a)((b)?)(c*)", "acc", "y", "$1 $2 $3", "a " ),
|
||
TestVectors( "(?:ab){3}", "_abababc","y", "$&-$1", "ababab-" ),
|
||
TestVectors( "(?:a(?:x)?)+", "aaxaxx", "y", "$&-$1-$2", "aaxax--" ),
|
||
TestVectors( `\W\w\W`, "aa b!ca", "y", "$&", " b!"),
|
||
//more repetitions:
|
||
TestVectors( "(?:a{2,4}b{1,3}){1,2}", "aaabaaaabbb", "y", "$&", "aaabaaaabbb" ),
|
||
TestVectors( "(?:a{2,4}b{1,3}){1,2}?", "aaabaaaabbb", "y", "$&", "aaab" ),
|
||
//groups:
|
||
TestVectors( "(abc)|(edf)|(xyz)", "xyz", "y", "$1-$2-$3","--xyz"),
|
||
TestVectors( "(?P<q>\\d+)/(?P<d>\\d+)", "2/3", "y", "${d}/${q}", "3/2"),
|
||
//set operations:
|
||
TestVectors( "[a-z--d-f]", " dfa", "y", "$&", "a"),
|
||
TestVectors( "[abc[pq--acq]]{2}", "bqpaca", "y", "$&", "pa"),
|
||
TestVectors( "[a-z9&&abc0-9]{3}", "z90a0abc", "y", "$&", "abc"),
|
||
TestVectors( "[0-9a-f~~0-5a-z]{2}", "g0a58x", "y", "$&", "8x"),
|
||
TestVectors( "[abc[pq]xyz[rs]]{4}", "cqxr", "y", "$&", "cqxr"),
|
||
TestVectors( "[abcdf--[ab&&[bcd]][acd]]", "abcdefgh", "y", "$&", "f"),
|
||
TestVectors( "[a-c||d-f]+", "abcdef", "y", "$&", "abcdef"),
|
||
TestVectors( "[a-f--a-c]+", "abcdef", "y", "$&", "def"),
|
||
TestVectors( "[a-c&&b-f]+", "abcdef", "y", "$&", "bc"),
|
||
TestVectors( "[a-c~~b-f]+", "abcdef", "y", "$&", "a"),
|
||
//unicode blocks & properties:
|
||
TestVectors( `\P{Inlatin1suppl ement}`, "\u00c2!", "y", "$&", "!"),
|
||
TestVectors( `\p{InLatin-1 Supplement}\p{in-mathematical-operators}\P{Inlatin1suppl ement}`, "\u00c2\u2200\u00c3\u2203.", "y", "$&", "\u00c3\u2203."),
|
||
TestVectors( `[-+*/\p{in-mathematical-operators}]{2}`, "a+\u2212", "y", "$&", "+\u2212"),
|
||
TestVectors( `\p{Ll}+`, "XabcD", "y", "$&", "abc"),
|
||
TestVectors( `\p{Lu}+`, "абвГДЕ", "y", "$&", "ГДЕ"),
|
||
TestVectors( `^\p{Currency Symbol}\p{Sc}`, "$₤", "y", "$&", "$₤"),
|
||
TestVectors( `\p{Common}\p{Thai}`, "!ฆ", "y", "$&", "!ฆ"),
|
||
TestVectors( `[\d\s]*\D`, "12 \t3\U00001680\u0F20_2", "y", "$&", "12 \t3\U00001680\u0F20_"),
|
||
TestVectors( `[c-wф]фф`, "ффф", "y", "$&", "ффф"),
|
||
//case insensitive:
|
||
TestVectors( `^abcdEf$`, "AbCdEF", "y", "$&", "AbCdEF", "i"),
|
||
TestVectors( `Русский язык`, "рУсскИй ЯзЫк", "y", "$&", "рУсскИй ЯзЫк", "i"),
|
||
TestVectors( `ⒶⒷⓒ` , "ⓐⓑⒸ", "y", "$&", "ⓐⓑⒸ", "i"),
|
||
TestVectors( "\U00010400{2}", "\U00010428\U00010400 ", "y", "$&", "\U00010428\U00010400", "i"),
|
||
TestVectors( `[adzУ-Я]{4}`, "DzюЯ", "y", "$&", "DzюЯ", "i"),
|
||
TestVectors( `\p{L}\p{Lu}{10}`, "абвгдеЖЗИКЛ", "y", "$&", "абвгдеЖЗИКЛ", "i"),
|
||
TestVectors( `(?:Dåb){3}`, "DåbDÅBdÅb", "y", "$&", "DåbDÅBdÅb", "i"),
|
||
//escapes:
|
||
TestVectors( `\u0041\u005a\U00000065\u0001`, "AZe\u0001", "y", "$&", "AZe\u0001"),
|
||
TestVectors( `\u`, "", "c", "-", "-"),
|
||
TestVectors( `\U`, "", "c", "-", "-"),
|
||
TestVectors( `\u003`, "", "c", "-", "-"),
|
||
TestVectors( `[\x00-\x7f]{4}`, "\x00\x09ab", "y", "$&", "\x00\x09ab"),
|
||
TestVectors( `[\cJ\cK\cA-\cD]{3}\cQ`, "\x01\x0B\x0A\x11", "y", "$&", "\x01\x0B\x0A\x11"),
|
||
TestVectors( `\r\n\v\t\f\\`, "\r\n\v\t\f\\", "y", "$&", "\r\n\v\t\f\\"),
|
||
TestVectors( `[\u0003\u0001]{2}`, "\u0001\u0003", "y", "$&", "\u0001\u0003"),
|
||
TestVectors( `^[\u0020-\u0080\u0001\n-\r]{8}`, "abc\u0001\v\f\r\n", "y", "$&", "abc\u0001\v\f\r\n"),
|
||
TestVectors( `\w+\S\w+`, "ab7!44c", "y", "$&", "ab7!44c"),
|
||
TestVectors( `\b\w+\b`, " abde4 ", "y", "$&", "abde4"),
|
||
TestVectors( `\b\w+\b`, " abde4", "y", "$&", "abde4"),
|
||
TestVectors( `\b\w+\b`, "abde4 ", "y", "$&", "abde4"),
|
||
TestVectors( `\pL\pS`, "a\u02DA", "y", "$&", "a\u02DA"),
|
||
TestVectors( `\pX`, "", "c", "-", "-"),
|
||
// ^, $, \b, \B, multiline :
|
||
TestVectors( `\r.*?$`, "abc\r\nxy", "y", "$&", "\r\nxy", "sm"),
|
||
TestVectors( `^a$^b$`, "a\r\nb\n", "n", "$&", "-", "m"),
|
||
TestVectors( `^a$\r\n^b$`,"a\r\nb\n", "y", "$&", "a\r\nb", "m"),
|
||
TestVectors( `^$`, "\r\n", "y", "$&", "", "m"),
|
||
TestVectors( `^a$\nx$`, "a\nx\u2028","y", "$&", "a\nx", "m"),
|
||
TestVectors( `^a$\nx$`, "a\nx\u2029","y", "$&", "a\nx", "m"),
|
||
TestVectors( `^a$\nx$`, "a\nx\u0085","y", "$&", "a\nx","m"),
|
||
TestVectors( `^x$`, "\u2028x", "y", "$&", "x", "m"),
|
||
TestVectors( `^x$`, "\u2029x", "y", "$&", "x", "m"),
|
||
TestVectors( `^x$`, "\u0085x", "y", "$&", "x", "m"),
|
||
TestVectors( `\b^.`, "ab", "y", "$&", "a"),
|
||
TestVectors( `\B^.`, "ab", "n", "-", "-"),
|
||
TestVectors( `^ab\Bc\B`, "\r\nabcd", "y", "$&", "abc", "m"),
|
||
TestVectors( `^.*$`, "12345678", "y", "$&", "12345678"),
|
||
|
||
// luckily obtained regression on incremental matching in backtracker
|
||
TestVectors( `^(?:(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*([^ ]*)\s*#|# (?:\w|_)+=((?:\w|_)+))`,
|
||
"0020 ; White_Space # ", "y", "$1-$2-$3", "--0020"),
|
||
//lookahead
|
||
TestVectors( "(foo.)(?=(bar))", "foobar foodbar", "y", "$&-$1-$2", "food-food-bar" ),
|
||
TestVectors( `\b(\d+)[a-z](?=\1)`, "123a123", "y", "$&-$1", "123a-123" ),
|
||
TestVectors( `\$(?!\d{3})\w+`, "$123 $abc", "y", "$&", "$abc"),
|
||
TestVectors( `(abc)(?=(ed(f))\3)`, "abcedff", "y", "-", "-"),
|
||
TestVectors( `\b[A-Za-z0-9.]+(?=(@(?!gmail)))`, "a@gmail,x@com", "y", "$&-$1", "x-@"),
|
||
TestVectors( `x()(abc)(?=(d)(e)(f)\2)`, "xabcdefabc", "y", "$&", "xabc"),
|
||
TestVectors( `x()(abc)(?=(d)(e)(f)()\3\4\5)`, "xabcdefdef", "y", "$&", "xabc"),
|
||
//lookback
|
||
TestVectors( `(?<=(ab))\d`, "12ba3ab4", "y", "$&-$1", "4-ab", "i"),
|
||
TestVectors( `\w(?<!\d)\w`, "123ab24", "y", "$&", "ab"),
|
||
TestVectors( `(?<=Dåb)x\w`, "DåbDÅBxdÅb", "y", "$&", "xd", "i"),
|
||
TestVectors( `(?<=(ab*c))x`, "abbbbcxac", "y", "$&-$1", "x-abbbbc"),
|
||
TestVectors( `(?<=(ab*?c))x`, "abbbbcxac", "y", "$&-$1", "x-abbbbc"),
|
||
TestVectors( `(?<=(a.*?c))x`, "ababbcxac", "y", "$&-$1", "x-abbc"),
|
||
TestVectors( `(?<=(a{2,4}b{1,3}))x`, "yyaaaabx", "y", "$&-$1", "x-aaaab"),
|
||
TestVectors( `(?<=((?:a{2,4}b{1,3}){1,2}))x`, "aabbbaaaabx", "y", "$&-$1", "x-aabbbaaaab"),
|
||
TestVectors( `(?<=((?:a{2,4}b{1,3}){1,2}?))x`, "aabbbaaaabx", "y", "$&-$1", "x-aaaab"),
|
||
TestVectors( `(?<=(abc|def|aef))x`, "abcx", "y", "$&-$1", "x-abc"),
|
||
TestVectors( `(?<=(abc|def|aef))x`, "aefx", "y", "$&-$1", "x-aef"),
|
||
TestVectors( `(?<=(abc|dabc))(x)`, "dabcx", "y", "$&-$1-$2", "x-abc-x"),
|
||
TestVectors( `(?<=(|abc))x`, "dabcx", "y", "$&-$1", "x-"),
|
||
TestVectors( `(?<=((ab|da)*))x`, "abdaabx", "y", "$&-$2-$1", "x-ab-abdaab"),
|
||
TestVectors( `a(?<=(ba(?<=(aba)(?<=aaba))))`, "aabaa", "y", "$&-$1-$2", "a-ba-aba"),
|
||
TestVectors( `.(?<!b).`, "bax", "y", "$&", "ax"),
|
||
TestVectors( `(?<=b(?<!ab)).`, "abbx", "y", "$&", "x"),
|
||
TestVectors( `(?<=\.|[!?]+)X`, "Hey?!X", "y", "$&", "X"),
|
||
TestVectors( `(?<=\.|[!?]+)a{3}`, ".Nope.aaaX", "y", "$&", "aaa"),
|
||
//mixed lookaround
|
||
TestVectors( `a(?<=a(?=b))b`, "ab", "y", "$&", "ab"),
|
||
TestVectors( `a(?<=a(?!b))c`, "ac", "y", "$&", "ac"),
|
||
];
|
||
// Expands the replacement format `fmt` against the captures of match `m`
// and returns the accumulated text.
string produceExpected(M,String)(auto ref M m, String fmt)
{
    auto sink = appender!String();
    replaceFmt(fmt, m.captures, sink, true);
    return sink.data;
}
|
||
// Runs every row of `tv` through `matchFn` once per code-unit width
// (char, wchar, dchar), checking compile outcome, match outcome and,
// for matching rows, the expanded replacement text.
void run_tests(alias matchFn)()
{
    foreach (Char; AliasSeq!( char, wchar, dchar))
    (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
        alias String = immutable(Char)[];

        // Width-specific twin of the outer helper: expands `fmt` with m's captures.
        String produceExpected(M,Range)(auto ref M m, Range fmt)
        {
            auto sink = appender!(String)();
            replaceFmt(fmt, m.captures, sink, true);
            return sink.data;
        }

        Regex!(Char) engine;
        foreach (idx, vec; tv)
        {
            immutable uint verdict = vec.result[0];
            immutable bool mustNotCompile = verdict == 'c';
            debug(std_regex_test) writeln(" Test #", idx, " pattern: ", vec.pattern, " with Char = ", Char.stringof);

            bool compiled = true;
            try
            {
                engine = regex(to!(String)(vec.pattern), vec.flags);
            }
            catch (RegexException e)
            {
                compiled = false;
                debug(std_regex_test) writeln(e.msg);
            }

            // 'c' rows must throw; every other row must build.
            assert(compiled != mustNotCompile, "failed to compile pattern "~vec.pattern);
            if (mustNotCompile)
                continue;

            auto m = matchFn(to!(String)(vec.input), engine);
            immutable bool found = !m.empty;
            immutable bool mustMatch = verdict == 'y';
            assert(found == mustMatch, text(matchFn.stringof ~": failed to match pattern #", idx ,": ", vec.pattern));
            if (!mustMatch)
                continue;

            auto result = produceExpected(m, to!(String)(vec.format));
            assert(result == to!String(vec.replace),
                text(matchFn.stringof ~": mismatch pattern #", idx, ": ", vec.pattern," expected: ",
                        vec.replace, " vs ", result));
        }
    }();
    debug(std_regex_test) writeln("!!! FReD bulk test done "~matchFn.stringof~" !!!");
}
|
||
|
||
|
||
// Checks ctRegex against the run-time parser for a compile-time-selected
// subset of `tv`. The subset is chosen by the std_regex_ct1..ct4 version
// identifiers; with none set, a reduced default selection is used.
// For each selected row: 'c' rows must be rejected at compile time; other
// rows must yield the same IR as regex() and the expected match outcome.
// A replacement-text mismatch is only printed, not asserted.
// Diagnostics report the index into `tv`, so the numbers line up with
// run_tests and with the ranges named in the version blocks.
void ct_tests()
{
    version(std_regex_ct1)
    {
        pragma(msg, "Testing 1st part of ctRegex");
        alias Tests = Sequence!(0, 155);
    }
    else version(std_regex_ct2)
    {
        pragma(msg, "Testing 2nd part of ctRegex");
        alias Tests = Sequence!(155, 174);
    }
    //FIXME: #174-178 contains CTFE parser bug
    else version(std_regex_ct3)
    {
        pragma(msg, "Testing 3rd part of ctRegex");
        alias Tests = Sequence!(178, 220);
    }
    else version(std_regex_ct4)
    {
        pragma(msg, "Testing 4th part of ctRegex");
        alias Tests = Sequence!(220, tv.length);
    }
    else
        alias Tests = AliasSeq!(Sequence!(0, 30), Sequence!(235, tv.length-5));

    foreach (idx; Tests)
    (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
        enum tvd = tv[idx];
        static if (tvd.result == "c")
        {
            static assert(!__traits(compiles, (){
                enum r = regex(tvd.pattern, tvd.flags);
            }), "errornously compiles regex pattern: " ~ tvd.pattern);
        }
        else
        {
            //BUG: tv[idx] is fine but tvd is not known at compile time?!
            auto r = ctRegex!(tv[idx].pattern, tv[idx].flags);
            auto nr = regex(tvd.pattern, tvd.flags);
            assert(equal(r.ir, nr.ir),
                text("!C-T regex! failed to compile pattern #", idx ,": ", tvd.pattern));
            auto m = match(tvd.input, r);
            auto c = tvd.result[0];
            // A 'y' row must produce a non-empty match; any other row an empty one.
            assert((c == 'y') != m.empty, text("ctRegex: failed to match pattern #",
                idx ,": ", tvd.pattern));
            if (c == 'y')
            {
                import std.stdio;
                auto result = produceExpected(m, tvd.format);
                if (result != tvd.replace)
                    writeln("ctRegex mismatch pattern #", idx, ": ", tvd.pattern," expected: ",
                            tvd.replace, " vs ", result);
            }
        }
    }();
    debug(std_regex_test) writeln("!!! FReD C-T test done !!!");
}
|
||
|
||
// Compile-time checks first, then the table through each run-time matcher.
ct_tests();
run_tests!(bmatch)(); //backtracker
run_tests!(match)(); //thompson VM
|
||
}
|
||
|
||
unittest
{
    // Simple compile-time patterns run through bmatch.
    auto literal = ctRegex!"abc";
    assert("abc".bmatch(literal).hit == "abc");

    auto starred = ctRegex!"ab*c";
    assert("abbbbc".bmatch(starred).hit == "abbbbc");
}
|
||
unittest
{
    // Anchors and word-boundary assertions in compile-time patterns.
    auto anchored = ctRegex!"^abc$";
    assert("abc".bmatch(anchored).hit == "abc");

    auto bounded = ctRegex!`\b(a\B[a-z]b)\b`;
    auto groups = match("azb", bounded).captures;
    assert(array(groups) == ["azb", "azb"]);
}
|
||
|
||
unittest
{
    // Nested counted repetition: greedy on char input, lazy on wchar input.
    auto greedyRep = ctRegex!"(?:a{2,4}b{1,3}){1,2}";
    auto greedyHit = bmatch("aaabaaaabbb", greedyRep).hit;
    assert(greedyHit == "aaabaaaabbb");

    auto lazyRep = ctRegex!("(?:a{2,4}b{1,3}){1,2}?"w);
    auto lazyHit = bmatch("aaabaaaabbb"w, lazyRep).hit;
    assert(lazyHit == "aaab"w);
}
|
||
|
||
unittest
{
    // Pattern compiled with the "sm" flags.
    auto tailRe = ctRegex!(`\r.*?$`,"sm");
    assert(bmatch("abc\r\nxy", tailRe).hit == "\r\nxy");

    // Lazy quantifier: only the first packet element is taken.
    auto packetRe = ctRegex!("<packet.*?/packet>");
    auto firstPacket = bmatch("<packet>text</packet><packet>text</packet>", packetRe).hit;
    assert(firstPacket == "<packet>text</packet>");
}
|
||
|
||
unittest
{
    // Optional group followed by a starred group.
    auto optGroup = ctRegex!("^(a)(b)?(c*)");
    auto optHit = bmatch("abcc", optGroup);
    assert(optHit);
    foreach (n, wanted; ["a", "b", "cc"])
        assert(optHit.captures[n + 1] == wanted);

    // Starred alternation group: capture 1 holds the last iteration.
    auto starGroup = ctRegex!("q(a|b)*q");
    auto starHit = match("xxqababqyy", starGroup);
    assert(starHit);
    assert(equal(bmatch("xxqababqyy", starGroup).captures, ["qababq", "b"]));
}
|
||
|
||
unittest
{
    // The parser must emit the same IR under CTFE as at run time.
    //CTFE parser BUG is triggered by group
    //in the middle of alternation (at least not first and not last)
    foreach (pat; AliasSeq!("a|b|c", `abc|(edf)|xyz`))
    {
        enum atCompileTime = regex(pat);
        auto atRunTime = regex(pat);
        assert(equal(atCompileTime.ir, atRunTime.ir));
    }
}
|
||
|
||
unittest
{
    // ctRegex results held in enums (compile-time constants).
    enum altRe = ctRegex!"(A|B|C)";
    auto altHit = match("B", altRe);
    assert(altHit);
    assert(equal(altHit.captures, [ "B", "B"]));

    enum altStarRe = ctRegex!"(A|B)*";
    assert(match("BAAA", altStarRe));

    // Case-insensitive counted repetition.
    enum countedRe = ctRegex!("a{3,4}","i");
    auto countedHit = match("AaA", countedRe);
    assert(countedHit);
    assert(countedHit.captures[0] == "AaA");

    enum lazyCountedRe = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i");
    auto lazyCountedHit = match("aaaabc", lazyCountedRe);
    assert(lazyCountedHit);
    assert(lazyCountedHit.captures[0] == "aaaab");

    // Unanchored optional group through bmatch.
    auto optGroup = ctRegex!("(a)(b)?(c*)");
    auto optHit = bmatch("abcc", optGroup);
    assert(optHit);
    assert(optHit.captures[1] == "a");
    assert(optHit.captures[2] == "b");
    assert(optHit.captures[3] == "cc");

    // Global multi-line matching across a lone '\r'.
    auto lineRe = ctRegex!(".*$", "gm");
    auto lineHits = match("First\rSecond", lineRe);
    assert(lineHits);
    assert(equal(map!"a.hit"(lineHits), ["First", "", "Second"]));
}
|
||
|
||
unittest
{
    //global matching
    // Instantiated once per matcher at the bottom of this block.
    void test_body(alias matchFn)()
    {
        string sentence = "a quick brown fox jumps over a lazy dog";
        auto wordRe = regex("\\b[a-z]+\\b","g");
        auto words = matchFn(sentence, wordRe).map!"a.hit".array;
        assert(equal(words, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"]));

        // Free-form ("x" flag) pattern spread over several lines.
        auto free_reg = regex(`

            abc
            \s+
            "
            (
                    [^"]+
                |   \\ "
            )+
            "
            z
        `, "x");
        // NOTE(review): this check calls match directly rather than matchFn - confirm intent.
        auto quoted = match(`abc "quoted string with \" inside"z`,free_reg);
        assert(quoted);

        // Lookbehind combined with the global flag.
        string mails = " hey@you.com no@spam.net ";
        auto domainRe = regex(`@(?<=\S+@)\S+`,"g");
        assert(equal(map!"a[0]"(matchFn(mails, domainRe)), ["@you.com", "@spam.net"]));

        // '$' in multi-line mode with greedy, non-empty and lazy line bodies.
        auto anyLines = matchFn("First line\nSecond line",regex(".*$","gm"));
        assert(equal(map!"a[0]"(anyLines), ["First line", "", "Second line"]));
        auto fullLines = matchFn("First line\nSecond line",regex(".+$","gm"));
        assert(equal(map!"a[0]"(fullLines), ["First line", "Second line"]));
        auto lazyLines = matchFn("First line\nSecond line",regex(".+?$","gm"));
        assert(equal(map!"a[0]"(lazyLines), ["First line", "Second line"]));
        debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!");
    }
    test_body!bmatch();
    test_body!match();
}
|
||
|
||
//tests for accumulated std.regex issues and other regressions
|
||
unittest
{
    // Instantiated once per matcher at the bottom of this block.
    void test_body(alias matchFn)()
    {
        //issue 5857
        //matching goes out of control if ... in (...){x} has .*/.+
        auto boundedCaps = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures;
        assert(boundedCaps[0] == "axxxzayyyyyzd");
        assert(boundedCaps[1] == "ayyyyyz");
        auto openCaps = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures;
        assert(openCaps[0] == "axxxayyyyyd");
        assert(openCaps[1] == "ayyyyy");

        //issue 2108
        //greedy vs non-greedy
        enum packets = "<packet>text</packet><packet>text</packet>";
        auto nogreed = regex("<packet.*?/packet>");
        assert(matchFn(packets, nogreed).hit == "<packet>text</packet>");
        auto greed = regex("<packet.*/packet>");
        assert(matchFn(packets, greed).hit == "<packet>text</packet><packet>text</packet>");

        //issue 4574
        //empty successful match still advances the input
        string[] seenBefore, seenAfter;
        foreach (hit; matchFn("abcabc", regex("","g")))
        {
            seenBefore ~= hit.pre;
            seenAfter ~= hit.post;
            assert(hit.hit.empty);
        }
        // Prefixes grow and suffixes shrink by one code unit per empty match.
        assert(seenBefore == ["", "a", "ab", "abc", "abca", "abcab", "abcabc"]);
        assert(seenAfter == ["abcabc", "bcabc", "cabc", "abc", "bc", "c", ""]);

        //issue 6076
        //regression on .*
        auto dotStarAlt = regex("c.*|d");
        auto noHit = matchFn("mm", dotStarAlt);
        assert(!noHit);
        debug(std_regex_test) writeln("!!! FReD REGRESSION test done "~matchFn.stringof~" !!!");

        // Nested counted groups over an input of one hundred '0' characters.
        auto rprealloc = regex(`((.){5}.{1,10}){5}`);
        auto zeros = array(repeat('0',100));
        auto zerosHit = matchFn(zeros, rprealloc);
        assert(zerosHit);

        // This pattern must build without throwing.
        assert(collectException(
                regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$")
                ) is null);

        // Every escapable character works escaped inside a set, a negated set and a range.
        foreach (ch; [Escapables])
        {
            immutable subject = to!string(ch);
            assert(match(subject,regex(`[\`~ch~`]`)));
            assert(!match(subject,regex(`[^\`~ch~`]`)));
            assert(match(subject,regex(`[\`~ch~`-\`~ch~`]`)));
        }

        //bugzilla 7718
        string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'";
        auto reStrCmd = regex (`(".*")|('.*')`, "g");
        assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)),
                     [`"/GIT/Ruby Apps/sec"`, `'notimer'`]));
    }
    test_body!bmatch();
    test_body!match();
}
|
||
|
||
// tests for replace
|
||
unittest
{
    // Exercises std.regex.replace for every string width with the given matcher.
    void test(alias matchFn)()
    {
        import std.uni : toUpper;

        foreach (String; AliasSeq!(string, wstring, dstring))
        {
            // Replacement callback: upper-cases whatever was matched.
            auto shout(Cap)(Cap m)
                if (is(Cap == Captures!(Cap.String)))
            {
                return toUpper(m.hit);
            }

            // Without "g" only the first hit is replaced.
            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c"))
                   == to!String("ack rapacity"));
            // With "g" every hit is replaced.
            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c"))
                   == to!String("ack capacity"));
            // $& expands to the whole match.
            assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]"))
                   == to!String("[n]oon"));
            // $` and $' expand to the text before and after the match.
            assert(std.regex.replace!(matchFn)(to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'"))
                   == to!String(": test2 test1 :"));

            auto shouted = std.regex.replace!(shout!(Captures!(String)))(
                    to!String("Strap a rocket engine on a chicken."),
                    regex(to!String("[ar]"), "g"));
            assert(shouted == "StRAp A Rocket engine on A chicken.");
        }
        debug(std_regex_test) writeln("!!! Replace test done "~matchFn.stringof~" !!!");
    }
    test!(bmatch)();
    test!(match)();
}
|
||
|
||
// tests for splitter
|
||
unittest
{
    // Input that ends in a separator yields a trailing empty piece.
    auto closedInput = ", abc, de, fg, hi, ";
    auto closedWanted = ["", "abc", "de", "fg", "hi", ""];
    assert(equal(splitter(closedInput, regex(", *")), closedWanted));

    // Input without a trailing separator.
    auto openInput = ", abc, de, fg, hi";
    auto openWanted = ["", "abc", "de", "fg", "hi"];
    auto pieces = splitter(openInput, regex(", *"));

    // Element-wise walk first, then a whole-range comparison.
    uint pos;
    foreach (piece; pieces)
    {
        assert(openWanted[pos] == piece);
        ++pos;
    }
    assert(equal(pieces, openWanted));
}
|
||
|
||
unittest
{
    // Only needs to build: splitter over a mutable char[] input.
    char[] mutableInput = ", abc, de, fg, hi, ".dup;
    auto pieces = splitter(mutableInput, regex(", *"));
}
|
||
|
||
unittest
{
    // split() yields the same pieces as the expected array.
    auto wanted = ["", "abc", "de", "fg", "hi", ""];
    auto got = split(", abc, de, fg, hi, ", regex(", *"));
    assert(equal(got, wanted[]));
}
|
||
|
||
unittest
{ // bugzilla 7141
    // An escaped '-' inside a set followed by a further '-'.
    string dashSet = `[a\--b]`;
    assert("-".match(dashSet));
    assert("b".match(dashSet));

    // A range that starts at '&'.
    string ampRange = `[&-z]`;
    assert("b".match(ampRange));
}
|
||
unittest
{//bugzilla 7111
    // '^' must match at the start of an empty input.
    auto startAnchor = regex("^");
    assert("".match(startAnchor));
}
|
||
unittest
{//bugzilla 7300
    // A two-character dstring pattern must not match a one-character input.
    auto hit = match("a"d, "aa"d);
    assert(!hit);
}
|
||
|
||
// bugzilla 7551
|
||
unittest
{
    // A ']' directly after '[' is an ordinary set member.
    auto bracketSet = regex("[]abc]*");
    assert(matchFirst("]ab", bracketSet).hit == "]ab");

    // An empty set is rejected.
    assertThrown(regex("[]"));

    // The same leading ']' combined with set difference.
    auto bracketDiff = regex("[]abc--ab]*");
    assert(matchFirst("]ac", bracketDiff).hit == "]");
}
|
||
|
||
unittest
{//bugzilla 7674
    // "$$" in a replacement format stands for one literal '$'.
    assert(replace("1234", regex("^"), "$$") == "$1234");

    // Backslashes in the replacement format are copied through unchanged.
    auto questionMark = regex(r"\?", "g");
    assert(replace("hello?", questionMark, r"\?") == r"hello\?");
    assert(replace("hello?", regex(r"\?", "g"), r"\\?") != r"hello\?");
}
|
||
unittest
{// bugzilla 7679
    // splitter and split with an enum ctRegex, for every string width.
    foreach (S; AliasSeq!(string, wstring, dstring))
    (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
        enum dotRe = ctRegex!(to!S(r"\."));
        auto subject = to!S("a.b");
        auto wanted = [to!S("a"), to!S("b")];
        assert(equal(std.regex.splitter(subject, dotRe), wanted));
        assert(split(subject, dotRe) == wanted);
    }();
}
|
||
unittest
|
||
{//bugzilla 8203
// Builds a "gm" regex with two named groups (comp, blk), matches it against a
// multi-line string and iterates the resulting match range repeatedly.
// There are no assertions: the block only requires that this completes
// without an exception.
|
||
string data = "
|
||
NAME = XPAW01_STA:STATION
|
||
NAME = XPAW01_STA
|
||
";
|
||
auto uniFileOld = data;
|
||
auto r = regex(
|
||
r"^NAME = (?P<comp>[a-zA-Z0-9_]+):*(?P<blk>[a-zA-Z0-9_]*)","gm");
|
||
auto uniCapturesNew = match(uniFileOld, r);
|
||
// Iterate the same match variable 20 times; the loop body is deliberately empty.
for (int i = 0; i < 20; i++)
|
||
foreach (matchNew; uniCapturesNew) {}
|
||
//a second issue with same symptoms
// The pattern uses Cyrillic ranges followed by a lookbehind; the match
// result is discarded.
|
||
auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`);
|
||
match("аллея Театральная", r2);
|
||
}
|
||
unittest
{
    // bugzilla 8637 (purity of enforce): a match result is passed directly
    // to enforce as its condition.
    auto word = regex("world");
    auto found = match("hello world", word);
    enforce(found);
}
|
||
|
||
// bugzilla 8725
|
||
unittest
|
||
{
// bugzilla 8725: a multi-line pattern compiled with the "g" and "x" flags is
// stored in a function-local static and used with replace.
// NOTE(review): a static local needs a compile-time initializer, so this
// presumably exercises regex construction during CTFE - confirm.
|
||
static italic = regex( r"\*
|
||
(?!\s+)
|
||
(.*?)
|
||
(?!\s+)
|
||
\*", "gx" );
|
||
// Only "*very*" is expected to be rewritten; "* is*" must stay untouched.
string input = "this * is* interesting, *very* interesting";
|
||
assert(replace(input, italic, "<i>$1</i>") ==
|
||
"this * is* interesting, <i>very</i> interesting");
|
||
}
|
||
|
||
// bugzilla 8349
|
||
unittest
{
    // bugzilla 8349: this pattern must compile through ctRegex and match.
    // Note that the regex pattern itself is probably bogus.
    enum patternText = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>";
    enum compiled = ctRegex!(patternText);
    auto result = match(r"\>wgEncode-blah-Tfbs.narrow</a>", compiled);
    assert(result);
}
|
||
|
||
// bugzilla 9211
|
||
unittest
{
    // bugzilla 9211: a repeated group must report its last iteration ("3"),
    // leaving the final digit for the second group.
    auto expected = ["1234", "3", "4"];
    foreach (pattern; [r"^(\w)*(\d)", r"^([0-9])*(\d)"])
    {
        auto result = match("1234", regex(pattern));
        assert(equal(result.front, expected));
    }
}
|
||
|
||
// bugzilla 9280
|
||
unittest
{
    // bugzilla 9280: named groups of a function-local static regex must be
    // reachable both by index and by name.
    static prefix = regex(r"^(?P<nick>.*?)!(?P<ident>.*?)@(?P<host>.*?)$");
    string line = "a!b@c";
    auto result = match(line, prefix);
    assert(result);

    auto groups = result.captures;
    assert(groups[1] == "a");
    assert(groups["nick"] == "a");
}
|
||
|
||
|
||
// bugzilla 9579
|
||
unittest
{
    // bugzilla 9579: replace with a mutable char[] input and a string format.
    // This combination used to give a compile error.
    string replacement = "($1)";
    char[] text = ['a', 'b', 'c'];
    auto letterA = regex(`(a)`, "g");
    auto replaced = replace(text, letterA, replacement);
    assert(replaced == "(a)bc");
}
|
||
|
||
// bugzilla 9634
|
||
unittest
{
    // bugzilla 9634: a non-capturing group compiled with ctRegex.
    auto nonCapturing = ctRegex!"(?:a+)";
    auto result = match("aaaa", nonCapturing);
    assert(result.hit == "aaaa");
}
|
||
|
||
//bugzilla 10798
|
||
unittest
{
    // bugzilla 10798: set difference inside a class compiled with ctRegex;
    // 'c' is excluded, so the hit stops before it.
    auto withoutC = ctRegex!("[abcd--c]*");
    auto result = match("abc", withoutC);
    assert(result);
    assert(result.hit == "ab");
}
|
||
|
||
// bugzilla 10913
|
||
unittest
{
    // bugzilla 10913: replace!(fun) is instantiated from a @system context
    // with a @system callback and from a @safe context with a @safe callback.
    @system static string systemCopy(const(char)[] text)
    {
        return text.dup;
    }

    @safe static string safeCopy(const(char)[] text)
    {
        return text.dup;
    }

    () @system {
        replace!((caps) => systemCopy(caps.hit))("blah", regex(`a`));
    }();

    () @safe {
        replace!((caps) => safeCopy(caps.hit))("blah", regex(`a`));
    }();
}
|
||
|
||
// bugzilla 11262
|
||
unittest
{
    // bugzilla 11262: the result of replace with a ctRegex enum is assigned
    // back to the variable that was its input.
    enum comma = ctRegex!(r",", "g");
    auto text = "This,List";
    text = replace(text, comma, "-");
    assert(text == "This-List");
}
|
||
|
||
// bugzilla 11775
|
||
unittest
{
    // bugzilla 11775: building the pattern "a{1,0}" has to throw.
    auto failure = collectException(regex("a{1,0}"));
    assert(failure);
}
|
||
|
||
// bugzilla 11839
|
||
unittest
{
    // bugzilla 11839: which identifiers are accepted as named-group names.
    auto patterns = [`(?P<var1>\w+)`, `(?P<v1>\w+)`, `(?P<__>\w+)`, `(?P<я>\w+)`];
    auto names = ["var1", "v1", "__", "я"];
    foreach (i, pattern; patterns)
        assert(equal(regex(pattern).namedCaptures, [names[i]]));

    // A name that starts with a digit is rejected.
    auto failure = collectException(regex(`(?P<1>\w+)`));
    assert(failure);
}
|
||
|
||
// bugzilla 12076
|
||
unittest
{
    // bugzilla 12076: a negative lookbehind of variable length compiled with
    // ctRegex. The block only runs the match; nothing is asserted.
    string text = "one two";
    auto lookbehind = ctRegex!(r"(?<!x[a-z]+)\s([a-z]+)");
    auto result = match(text, lookbehind);
}
|
||
|
||
// bugzilla 12105
|
||
unittest
{
    // bugzilla 12105: lazy and greedy ".*" followed by a negative lookahead,
    // both compiled with ctRegex.
    auto lazyStar = ctRegex!`.*?(?!a)`;
    auto lazyHit = matchFirst("aaab", lazyStar);
    assert(lazyHit.hit == "aaa");

    auto greedyStar = ctRegex!`.*(?!a)`;
    auto greedyHit = matchFirst("aaab", greedyStar);
    assert(greedyHit.hit == "aaab");
}
|
||
|
||
//bugzilla 11784
|
||
unittest
{
    // bugzilla 11784: class intersection (&&) with a nested negated class;
    // the first letter left over is 'b'.
    auto alphabet = "abcdefghijklmnopqrstuvwxyz";
    auto firstHit = matchFirst(alphabet, "[a-z&&[^aeiuo]]");
    assert(firstHit.hit == "b");
}
|
||
|
||
//bugzilla 12366
|
||
unittest
{
    // bugzilla 12366: nested lookaheads with backreferences under ctRegex.
    // Eight x's must be rejected, four x's accepted.
    auto nested = ctRegex!(`^((?=(xx+?)\2+$)((?=\2+$)(?=(x+)(\4+$))\5){2})*x?$`);

    auto eight = match("xxxxxxxx", nested);
    assert(eight.empty);

    auto four = match("xxxx", nested);
    assert(!four.empty);
}
|
||
|
||
// bugzilla 12582
|
||
unittest
{
    // bugzilla 12582: asking for a group name that the pattern does not
    // define has to throw.
    auto named = regex(`(?P<a>abc)`);
    auto failure = collectException(matchFirst("abc", named)["b"]);
    assert(failure);
}
|
||
|
||
// bugzilla 12691
|
||
unittest
{
    // bugzilla 12691: bmatch on a group with an empty alternative under '*'
    // must report no match for "e@", for both the run-time and the
    // compile-time pattern.
    auto runtimeResult = bmatch("e@", "^([a-z]|)*$");
    assert(runtimeResult.empty);

    auto compileTimeResult = bmatch("e@", ctRegex!`^([a-z]|)*$`);
    assert(compileTimeResult.empty);
}
|
||
|
||
//bugzilla 12713
|
||
unittest
{
    // bugzilla 12713: this malformed pattern (unbalanced '[') must be
    // rejected with an exception.
    string malformed = "[[a-z]([a-z]|(([[a-z])))";
    assertThrown(regex(malformed));
}
|
||
|
||
//bugzilla 12747
|
||
unittest
{
    // bugzilla 12747: each of these patterns uses \1 where group 1 is not
    // yet complete, and each must be rejected at construction.
    foreach (pattern; [`^x(\1)`, `^(x(\1))`, `^((x)(?=\1))`])
        assertThrown(regex(pattern));
}
|
||
|
||
// bugzilla 14504
|
||
unittest
{
    // bugzilla 14504: a long run of optional 'a's followed by mandatory 'a's.
    // The block only requires ctRegex to instantiate; nothing is matched.
    enum optionalRun = "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?";
    enum mandatoryRun = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    auto compiled = ctRegex!(optionalRun ~ mandatoryRun);
}
|
||
|
||
// bugzilla 14529
|
||
unittest
{
    // bugzilla 14529: with the "i" flag a character class must accept both
    // cases of each listed letter.
    auto caseInsensitive = regex(r"^[CDF]$", "i");
    foreach (letter; ["C", "c", "D", "d", "F", "f"])
    {
        auto matches = matchAll(letter, caseInsensitive);
        assert(matches.front.hit == letter);
    }
}
|
||
|
||
// bugzilla 14615
|
||
unittest
{
    // bugzilla 14615: when the pattern does not match, replaceFirst and the
    // *Into variants must hand back the input unchanged.
    import std.array : appender;
    // replaceAllInto is listed explicitly because it is called below; the
    // previously imported std.stdio.writeln was never used here.
    import std.regex : regex, replaceAllInto, replaceFirst, replaceFirstInto;

    auto example = "Hello, world!";
    auto pattern = regex("^Hello, (bug)");  // won't find this one
    auto result = replaceFirst(example, pattern, "$1 Sponge Bob");
    assert(result == "Hello, world!");  // Ok.

    auto sink = appender!string;
    replaceFirstInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!");
    // The sink is not cleared in between, so the second copy is appended.
    replaceAllInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!Hello, world!");
}
|
||
|
||
// bugzilla 15573
|
||
unittest
{
    // bugzilla 15573: under the "x" flag a space inside a character class
    // still has to match; the space is the only character of "a b" that is
    // in the class.
    auto spacedClass = regex("[c d]", "x");
    auto caps = matchFirst("a b", spacedClass);
    assert(caps);
}
|
||
|
||
// bugzilla 15864
|
||
unittest
{
    // bugzilla 15864: this pattern only has to be constructed without an
    // exception; nothing is matched against it.
    auto compiled = regex(`(<a (?:(?:\w+=\"[^"]*\")?\s*)*href="\.\.?)"`);
}
|
||
|
||
unittest
{
    // Inline (?# ...) comments are skipped by the parser ...
    auto withComments = regex("(?# comment)abc(?# comment2)");
    auto caps = matchFirst("abc", withComments);
    assert(caps);

    // ... while an unterminated comment is a pattern error.
    assertThrown(regex("(?#..."));
}
|