mirror of
https://github.com/dlang/phobos.git
synced 2025-04-29 22:50:38 +03:00
Merge pull request #5722 from DmitryOlshansky/regex-matcher-interfaces
std.regex: major internal redesign, also fixes issue 13532 merged-on-behalf-of: Andrei Alexandrescu <andralex@users.noreply.github.com>
This commit is contained in:
commit
ad489989ec
8 changed files with 1140 additions and 976 deletions
|
@ -13,9 +13,7 @@ import std.regex.internal.ir;
|
||||||
BacktrackingMatcher implements backtracking scheme of matching
|
BacktrackingMatcher implements backtracking scheme of matching
|
||||||
regular expressions.
|
regular expressions.
|
||||||
+/
|
+/
|
||||||
template BacktrackingMatcher(bool CTregex)
|
@trusted class BacktrackingMatcher(Char, Stream = Input!Char) : Matcher!Char
|
||||||
{
|
|
||||||
@trusted struct BacktrackingMatcher(Char, Stream = Input!Char)
|
|
||||||
if (is(Char : dchar))
|
if (is(Char : dchar))
|
||||||
{
|
{
|
||||||
alias DataIndex = Stream.DataIndex;
|
alias DataIndex = Stream.DataIndex;
|
||||||
|
@ -29,19 +27,17 @@ template BacktrackingMatcher(bool CTregex)
|
||||||
enum initialStack = 1 << 11; // items in a block of segmented stack
|
enum initialStack = 1 << 11; // items in a block of segmented stack
|
||||||
alias String = const(Char)[];
|
alias String = const(Char)[];
|
||||||
alias RegEx = Regex!Char;
|
alias RegEx = Regex!Char;
|
||||||
alias MatchFn = bool function (ref BacktrackingMatcher!(Char, Stream));
|
alias MatchFn = bool function (BacktrackingMatcher);
|
||||||
RegEx re; //regex program
|
const RegEx re; // regex program
|
||||||
static if (CTregex)
|
|
||||||
MatchFn nativeFn; // native code for that program
|
MatchFn nativeFn; // native code for that program
|
||||||
// Stream state
|
// Stream state
|
||||||
Stream s;
|
Stream s;
|
||||||
DataIndex index;
|
DataIndex index;
|
||||||
dchar front;
|
dchar front;
|
||||||
bool exhausted;
|
bool exhausted;
|
||||||
//backtracking machine state
|
// Backtracking machine state
|
||||||
uint pc, counter;
|
uint pc, counter;
|
||||||
DataIndex lastState = 0; //top of state stack
|
DataIndex lastState = 0; // Top of state stack
|
||||||
static if (!CTregex)
|
|
||||||
uint infiniteNesting;
|
uint infiniteNesting;
|
||||||
size_t[] memory;
|
size_t[] memory;
|
||||||
Trace[] merge;
|
Trace[] merge;
|
||||||
|
@ -69,6 +65,11 @@ template BacktrackingMatcher(bool CTregex)
|
||||||
}
|
}
|
||||||
//local slice of matches, global for backref
|
//local slice of matches, global for backref
|
||||||
Group!DataIndex[] matches, backrefed;
|
Group!DataIndex[] matches, backrefed;
|
||||||
|
size_t _refCount;
|
||||||
|
final:
|
||||||
|
|
||||||
|
override @property ref size_t refCount() { return _refCount; }
|
||||||
|
override @property ref const(RegEx) pattern(){ return re; }
|
||||||
|
|
||||||
static if (__traits(hasMember,Stream, "search"))
|
static if (__traits(hasMember,Stream, "search"))
|
||||||
{
|
{
|
||||||
|
@ -153,49 +154,64 @@ template BacktrackingMatcher(bool CTregex)
|
||||||
memory = memory[2..$];
|
memory = memory[2..$];
|
||||||
}
|
}
|
||||||
|
|
||||||
void initialize(ref RegEx program, Stream stream, void[] memBlock)
|
void initialize(ref const RegEx program, Stream stream, void[] memBlock)
|
||||||
{
|
{
|
||||||
re = program;
|
|
||||||
s = stream;
|
s = stream;
|
||||||
exhausted = false;
|
exhausted = false;
|
||||||
initExternalMemory(memBlock);
|
initExternalMemory(memBlock);
|
||||||
backrefed = null;
|
backrefed = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto dupTo(void[] memory)
|
override void dupTo(Matcher!Char m, void[] memBlock)
|
||||||
{
|
{
|
||||||
typeof(this) tmp = this;
|
auto backtracking = cast(BacktrackingMatcher) m;
|
||||||
tmp.initExternalMemory(memory);
|
backtracking.s = s;
|
||||||
return tmp;
|
backtracking.front = front;
|
||||||
|
backtracking.index = index;
|
||||||
|
backtracking.exhausted = exhausted;
|
||||||
|
backtracking.initExternalMemory(memBlock);
|
||||||
}
|
}
|
||||||
|
|
||||||
this(ref RegEx program, Stream stream, void[] memBlock, dchar ch, DataIndex idx)
|
this(ref const RegEx program, Stream stream, void[] memBlock, dchar ch, DataIndex idx)
|
||||||
{
|
{
|
||||||
|
_refCount = 1;
|
||||||
|
re = program;
|
||||||
|
nativeFn = null;
|
||||||
initialize(program, stream, memBlock);
|
initialize(program, stream, memBlock);
|
||||||
front = ch;
|
front = ch;
|
||||||
index = idx;
|
index = idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
this(ref RegEx program, Stream stream, void[] memBlock)
|
this(ref const RegEx program, MatchFn func, Stream stream, void[] memBlock)
|
||||||
{
|
{
|
||||||
|
_refCount = 1;
|
||||||
|
re = program;
|
||||||
|
initialize(program, stream, memBlock);
|
||||||
|
nativeFn = func;
|
||||||
|
next();
|
||||||
|
}
|
||||||
|
|
||||||
|
this(ref const RegEx program, Stream stream, void[] memBlock)
|
||||||
|
{
|
||||||
|
_refCount = 1;
|
||||||
|
re = program;
|
||||||
|
nativeFn = null;
|
||||||
initialize(program, stream, memBlock);
|
initialize(program, stream, memBlock);
|
||||||
next();
|
next();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto fwdMatcher(ref BacktrackingMatcher matcher, void[] memBlock)
|
auto fwdMatcher(ref const RegEx re, void[] memBlock)
|
||||||
{
|
{
|
||||||
alias BackMatcherTempl = .BacktrackingMatcher!(CTregex);
|
alias BackMatcher = BacktrackingMatcher!(Char, Stream);
|
||||||
alias BackMatcher = BackMatcherTempl!(Char, Stream);
|
auto fwdMatcher = new BackMatcher(re, s, memBlock, front, index);
|
||||||
auto fwdMatcher = BackMatcher(matcher.re, s, memBlock, front, index);
|
|
||||||
return fwdMatcher;
|
return fwdMatcher;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto bwdMatcher(ref BacktrackingMatcher matcher, void[] memBlock)
|
auto bwdMatcher(ref const RegEx re, void[] memBlock)
|
||||||
{
|
{
|
||||||
alias BackMatcherTempl = .BacktrackingMatcher!(CTregex);
|
alias BackMatcher = BacktrackingMatcher!(Char, typeof(s.loopBack(index)));
|
||||||
alias BackMatcher = BackMatcherTempl!(Char, typeof(s.loopBack(index)));
|
|
||||||
auto fwdMatcher =
|
auto fwdMatcher =
|
||||||
BackMatcher(matcher.re, s.loopBack(index), memBlock);
|
new BackMatcher(re, s.loopBack(index), memBlock);
|
||||||
return fwdMatcher;
|
return fwdMatcher;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -219,7 +235,7 @@ template BacktrackingMatcher(bool CTregex)
|
||||||
}
|
}
|
||||||
|
|
||||||
//lookup next match, fill matches with indices into input
|
//lookup next match, fill matches with indices into input
|
||||||
int match(Group!DataIndex[] matches)
|
override int match(Group!DataIndex[] matches)
|
||||||
{
|
{
|
||||||
debug(std_regex_matcher)
|
debug(std_regex_matcher)
|
||||||
{
|
{
|
||||||
|
@ -293,7 +309,7 @@ template BacktrackingMatcher(bool CTregex)
|
||||||
+/
|
+/
|
||||||
int matchImpl()
|
int matchImpl()
|
||||||
{
|
{
|
||||||
static if (CTregex && is(typeof(nativeFn(this))))
|
if (nativeFn)
|
||||||
{
|
{
|
||||||
debug(std_regex_ctr) writeln("using C-T matcher");
|
debug(std_regex_ctr) writeln("using C-T matcher");
|
||||||
return nativeFn(this);
|
return nativeFn(this);
|
||||||
|
@ -303,6 +319,7 @@ template BacktrackingMatcher(bool CTregex)
|
||||||
pc = 0;
|
pc = 0;
|
||||||
counter = 0;
|
counter = 0;
|
||||||
lastState = 0;
|
lastState = 0;
|
||||||
|
infiniteNesting = 0;
|
||||||
matches[] = Group!DataIndex.init;
|
matches[] = Group!DataIndex.init;
|
||||||
auto start = s._index;
|
auto start = s._index;
|
||||||
debug(std_regex_matcher)
|
debug(std_regex_matcher)
|
||||||
|
@ -580,19 +597,19 @@ template BacktrackingMatcher(bool CTregex)
|
||||||
immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw;
|
immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw;
|
||||||
auto mem = malloc(initialMemory(re))[0 .. initialMemory(re)];
|
auto mem = malloc(initialMemory(re))[0 .. initialMemory(re)];
|
||||||
scope(exit) free(mem.ptr);
|
scope(exit) free(mem.ptr);
|
||||||
|
auto slicedRe = re.withCode(re.ir[
|
||||||
|
pc+IRL!(IR.LookaheadStart) .. pc+IRL!(IR.LookaheadStart)+len+IRL!(IR.LookaheadEnd)
|
||||||
|
]);
|
||||||
static if (Stream.isLoopback)
|
static if (Stream.isLoopback)
|
||||||
{
|
{
|
||||||
auto matcher = bwdMatcher(this, mem);
|
auto matcher = bwdMatcher(slicedRe, mem);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
auto matcher = fwdMatcher(this, mem);
|
auto matcher = fwdMatcher(slicedRe, mem);
|
||||||
}
|
}
|
||||||
matcher.matches = matches[ms .. me];
|
matcher.matches = matches[ms .. me];
|
||||||
matcher.backrefed = backrefed.empty ? matches : backrefed;
|
matcher.backrefed = backrefed.empty ? matches : backrefed;
|
||||||
matcher.re.ir = re.ir[
|
|
||||||
pc+IRL!(IR.LookaheadStart) .. pc+IRL!(IR.LookaheadStart)+len+IRL!(IR.LookaheadEnd)
|
|
||||||
];
|
|
||||||
immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookaheadStart);
|
immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookaheadStart);
|
||||||
s.reset(save);
|
s.reset(save);
|
||||||
next();
|
next();
|
||||||
|
@ -609,20 +626,20 @@ template BacktrackingMatcher(bool CTregex)
|
||||||
immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw;
|
immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw;
|
||||||
auto mem = malloc(initialMemory(re))[0 .. initialMemory(re)];
|
auto mem = malloc(initialMemory(re))[0 .. initialMemory(re)];
|
||||||
scope(exit) free(mem.ptr);
|
scope(exit) free(mem.ptr);
|
||||||
|
auto slicedRe = re.withCode(re.ir[
|
||||||
|
pc + IRL!(IR.LookbehindStart) .. pc + IRL!(IR.LookbehindStart) + len + IRL!(IR.LookbehindEnd)
|
||||||
|
]);
|
||||||
static if (Stream.isLoopback)
|
static if (Stream.isLoopback)
|
||||||
{
|
{
|
||||||
alias Matcher = BacktrackingMatcher!(Char, Stream);
|
alias Matcher = BacktrackingMatcher!(Char, Stream);
|
||||||
auto matcher = Matcher(re, s, mem, front, index);
|
auto matcher = new Matcher(slicedRe, s, mem, front, index);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
alias Matcher = BacktrackingMatcher!(Char, typeof(s.loopBack(index)));
|
alias Matcher = BacktrackingMatcher!(Char, typeof(s.loopBack(index)));
|
||||||
auto matcher = Matcher(re, s.loopBack(index), mem);
|
auto matcher = new Matcher(slicedRe, s.loopBack(index), mem);
|
||||||
}
|
}
|
||||||
matcher.matches = matches[ms .. me];
|
matcher.matches = matches[ms .. me];
|
||||||
matcher.re.ir = re.ir[
|
|
||||||
pc + IRL!(IR.LookbehindStart) .. pc + IRL!(IR.LookbehindStart) + len + IRL!(IR.LookbehindEnd)
|
|
||||||
];
|
|
||||||
matcher.backrefed = backrefed.empty ? matches : backrefed;
|
matcher.backrefed = backrefed.empty ? matches : backrefed;
|
||||||
immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookbehindStart);
|
immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookbehindStart);
|
||||||
if (!match)
|
if (!match)
|
||||||
|
@ -715,9 +732,6 @@ template BacktrackingMatcher(bool CTregex)
|
||||||
val[0..$] = (cast(T*)&memory[lastState])[0 .. val.length];
|
val[0..$] = (cast(T*)&memory[lastState])[0 .. val.length];
|
||||||
debug(std_regex_matcher) writeln("pop array SP= ", lastState);
|
debug(std_regex_matcher) writeln("pop array SP= ", lastState);
|
||||||
}
|
}
|
||||||
|
|
||||||
static if (!CTregex)
|
|
||||||
{
|
|
||||||
//helper function, saves engine state
|
//helper function, saves engine state
|
||||||
void pushState(uint pc, uint counter)
|
void pushState(uint pc, uint counter)
|
||||||
{
|
{
|
||||||
|
@ -763,8 +777,6 @@ template BacktrackingMatcher(bool CTregex)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//very shitty string formatter, $$ replaced with next argument converted to string
|
//very shitty string formatter, $$ replaced with next argument converted to string
|
||||||
@trusted string ctSub( U...)(string format, U args)
|
@trusted string ctSub( U...)(string format, U args)
|
||||||
|
@ -805,7 +817,7 @@ struct CtContext
|
||||||
//to mark the portion of matches to save
|
//to mark the portion of matches to save
|
||||||
int match, total_matches;
|
int match, total_matches;
|
||||||
int reserved;
|
int reserved;
|
||||||
CodepointSet[] charsets;
|
const(CodepointInterval)[][] charsets;
|
||||||
|
|
||||||
|
|
||||||
//state of codegenerator
|
//state of codegenerator
|
||||||
|
@ -815,12 +827,15 @@ struct CtContext
|
||||||
int addr;
|
int addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
this(Char)(Regex!Char re)
|
this(Char)(const Regex!Char re)
|
||||||
{
|
{
|
||||||
match = 1;
|
match = 1;
|
||||||
reserved = 1; //first match is skipped
|
reserved = 1; //first match is skipped
|
||||||
total_matches = re.ngroup;
|
total_matches = re.ngroup;
|
||||||
charsets = re.charsets;
|
foreach (ref set; re.charsets)
|
||||||
|
{
|
||||||
|
charsets ~= set.intervals;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CtContext lookaround(uint s, uint e)
|
CtContext lookaround(uint s, uint e)
|
||||||
|
@ -876,7 +891,7 @@ struct CtContext
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
CtState ctGenBlock(Bytecode[] ir, int addr)
|
CtState ctGenBlock(const(Bytecode)[] ir, int addr)
|
||||||
{
|
{
|
||||||
CtState result;
|
CtState result;
|
||||||
result.addr = addr;
|
result.addr = addr;
|
||||||
|
@ -890,7 +905,7 @@ struct CtContext
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
CtState ctGenGroup(ref Bytecode[] ir, int addr)
|
CtState ctGenGroup(ref const(Bytecode)[] ir, int addr)
|
||||||
{
|
{
|
||||||
import std.algorithm.comparison : max;
|
import std.algorithm.comparison : max;
|
||||||
auto bailOut = "goto L_backtrack;";
|
auto bailOut = "goto L_backtrack;";
|
||||||
|
@ -932,10 +947,10 @@ struct CtContext
|
||||||
immutable len = ir[0].data;
|
immutable len = ir[0].data;
|
||||||
immutable behind = ir[0].code == IR.LookbehindStart || ir[0].code == IR.NeglookbehindStart;
|
immutable behind = ir[0].code == IR.LookbehindStart || ir[0].code == IR.NeglookbehindStart;
|
||||||
immutable negative = ir[0].code == IR.NeglookaheadStart || ir[0].code == IR.NeglookbehindStart;
|
immutable negative = ir[0].code == IR.NeglookaheadStart || ir[0].code == IR.NeglookbehindStart;
|
||||||
string fwdType = "typeof(fwdMatcher(matcher, []))";
|
string fwdType = "typeof(fwdMatcher(re, []))";
|
||||||
string bwdType = "typeof(bwdMatcher(matcher, []))";
|
string bwdType = "typeof(bwdMatcher(re, []))";
|
||||||
string fwdCreate = "fwdMatcher(matcher, mem)";
|
string fwdCreate = "fwdMatcher(re, mem)";
|
||||||
string bwdCreate = "bwdMatcher(matcher, mem)";
|
string bwdCreate = "bwdMatcher(re, mem)";
|
||||||
immutable start = IRL!(IR.LookbehindStart);
|
immutable start = IRL!(IR.LookbehindStart);
|
||||||
immutable end = IRL!(IR.LookbehindStart)+len+IRL!(IR.LookaheadEnd);
|
immutable end = IRL!(IR.LookbehindStart)+len+IRL!(IR.LookaheadEnd);
|
||||||
CtContext context = lookaround(ir[1].raw, ir[2].raw); //split off new context
|
CtContext context = lookaround(ir[1].raw, ir[2].raw); //split off new context
|
||||||
|
@ -946,7 +961,7 @@ struct CtContext
|
||||||
alias Lookaround = $$;
|
alias Lookaround = $$;
|
||||||
else
|
else
|
||||||
alias Lookaround = $$;
|
alias Lookaround = $$;
|
||||||
static bool matcher_$$(ref Lookaround matcher) @trusted
|
static bool matcher_$$(Lookaround matcher) @trusted
|
||||||
{
|
{
|
||||||
//(neg)lookaround piece start
|
//(neg)lookaround piece start
|
||||||
$$
|
$$
|
||||||
|
@ -992,7 +1007,7 @@ struct CtContext
|
||||||
}
|
}
|
||||||
|
|
||||||
//generate source for bytecode contained in OrStart ... OrEnd
|
//generate source for bytecode contained in OrStart ... OrEnd
|
||||||
CtState ctGenAlternation(Bytecode[] ir, int addr)
|
CtState ctGenAlternation(const(Bytecode)[] ir, int addr)
|
||||||
{
|
{
|
||||||
CtState[] pieces;
|
CtState[] pieces;
|
||||||
CtState r;
|
CtState r;
|
||||||
|
@ -1032,11 +1047,11 @@ struct CtContext
|
||||||
|
|
||||||
// generate fixup code for instruction in ir,
|
// generate fixup code for instruction in ir,
|
||||||
// fixup means it has an alternative way for control flow
|
// fixup means it has an alternative way for control flow
|
||||||
string ctGenFixupCode(Bytecode[] ir, int addr, int fixup)
|
string ctGenFixupCode(const(Bytecode)[] ir, int addr, int fixup)
|
||||||
{
|
{
|
||||||
return ctGenFixupCode(ir, addr, fixup); // call ref Bytecode[] version
|
return ctGenFixupCode(ir, addr, fixup); // call ref Bytecode[] version
|
||||||
}
|
}
|
||||||
string ctGenFixupCode(ref Bytecode[] ir, int addr, int fixup)
|
string ctGenFixupCode(ref const(Bytecode)[] ir, int addr, int fixup)
|
||||||
{
|
{
|
||||||
string r;
|
string r;
|
||||||
string testCode;
|
string testCode;
|
||||||
|
@ -1190,7 +1205,7 @@ struct CtContext
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
string ctQuickTest(Bytecode[] ir, int id)
|
string ctQuickTest(const(Bytecode)[] ir, int id)
|
||||||
{
|
{
|
||||||
uint pc = 0;
|
uint pc = 0;
|
||||||
while (pc < ir.length && ir[pc].isAtom)
|
while (pc < ir.length && ir[pc].isAtom)
|
||||||
|
@ -1217,7 +1232,7 @@ struct CtContext
|
||||||
}
|
}
|
||||||
|
|
||||||
//process & generate source for simple bytecodes at front of ir using address addr
|
//process & generate source for simple bytecodes at front of ir using address addr
|
||||||
CtState ctGenAtom(ref Bytecode[] ir, int addr)
|
CtState ctGenAtom(ref const(Bytecode)[] ir, int addr)
|
||||||
{
|
{
|
||||||
CtState result;
|
CtState result;
|
||||||
result.code = ctAtomCode(ir, addr);
|
result.code = ctAtomCode(ir, addr);
|
||||||
|
@ -1227,7 +1242,7 @@ struct CtContext
|
||||||
}
|
}
|
||||||
|
|
||||||
//D code for atom at ir using address addr, addr < 0 means quickTest
|
//D code for atom at ir using address addr, addr < 0 means quickTest
|
||||||
string ctAtomCode(Bytecode[] ir, int addr)
|
string ctAtomCode(const(Bytecode)[] ir, int addr)
|
||||||
{
|
{
|
||||||
string code;
|
string code;
|
||||||
string bailOut, nextInstr;
|
string bailOut, nextInstr;
|
||||||
|
@ -1282,7 +1297,7 @@ struct CtContext
|
||||||
if (charsets.length)
|
if (charsets.length)
|
||||||
{
|
{
|
||||||
string name = `func_`~to!string(addr+1);
|
string name = `func_`~to!string(addr+1);
|
||||||
string funcCode = charsets[ir[0].data].toSourceCode(name);
|
string funcCode = CodepointSet.toSourceCode(charsets[ir[0].data], name);
|
||||||
code ~= ctSub( `
|
code ~= ctSub( `
|
||||||
static $$
|
static $$
|
||||||
if (atEnd || !$$(front))
|
if (atEnd || !$$(front))
|
||||||
|
@ -1298,7 +1313,7 @@ struct CtContext
|
||||||
$$`, ir[0].data, bailOut, addr >= 0 ? "next();" :"", nextInstr);
|
$$`, ir[0].data, bailOut, addr >= 0 ? "next();" :"", nextInstr);
|
||||||
break;
|
break;
|
||||||
case IR.Trie:
|
case IR.Trie:
|
||||||
if (charsets.length && charsets[ir[0].data].byInterval.length <= 8)
|
if (charsets.length && charsets[ir[0].data].length <= 8)
|
||||||
goto case IR.CodepointSet;
|
goto case IR.CodepointSet;
|
||||||
code ~= ctSub( `
|
code ~= ctSub( `
|
||||||
if (atEnd || !re.matchers[$$][front])
|
if (atEnd || !re.matchers[$$][front])
|
||||||
|
@ -1439,7 +1454,7 @@ struct CtContext
|
||||||
}
|
}
|
||||||
|
|
||||||
//generate D code for the whole regex
|
//generate D code for the whole regex
|
||||||
public string ctGenRegEx(Bytecode[] ir)
|
public string ctGenRegEx(const(Bytecode)[] ir)
|
||||||
{
|
{
|
||||||
auto bdy = ctGenBlock(ir, 0);
|
auto bdy = ctGenBlock(ir, 0);
|
||||||
auto r = `
|
auto r = `
|
||||||
|
@ -1488,7 +1503,7 @@ struct CtContext
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
string ctGenRegExCode(Char)(Regex!Char re)
|
string ctGenRegExCode(Char)(const Regex!Char re)
|
||||||
{
|
{
|
||||||
auto context = CtContext(re);
|
auto context = CtContext(re);
|
||||||
return context.ctGenRegEx(re.ir);
|
return context.ctGenRegEx(re.ir);
|
||||||
|
|
|
@ -423,6 +423,134 @@ struct Group(DataIndex)
|
||||||
writeln("\t", disassemble(slice, pc, dict));
|
writeln("\t", disassemble(slice, pc, dict));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Encapsulates memory management, explicit ref counting
|
||||||
|
// and the exact type of engine created
|
||||||
|
// there is a single instance per engine combination type x Char
|
||||||
|
// In future may also maintain a (TLS?) cache of memory
|
||||||
|
interface MatcherFactory(Char)
|
||||||
|
{
|
||||||
|
@safe:
|
||||||
|
Matcher!Char create(const Regex!Char, in Char[] input) const;
|
||||||
|
Matcher!Char dup(Matcher!Char m, in Char[] input) const;
|
||||||
|
size_t incRef(Matcher!Char m) const;
|
||||||
|
size_t decRef(Matcher!Char m) const;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only memory management, no compile-time vs run-time specialities
|
||||||
|
abstract class GenericFactory(alias EngineType, Char) : MatcherFactory!Char
|
||||||
|
{
|
||||||
|
import core.stdc.stdlib : malloc, free;
|
||||||
|
import core.memory : GC;
|
||||||
|
enum classSize = __traits(classInstanceSize, EngineType!Char);
|
||||||
|
|
||||||
|
Matcher!Char construct(const Regex!Char re, in Char[] input, void[] memory) const;
|
||||||
|
|
||||||
|
override Matcher!Char create(const Regex!Char re, in Char[] input) const @trusted
|
||||||
|
{
|
||||||
|
immutable size = EngineType!Char.initialMemory(re) + classSize;
|
||||||
|
auto memory = enforce(malloc(size), "malloc failed")[0 .. size];
|
||||||
|
scope(failure) free(memory.ptr);
|
||||||
|
GC.addRange(memory.ptr, classSize);
|
||||||
|
auto engine = construct(re, input, memory);
|
||||||
|
assert(engine.refCount == 1);
|
||||||
|
assert(cast(void*) engine == memory.ptr);
|
||||||
|
return engine;
|
||||||
|
}
|
||||||
|
|
||||||
|
override Matcher!Char dup(Matcher!Char engine, in Char[] input) const @trusted
|
||||||
|
{
|
||||||
|
immutable size = EngineType!Char.initialMemory(engine.pattern) + classSize;
|
||||||
|
auto memory = enforce(malloc(size), "malloc failed")[0 .. size];
|
||||||
|
scope(failure) free(memory.ptr);
|
||||||
|
auto copy = construct(engine.pattern, input, memory);
|
||||||
|
GC.addRange(memory.ptr, classSize);
|
||||||
|
engine.dupTo(copy, memory[classSize .. size]);
|
||||||
|
assert(copy.refCount == 1);
|
||||||
|
return copy;
|
||||||
|
}
|
||||||
|
|
||||||
|
override size_t incRef(Matcher!Char m) const
|
||||||
|
{
|
||||||
|
return ++m.refCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
override size_t decRef(Matcher!Char m) const @trusted
|
||||||
|
{
|
||||||
|
assert(m.refCount != 0);
|
||||||
|
auto cnt = --m.refCount;
|
||||||
|
if (cnt == 0)
|
||||||
|
{
|
||||||
|
void* ptr = cast(void*) m;
|
||||||
|
GC.removeRange(ptr);
|
||||||
|
free(ptr);
|
||||||
|
}
|
||||||
|
return cnt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// A factory for run-time engines
|
||||||
|
class RuntimeFactory(alias EngineType, Char) : GenericFactory!(EngineType, Char)
|
||||||
|
{
|
||||||
|
override EngineType!Char construct(const Regex!Char re, in Char[] input, void[] memory) const
|
||||||
|
{
|
||||||
|
import std.conv : emplace;
|
||||||
|
return emplace!(EngineType!Char)(memory[0 .. classSize],
|
||||||
|
re, Input!Char(input), memory[classSize .. $]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// A factory for compile-time engine
|
||||||
|
class CtfeFactory(alias EngineType, Char, alias func) : GenericFactory!(EngineType, Char)
|
||||||
|
{
|
||||||
|
override EngineType!Char construct(const Regex!Char re, in Char[] input, void[] memory) const
|
||||||
|
{
|
||||||
|
import std.conv : emplace;
|
||||||
|
return emplace!(EngineType!Char)(memory[0 .. classSize],
|
||||||
|
re, &func, Input!Char(input), memory[classSize .. $]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// A workaround for R-T enum re = regex(...)
|
||||||
|
template defaultFactory(Char)
|
||||||
|
{
|
||||||
|
@property MatcherFactory!Char defaultFactory(const Regex!Char re)
|
||||||
|
{
|
||||||
|
import std.regex.internal.backtracking : BacktrackingMatcher;
|
||||||
|
import std.regex.internal.thompson : ThompsonMatcher;
|
||||||
|
import std.algorithm.searching : canFind;
|
||||||
|
static MatcherFactory!Char backtrackingFactory;
|
||||||
|
static MatcherFactory!Char thompsonFactory;
|
||||||
|
if (re.backrefed.canFind!"a != 0")
|
||||||
|
{
|
||||||
|
if (backtrackingFactory is null)
|
||||||
|
backtrackingFactory = new RuntimeFactory!(BacktrackingMatcher, Char);
|
||||||
|
return backtrackingFactory;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (thompsonFactory is null)
|
||||||
|
thompsonFactory = new RuntimeFactory!(ThompsonMatcher, Char);
|
||||||
|
return thompsonFactory;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Defining it as an interface has the undesired side-effect:
|
||||||
|
// casting any class to an interface silently adjusts pointer to point to a nested vtbl
|
||||||
|
abstract class Matcher(Char)
|
||||||
|
{
|
||||||
|
abstract:
|
||||||
|
// Get a (next) match
|
||||||
|
int match(Group!size_t[] matches);
|
||||||
|
// This only maintains internal ref-count,
|
||||||
|
// deallocation happens inside MatcherFactory
|
||||||
|
@property ref size_t refCount() @safe;
|
||||||
|
// Copy internal state to another engine, using memory arena 'memory'
|
||||||
|
void dupTo(Matcher!Char m, void[] memory);
|
||||||
|
// The pattern loaded
|
||||||
|
@property ref const(Regex!Char) pattern() @safe;
|
||||||
|
}
|
||||||
|
|
||||||
/++
|
/++
|
||||||
$(D Regex) object holds regular expression pattern in compiled form.
|
$(D Regex) object holds regular expression pattern in compiled form.
|
||||||
Instances of this object are constructed via calls to $(D regex).
|
Instances of this object are constructed via calls to $(D regex).
|
||||||
|
@ -443,11 +571,11 @@ struct Regex(Char)
|
||||||
static struct NamedGroupRange
|
static struct NamedGroupRange
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
NamedGroup[] groups;
|
const(NamedGroup)[] groups;
|
||||||
size_t start;
|
size_t start;
|
||||||
size_t end;
|
size_t end;
|
||||||
public:
|
public:
|
||||||
this(NamedGroup[] g, size_t s, size_t e)
|
this(const(NamedGroup)[] g, size_t s, size_t e)
|
||||||
{
|
{
|
||||||
assert(s <= e);
|
assert(s <= e);
|
||||||
assert(e <= g.length);
|
assert(e <= g.length);
|
||||||
|
@ -485,7 +613,7 @@ struct Regex(Char)
|
||||||
|
|
||||||
package(std.regex):
|
package(std.regex):
|
||||||
import std.regex.internal.kickstart : Kickstart; //TODO: get rid of this dependency
|
import std.regex.internal.kickstart : Kickstart; //TODO: get rid of this dependency
|
||||||
NamedGroup[] dict; // maps name -> user group number
|
const(NamedGroup)[] dict; // maps name -> user group number
|
||||||
uint ngroup; // number of internal groups
|
uint ngroup; // number of internal groups
|
||||||
uint maxCounterDepth; // max depth of nested {n,m} repetitions
|
uint maxCounterDepth; // max depth of nested {n,m} repetitions
|
||||||
uint hotspotTableSize; // number of entries in merge table
|
uint hotspotTableSize; // number of entries in merge table
|
||||||
|
@ -495,6 +623,35 @@ package(std.regex):
|
||||||
public const(BitTable)[] filters; // bloom filters for conditional loops
|
public const(BitTable)[] filters; // bloom filters for conditional loops
|
||||||
uint[] backrefed; // bit array of backreferenced submatches
|
uint[] backrefed; // bit array of backreferenced submatches
|
||||||
Kickstart!Char kickstart;
|
Kickstart!Char kickstart;
|
||||||
|
MatcherFactory!Char factory; // produces optimal matcher for this pattern
|
||||||
|
|
||||||
|
const(Regex) withFactory(MatcherFactory!Char factory) pure const @trusted
|
||||||
|
{
|
||||||
|
auto r = cast() this;
|
||||||
|
r.factory = factory;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
const(Regex) withFlags(uint newFlags) pure const @trusted
|
||||||
|
{
|
||||||
|
auto r = cast() this;
|
||||||
|
r.flags = newFlags;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
const(Regex) withCode(const(Bytecode)[] code) pure const @trusted
|
||||||
|
{
|
||||||
|
auto r = cast() this;
|
||||||
|
r.ir = code.dup; // TODO: sidestep const instead?
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
const(Regex) withNGroup(uint nGroup) pure const @trusted
|
||||||
|
{
|
||||||
|
auto r = cast() this;
|
||||||
|
r.ngroup = nGroup;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
//bit access helper
|
//bit access helper
|
||||||
uint isBackref(uint n)
|
uint isBackref(uint n)
|
||||||
|
@ -537,26 +694,6 @@ package(std.regex):
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//@@@BUG@@@ (unreduced) - public makes it inaccessible in std.regex.package (!)
|
|
||||||
/*public*/ struct StaticRegex(Char)
|
|
||||||
{
|
|
||||||
package(std.regex):
|
|
||||||
import std.regex.internal.backtracking : BacktrackingMatcher;
|
|
||||||
alias Matcher = BacktrackingMatcher!(true);
|
|
||||||
alias MatchFn = bool function(ref Matcher!Char) @trusted;
|
|
||||||
MatchFn nativeFn;
|
|
||||||
public:
|
|
||||||
Regex!Char _regex;
|
|
||||||
alias _regex this;
|
|
||||||
this(Regex!Char re, MatchFn fn)
|
|
||||||
{
|
|
||||||
_regex = re;
|
|
||||||
nativeFn = fn;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// The stuff below this point is temporarrily part of IR module
|
// The stuff below this point is temporarrily part of IR module
|
||||||
// but may need better place in the future (all internals)
|
// but may need better place in the future (all internals)
|
||||||
package(std.regex):
|
package(std.regex):
|
||||||
|
@ -593,7 +730,7 @@ if (is(Char :dchar))
|
||||||
@property bool atEnd(){
|
@property bool atEnd(){
|
||||||
return _index == _origin.length;
|
return _index == _origin.length;
|
||||||
}
|
}
|
||||||
bool search(Kickstart)(ref Kickstart kick, ref dchar res, ref size_t pos)
|
bool search(Kickstart)(ref const Kickstart kick, ref dchar res, ref size_t pos)
|
||||||
{
|
{
|
||||||
size_t idx = kick.search(_origin, _index);
|
size_t idx = kick.search(_origin, _index);
|
||||||
_index = idx;
|
_index = idx;
|
||||||
|
@ -676,7 +813,7 @@ template BackLooper(E)
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
@trusted uint lookupNamedGroup(String)(NamedGroup[] dict, String name)
|
@trusted uint lookupNamedGroup(String)(const(NamedGroup)[] dict, String name)
|
||||||
{//equal is @system?
|
{//equal is @system?
|
||||||
import std.algorithm.comparison : equal;
|
import std.algorithm.comparison : equal;
|
||||||
import std.algorithm.iteration : map;
|
import std.algorithm.iteration : map;
|
||||||
|
|
|
@ -393,7 +393,7 @@ public:
|
||||||
// has a useful trait: if supplied with valid UTF indexes,
|
// has a useful trait: if supplied with valid UTF indexes,
|
||||||
// returns only valid UTF indexes
|
// returns only valid UTF indexes
|
||||||
// (that given the haystack in question is valid UTF string)
|
// (that given the haystack in question is valid UTF string)
|
||||||
@trusted size_t search(const(Char)[] haystack, size_t idx)
|
@trusted size_t search(const(Char)[] haystack, size_t idx) const
|
||||||
{//@BUG: apparently assumes little endian machines
|
{//@BUG: apparently assumes little endian machines
|
||||||
import core.stdc.string : memchr;
|
import core.stdc.string : memchr;
|
||||||
import std.conv : text;
|
import std.conv : text;
|
||||||
|
|
|
@ -12,7 +12,11 @@ static import std.ascii;
|
||||||
// package relevant info from parser into a regex object
|
// package relevant info from parser into a regex object
|
||||||
auto makeRegex(S, CG)(Parser!(S, CG) p)
|
auto makeRegex(S, CG)(Parser!(S, CG) p)
|
||||||
{
|
{
|
||||||
Regex!(BasicElementOf!S) re;
|
import std.regex.internal.backtracking : BacktrackingMatcher;
|
||||||
|
import std.regex.internal.thompson : ThompsonMatcher;
|
||||||
|
import std.algorithm.searching : canFind;
|
||||||
|
alias Char = BasicElementOf!S;
|
||||||
|
Regex!Char re;
|
||||||
auto g = p.g;
|
auto g = p.g;
|
||||||
with(re)
|
with(re)
|
||||||
{
|
{
|
||||||
|
@ -25,6 +29,12 @@ auto makeRegex(S, CG)(Parser!(S, CG) p)
|
||||||
matchers = g.matchers;
|
matchers = g.matchers;
|
||||||
backrefed = g.backrefed;
|
backrefed = g.backrefed;
|
||||||
re.postprocess();
|
re.postprocess();
|
||||||
|
// check if we have backreferences, if so - use backtracking
|
||||||
|
if (__ctfe) factory = null; // allows us to use the awful enum re = regex(...);
|
||||||
|
else if (re.backrefed.canFind!"a != 0")
|
||||||
|
factory = new RuntimeFactory!(BacktrackingMatcher, Char);
|
||||||
|
else
|
||||||
|
factory = new RuntimeFactory!(ThompsonMatcher, Char);
|
||||||
debug(std_regex_parser)
|
debug(std_regex_parser)
|
||||||
{
|
{
|
||||||
__ctfe || print();
|
__ctfe || print();
|
||||||
|
|
|
@ -518,11 +518,11 @@ alias Sequence(int B, int E) = staticIota!(B, E);
|
||||||
{
|
{
|
||||||
import std.algorithm.comparison : equal;
|
import std.algorithm.comparison : equal;
|
||||||
auto rtr = regex("a|b|c");
|
auto rtr = regex("a|b|c");
|
||||||
enum ctr = regex("a|b|c");
|
static ctr = regex("a|b|c");
|
||||||
assert(equal(rtr.ir,ctr.ir));
|
assert(equal(rtr.ir,ctr.ir));
|
||||||
//CTFE parser BUG is triggered by group
|
//CTFE parser BUG is triggered by group
|
||||||
//in the middle of alternation (at least not first and not last)
|
//in the middle of alternation (at least not first and not last)
|
||||||
enum testCT = regex(`abc|(edf)|xyz`);
|
static testCT = regex(`abc|(edf)|xyz`);
|
||||||
auto testRT = regex(`abc|(edf)|xyz`);
|
auto testRT = regex(`abc|(edf)|xyz`);
|
||||||
assert(equal(testCT.ir,testRT.ir));
|
assert(equal(testCT.ir,testRT.ir));
|
||||||
}
|
}
|
||||||
|
@ -996,6 +996,36 @@ alias Sequence(int B, int E) = staticIota!(B, E);
|
||||||
assertThrown(regex(`^((x)(?=\1))`));
|
assertThrown(regex(`^((x)(?=\1))`));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// bugzilla 13532
|
||||||
|
version(none) // TODO: revisit once we have a proper benchmark framework
|
||||||
|
@safe unittest
|
||||||
|
{
|
||||||
|
import std.datetime.stopwatch : StopWatch, AutoStart;
|
||||||
|
import std.math : abs;
|
||||||
|
import std.conv : to;
|
||||||
|
enum re1 = ctRegex!`[0-9][0-9]`;
|
||||||
|
immutable static re2 = ctRegex!`[0-9][0-9]`;
|
||||||
|
immutable iterations = 1_000_000;
|
||||||
|
size_t result1 = 0, result2 = 0;
|
||||||
|
auto sw = StopWatch(AutoStart.yes);
|
||||||
|
foreach (_; 0 .. iterations)
|
||||||
|
{
|
||||||
|
result1 += matchFirst("12345678", re1).length;
|
||||||
|
}
|
||||||
|
const staticTime = sw.peek();
|
||||||
|
sw.reset();
|
||||||
|
foreach (_; 0 .. iterations)
|
||||||
|
{
|
||||||
|
result2 += matchFirst("12345678", re2).length;
|
||||||
|
}
|
||||||
|
const enumTime = sw.peek();
|
||||||
|
assert(result1 == result2);
|
||||||
|
auto ratio = 1.0 * enumTime.total!"usecs" / staticTime.total!"usecs";
|
||||||
|
// enum is faster or the difference is less than 75%
|
||||||
|
assert(ratio < 1.0 || abs(ratio - 1.0) < 0.75,
|
||||||
|
"enum regex to static regex ratio "~to!string(ratio));
|
||||||
|
}
|
||||||
|
|
||||||
// bugzilla 14504
|
// bugzilla 14504
|
||||||
@safe unittest
|
@safe unittest
|
||||||
{
|
{
|
||||||
|
|
|
@ -89,7 +89,7 @@ struct ThreadList(DataIndex)
|
||||||
template ThompsonOps(E, S, bool withInput:true)
|
template ThompsonOps(E, S, bool withInput:true)
|
||||||
{
|
{
|
||||||
@trusted:
|
@trusted:
|
||||||
static bool op(IR code:IR.End)(E* e, S* state)
|
static bool op(IR code:IR.End)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -105,7 +105,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.Wordboundary)(E* e, S* state)
|
static bool op(IR code:IR.Wordboundary)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -137,7 +137,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.Notwordboundary)(E* e, S* state)
|
static bool op(IR code:IR.Notwordboundary)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -167,7 +167,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.Bof)(E* e, S* state)
|
static bool op(IR code:IR.Bof)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -183,7 +183,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.Bol)(E* e, S* state)
|
static bool op(IR code:IR.Bol)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -203,7 +203,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.Eof)(E* e, S* state)
|
static bool op(IR code:IR.Eof)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -219,7 +219,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.Eol)(E* e, S* state)
|
static bool op(IR code:IR.Eol)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -240,42 +240,42 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.InfiniteStart)(E* e, S* state)
|
static bool op(IR code:IR.InfiniteStart)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteStart);
|
t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteStart);
|
||||||
return op!(IR.InfiniteEnd)(e,state);
|
return op!(IR.InfiniteEnd)(e,state);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.InfiniteBloomStart)(E* e, S* state)
|
static bool op(IR code:IR.InfiniteBloomStart)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteBloomStart);
|
t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteBloomStart);
|
||||||
return op!(IR.InfiniteBloomEnd)(e,state);
|
return op!(IR.InfiniteBloomEnd)(e,state);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.InfiniteQStart)(E* e, S* state)
|
static bool op(IR code:IR.InfiniteQStart)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteQStart);
|
t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteQStart);
|
||||||
return op!(IR.InfiniteQEnd)(e,state);
|
return op!(IR.InfiniteQEnd)(e,state);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.RepeatStart)(E* e, S* state)
|
static bool op(IR code:IR.RepeatStart)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
t.pc += re.ir[t.pc].data + IRL!(IR.RepeatStart);
|
t.pc += re.ir[t.pc].data + IRL!(IR.RepeatStart);
|
||||||
return op!(IR.RepeatEnd)(e,state);
|
return op!(IR.RepeatEnd)(e,state);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.RepeatQStart)(E* e, S* state)
|
static bool op(IR code:IR.RepeatQStart)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
t.pc += re.ir[t.pc].data + IRL!(IR.RepeatQStart);
|
t.pc += re.ir[t.pc].data + IRL!(IR.RepeatQStart);
|
||||||
return op!(IR.RepeatQEnd)(e,state);
|
return op!(IR.RepeatQEnd)(e,state);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code)(E* e, S* state)
|
static bool op(IR code)(E e, S* state)
|
||||||
if (code == IR.RepeatEnd || code == IR.RepeatQEnd)
|
if (code == IR.RepeatEnd || code == IR.RepeatQEnd)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
|
@ -330,7 +330,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code)(E* e, S* state)
|
static bool op(IR code)(E e, S* state)
|
||||||
if (code == IR.InfiniteEnd || code == IR.InfiniteQEnd)
|
if (code == IR.InfiniteEnd || code == IR.InfiniteQEnd)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
|
@ -365,7 +365,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code)(E* e, S* state)
|
static bool op(IR code)(E e, S* state)
|
||||||
if (code == IR.InfiniteBloomEnd)
|
if (code == IR.InfiniteBloomEnd)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
|
@ -394,7 +394,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.OrEnd)(E* e, S* state)
|
static bool op(IR code:IR.OrEnd)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -415,7 +415,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.OrStart)(E* e, S* state)
|
static bool op(IR code:IR.OrStart)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -424,7 +424,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.Option)(E* e, S* state)
|
static bool op(IR code:IR.Option)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -439,7 +439,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.GotoEndOr)(E* e, S* state)
|
static bool op(IR code:IR.GotoEndOr)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -448,7 +448,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.GroupStart)(E* e, S* state)
|
static bool op(IR code:IR.GroupStart)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -458,7 +458,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
static bool op(IR code:IR.GroupEnd)(E* e, S* state)
|
static bool op(IR code:IR.GroupEnd)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -469,7 +469,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.Backref)(E* e, S* state)
|
static bool op(IR code:IR.Backref)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -506,7 +506,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool op(IR code)(E* e, S* state)
|
static bool op(IR code)(E e, S* state)
|
||||||
if (code == IR.LookbehindStart || code == IR.NeglookbehindStart)
|
if (code == IR.LookbehindStart || code == IR.NeglookbehindStart)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
|
@ -516,10 +516,9 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
uint end = t.pc + len + IRL!(IR.LookbehindEnd) + IRL!(IR.LookbehindStart);
|
uint end = t.pc + len + IRL!(IR.LookbehindEnd) + IRL!(IR.LookbehindStart);
|
||||||
bool positive = re.ir[t.pc].code == IR.LookbehindStart;
|
bool positive = re.ir[t.pc].code == IR.LookbehindStart;
|
||||||
static if (Stream.isLoopback)
|
static if (Stream.isLoopback)
|
||||||
auto matcher = fwdMatcher(t.pc, end, subCounters.get(t.pc, 0));
|
auto matcher = fwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
|
||||||
else
|
else
|
||||||
auto matcher = bwdMatcher(t.pc, end, subCounters.get(t.pc, 0));
|
auto matcher = bwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
|
||||||
matcher.re.ngroup = me - ms;
|
|
||||||
matcher.backrefed = backrefed.empty ? t.matches : backrefed;
|
matcher.backrefed = backrefed.empty ? t.matches : backrefed;
|
||||||
//backMatch
|
//backMatch
|
||||||
auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookbehindStart));
|
auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookbehindStart));
|
||||||
|
@ -534,7 +533,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code)(E* e, S* state)
|
static bool op(IR code)(E e, S* state)
|
||||||
if (code == IR.LookaheadStart || code == IR.NeglookaheadStart)
|
if (code == IR.LookaheadStart || code == IR.NeglookaheadStart)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
|
@ -545,10 +544,9 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
uint end = t.pc+len+IRL!(IR.LookaheadEnd)+IRL!(IR.LookaheadStart);
|
uint end = t.pc+len+IRL!(IR.LookaheadEnd)+IRL!(IR.LookaheadStart);
|
||||||
bool positive = re.ir[t.pc].code == IR.LookaheadStart;
|
bool positive = re.ir[t.pc].code == IR.LookaheadStart;
|
||||||
static if (Stream.isLoopback)
|
static if (Stream.isLoopback)
|
||||||
auto matcher = bwdMatcher(t.pc, end, subCounters.get(t.pc, 0));
|
auto matcher = bwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
|
||||||
else
|
else
|
||||||
auto matcher = fwdMatcher(t.pc, end, subCounters.get(t.pc, 0));
|
auto matcher = fwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
|
||||||
matcher.re.ngroup = me - ms;
|
|
||||||
matcher.backrefed = backrefed.empty ? t.matches : backrefed;
|
matcher.backrefed = backrefed.empty ? t.matches : backrefed;
|
||||||
auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookaheadStart));
|
auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookaheadStart));
|
||||||
freelist = matcher.freelist;
|
freelist = matcher.freelist;
|
||||||
|
@ -564,7 +562,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code)(E* e, S* state)
|
static bool op(IR code)(E e, S* state)
|
||||||
if (code == IR.LookaheadEnd || code == IR.NeglookaheadEnd ||
|
if (code == IR.LookaheadEnd || code == IR.NeglookaheadEnd ||
|
||||||
code == IR.LookbehindEnd || code == IR.NeglookbehindEnd)
|
code == IR.LookbehindEnd || code == IR.NeglookbehindEnd)
|
||||||
{
|
{
|
||||||
|
@ -579,13 +577,13 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.Nop)(E* e, S* state)
|
static bool op(IR code:IR.Nop)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(state) t.pc += IRL!(IR.Nop);
|
with(state) t.pc += IRL!(IR.Nop);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.OrChar)(E* e, S* state)
|
static bool op(IR code:IR.OrChar)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -607,7 +605,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.Char)(E* e, S* state)
|
static bool op(IR code:IR.Char)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -623,7 +621,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.Any)(E* e, S* state)
|
static bool op(IR code:IR.Any)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -634,7 +632,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.CodepointSet)(E* e, S* state)
|
static bool op(IR code:IR.CodepointSet)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -652,7 +650,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool op(IR code:IR.Trie)(E* e, S* state)
|
static bool op(IR code:IR.Trie)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -676,7 +674,7 @@ template ThompsonOps(E,S, bool withInput:false)
|
||||||
{
|
{
|
||||||
@trusted:
|
@trusted:
|
||||||
// can't match these without input
|
// can't match these without input
|
||||||
static bool op(IR code)(E* e, S* state)
|
static bool op(IR code)(E e, S* state)
|
||||||
if (code == IR.Char || code == IR.OrChar || code == IR.CodepointSet
|
if (code == IR.Char || code == IR.OrChar || code == IR.CodepointSet
|
||||||
|| code == IR.Trie || code == IR.Char || code == IR.Any)
|
|| code == IR.Trie || code == IR.Char || code == IR.Any)
|
||||||
{
|
{
|
||||||
|
@ -684,7 +682,7 @@ template ThompsonOps(E,S, bool withInput:false)
|
||||||
}
|
}
|
||||||
|
|
||||||
// special case of zero-width backref
|
// special case of zero-width backref
|
||||||
static bool op(IR code:IR.Backref)(E* e, S* state)
|
static bool op(IR code:IR.Backref)(E e, S* state)
|
||||||
{
|
{
|
||||||
with(e) with(state)
|
with(e) with(state)
|
||||||
{
|
{
|
||||||
|
@ -702,7 +700,7 @@ template ThompsonOps(E,S, bool withInput:false)
|
||||||
}
|
}
|
||||||
|
|
||||||
// forward all control flow to normal versions
|
// forward all control flow to normal versions
|
||||||
static bool op(IR code)(E* e, S* state)
|
static bool op(IR code)(E e, S* state)
|
||||||
if (code != IR.Char && code != IR.OrChar && code != IR.CodepointSet
|
if (code != IR.Char && code != IR.OrChar && code != IR.CodepointSet
|
||||||
&& code != IR.Trie && code != IR.Char && code != IR.Any && code != IR.Backref)
|
&& code != IR.Trie && code != IR.Char && code != IR.Any && code != IR.Backref)
|
||||||
{
|
{
|
||||||
|
@ -714,19 +712,19 @@ template ThompsonOps(E,S, bool withInput:false)
|
||||||
Thompson matcher does all matching in lockstep,
|
Thompson matcher does all matching in lockstep,
|
||||||
never looking at the same char twice
|
never looking at the same char twice
|
||||||
+/
|
+/
|
||||||
@trusted struct ThompsonMatcher(Char, StreamType = Input!Char)
|
@trusted class ThompsonMatcher(Char, StreamType = Input!Char): Matcher!Char
|
||||||
if (is(Char : dchar))
|
if (is(Char : dchar))
|
||||||
{
|
{
|
||||||
alias DataIndex = Stream.DataIndex;
|
alias DataIndex = Stream.DataIndex;
|
||||||
alias Stream = StreamType;
|
alias Stream = StreamType;
|
||||||
alias OpFunc = bool function(ThompsonMatcher*, State*);
|
alias OpFunc = bool function(ThompsonMatcher, State*);
|
||||||
alias BackMatcher = ThompsonMatcher!(Char, BackLooper!(Stream));
|
alias BackMatcher = ThompsonMatcher!(Char, BackLooper!(Stream));
|
||||||
alias OpBackFunc = bool function(BackMatcher*, BackMatcher.State*);
|
alias OpBackFunc = bool function(BackMatcher, BackMatcher.State*);
|
||||||
Thread!DataIndex* freelist;
|
Thread!DataIndex* freelist;
|
||||||
ThreadList!DataIndex clist, nlist;
|
ThreadList!DataIndex clist, nlist;
|
||||||
DataIndex[] merge;
|
DataIndex[] merge;
|
||||||
Group!DataIndex[] backrefed;
|
Group!DataIndex[] backrefed;
|
||||||
Regex!Char re; //regex program
|
const Regex!Char re; //regex program
|
||||||
Stream s;
|
Stream s;
|
||||||
dchar front;
|
dchar front;
|
||||||
DataIndex index;
|
DataIndex index;
|
||||||
|
@ -737,16 +735,18 @@ if (is(Char : dchar))
|
||||||
OpBackFunc[] opCacheBackTrue; // ditto
|
OpBackFunc[] opCacheBackTrue; // ditto
|
||||||
OpBackFunc[] opCacheBackFalse; // ditto
|
OpBackFunc[] opCacheBackFalse; // ditto
|
||||||
size_t threadSize;
|
size_t threadSize;
|
||||||
|
size_t _refCount;
|
||||||
int matched;
|
int matched;
|
||||||
bool exhausted;
|
bool exhausted;
|
||||||
|
|
||||||
|
final:
|
||||||
static struct State
|
static struct State
|
||||||
{
|
{
|
||||||
Thread!DataIndex* t;
|
Thread!DataIndex* t;
|
||||||
ThreadList!DataIndex worklist;
|
ThreadList!DataIndex worklist;
|
||||||
Group!DataIndex[] matches;
|
Group!DataIndex[] matches;
|
||||||
|
|
||||||
bool popState(E)(E* e)
|
bool popState(E)(E e)
|
||||||
{
|
{
|
||||||
with(e)
|
with(e)
|
||||||
{
|
{
|
||||||
|
@ -784,6 +784,10 @@ if (is(Char : dchar))
|
||||||
//true if it's end of input
|
//true if it's end of input
|
||||||
@property bool atEnd(){ return index == s.lastIndex && s.atEnd; }
|
@property bool atEnd(){ return index == s.lastIndex && s.atEnd; }
|
||||||
|
|
||||||
|
override @property ref size_t refCount() @safe { return _refCount; }
|
||||||
|
|
||||||
|
override @property ref const(Regex!Char) pattern() @safe { return re; }
|
||||||
|
|
||||||
bool next()
|
bool next()
|
||||||
{
|
{
|
||||||
if (!s.nextChar(front, index))
|
if (!s.nextChar(front, index))
|
||||||
|
@ -843,19 +847,28 @@ if (is(Char : dchar))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this()(Regex!Char program, Stream stream, void[] memory)
|
this()(const Regex!Char program, Stream stream, void[] memory)
|
||||||
{
|
{
|
||||||
|
// We are emplace'd to malloced memory w/o blitting T.init over it,
|
||||||
|
// make sure we initialize all fields explicitly
|
||||||
|
_refCount = 1;
|
||||||
|
subCounters = null;
|
||||||
|
backrefed = null;
|
||||||
|
exhausted = false;
|
||||||
|
matched = 0;
|
||||||
re = program;
|
re = program;
|
||||||
s = stream;
|
s = stream;
|
||||||
initExternalMemory(memory);
|
initExternalMemory(memory);
|
||||||
genCounter = 0;
|
genCounter = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
this(ref ThompsonMatcher matcher, size_t lo, size_t hi, Stream stream)
|
this(ThompsonMatcher matcher, size_t lo, size_t hi, uint nGroup, Stream stream)
|
||||||
{
|
{
|
||||||
|
_refCount = 1;
|
||||||
|
subCounters = matcher.subCounters;
|
||||||
s = stream;
|
s = stream;
|
||||||
re = matcher.re;
|
auto code = matcher.re.ir[lo .. hi];
|
||||||
re.ir = re.ir[lo .. hi];
|
re = matcher.re.withCode(code).withNGroup(nGroup);
|
||||||
threadSize = matcher.threadSize;
|
threadSize = matcher.threadSize;
|
||||||
merge = matcher.merge;
|
merge = matcher.merge;
|
||||||
freelist = matcher.freelist;
|
freelist = matcher.freelist;
|
||||||
|
@ -867,11 +880,13 @@ if (is(Char : dchar))
|
||||||
index = matcher.index;
|
index = matcher.index;
|
||||||
}
|
}
|
||||||
|
|
||||||
this(ref BackMatcher matcher, size_t lo, size_t hi, Stream stream)
|
this(BackMatcher matcher, size_t lo, size_t hi, uint nGroup, Stream stream)
|
||||||
{
|
{
|
||||||
|
_refCount = 1;
|
||||||
|
subCounters = matcher.subCounters;
|
||||||
s = stream;
|
s = stream;
|
||||||
re = matcher.re;
|
auto code = matcher.re.ir[lo .. hi];
|
||||||
re.ir = re.ir[lo .. hi];
|
re = matcher.re.withCode(code).withNGroup(nGroup);
|
||||||
threadSize = matcher.threadSize;
|
threadSize = matcher.threadSize;
|
||||||
merge = matcher.merge;
|
merge = matcher.merge;
|
||||||
freelist = matcher.freelist;
|
freelist = matcher.freelist;
|
||||||
|
@ -883,31 +898,35 @@ if (is(Char : dchar))
|
||||||
index = matcher.index;
|
index = matcher.index;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto fwdMatcher()(size_t lo, size_t hi, size_t counter)
|
auto fwdMatcher()(size_t lo, size_t hi, uint nGroup, size_t counter)
|
||||||
{
|
{
|
||||||
auto m = ThompsonMatcher!(Char, Stream)(this, lo, hi, s);
|
auto m = new ThompsonMatcher!(Char, Stream)(this, lo, hi, nGroup, s);
|
||||||
m.genCounter = counter;
|
m.genCounter = counter;
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto bwdMatcher()(size_t lo, size_t hi, size_t counter)
|
auto bwdMatcher()(size_t lo, size_t hi, uint nGroup, size_t counter)
|
||||||
{
|
{
|
||||||
alias BackLooper = typeof(s.loopBack(index));
|
alias BackLooper = typeof(s.loopBack(index));
|
||||||
auto m = ThompsonMatcher!(Char, BackLooper)(this, lo, hi, s.loopBack(index));
|
auto m = new ThompsonMatcher!(Char, BackLooper)(this, lo, hi, nGroup, s.loopBack(index));
|
||||||
m.genCounter = counter;
|
m.genCounter = counter;
|
||||||
m.next();
|
m.next();
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto dupTo(void[] memory)
|
override void dupTo(Matcher!Char engine, void[] memory)
|
||||||
{
|
{
|
||||||
typeof(this) tmp = this;//bitblit
|
auto thompson = cast(ThompsonMatcher) engine;
|
||||||
tmp.initExternalMemory(memory);
|
thompson.s = s;
|
||||||
tmp.genCounter = 0;
|
thompson.subCounters = null;
|
||||||
return tmp;
|
thompson.front = front;
|
||||||
|
thompson.index = index;
|
||||||
|
thompson.matched = matched;
|
||||||
|
thompson.exhausted = exhausted;
|
||||||
|
thompson.initExternalMemory(memory);
|
||||||
}
|
}
|
||||||
|
|
||||||
int match(Group!DataIndex[] matches)
|
override int match(Group!DataIndex[] matches)
|
||||||
{
|
{
|
||||||
debug(std_regex_matcher)
|
debug(std_regex_matcher)
|
||||||
writeln("------------------------------------------");
|
writeln("------------------------------------------");
|
||||||
|
@ -1052,9 +1071,9 @@ if (is(Char : dchar))
|
||||||
{
|
{
|
||||||
debug(std_regex_matcher) writeln("---- Evaluating thread");
|
debug(std_regex_matcher) writeln("---- Evaluating thread");
|
||||||
static if (withInput)
|
static if (withInput)
|
||||||
while (opCacheTrue.ptr[state.t.pc](&this, state)){}
|
while (opCacheTrue.ptr[state.t.pc](this, state)){}
|
||||||
else
|
else
|
||||||
while (opCacheFalse.ptr[state.t.pc](&this, state)){}
|
while (opCacheFalse.ptr[state.t.pc](this, state)){}
|
||||||
}
|
}
|
||||||
enum uint RestartPc = uint.max;
|
enum uint RestartPc = uint.max;
|
||||||
//match the input, evaluating IR without searching
|
//match the input, evaluating IR without searching
|
||||||
|
|
|
@ -298,7 +298,6 @@ module std.regex;
|
||||||
|
|
||||||
import std.range.primitives, std.traits;
|
import std.range.primitives, std.traits;
|
||||||
import std.regex.internal.ir;
|
import std.regex.internal.ir;
|
||||||
import std.regex.internal.thompson; //TODO: get rid of this dependency
|
|
||||||
import std.typecons; // : Flag, Yes, No;
|
import std.typecons; // : Flag, Yes, No;
|
||||||
|
|
||||||
/++
|
/++
|
||||||
|
@ -339,10 +338,9 @@ public alias Regex(Char) = std.regex.internal.ir.Regex!(Char);
|
||||||
A $(D StaticRegex) is a $(D Regex) object that contains D code specially
|
A $(D StaticRegex) is a $(D Regex) object that contains D code specially
|
||||||
generated at compile-time to speed up matching.
|
generated at compile-time to speed up matching.
|
||||||
|
|
||||||
Implicitly convertible to normal $(D Regex),
|
No longer used, kept as alias to Regex for backwards compatibility.
|
||||||
however doing so will result in losing this additional capability.
|
|
||||||
+/
|
+/
|
||||||
public alias StaticRegex(Char) = std.regex.internal.ir.StaticRegex!(Char);
|
public alias StaticRegex = Regex;
|
||||||
|
|
||||||
/++
|
/++
|
||||||
Compile regular expression pattern for the later execution.
|
Compile regular expression pattern for the later execution.
|
||||||
|
@ -428,16 +426,25 @@ if (isSomeString!(S))
|
||||||
template ctRegexImpl(alias pattern, string flags=[])
|
template ctRegexImpl(alias pattern, string flags=[])
|
||||||
{
|
{
|
||||||
import std.regex.internal.backtracking, std.regex.internal.parser;
|
import std.regex.internal.backtracking, std.regex.internal.parser;
|
||||||
enum r = regex(pattern, flags);
|
static immutable r = cast(immutable) regex(pattern, flags);
|
||||||
alias Char = BasicElementOf!(typeof(pattern));
|
alias Char = BasicElementOf!(typeof(pattern));
|
||||||
enum source = ctGenRegExCode(r);
|
enum source = ctGenRegExCode(r);
|
||||||
alias Matcher = BacktrackingMatcher!(true);
|
@trusted bool func(BacktrackingMatcher!Char matcher)
|
||||||
@trusted bool func(ref Matcher!Char matcher)
|
|
||||||
{
|
{
|
||||||
debug(std_regex_ctr) pragma(msg, source);
|
debug(std_regex_ctr) pragma(msg, source);
|
||||||
|
cast(void) matcher;
|
||||||
mixin(source);
|
mixin(source);
|
||||||
}
|
}
|
||||||
enum nr = StaticRegex!Char(r, &func);
|
static immutable staticRe =
|
||||||
|
cast(immutable) r.withFactory(new CtfeFactory!(BacktrackingMatcher, Char, func));
|
||||||
|
struct Wrapper
|
||||||
|
{
|
||||||
|
// allow code that expects mutable Regex to still work
|
||||||
|
// we stay "logically const"
|
||||||
|
@trusted @property auto getRe() const { return cast() staticRe; }
|
||||||
|
alias getRe this;
|
||||||
|
}
|
||||||
|
enum wrapper = Wrapper();
|
||||||
}
|
}
|
||||||
|
|
||||||
/++
|
/++
|
||||||
|
@ -450,10 +457,10 @@ template ctRegexImpl(alias pattern, string flags=[])
|
||||||
pattern = Regular expression
|
pattern = Regular expression
|
||||||
flags = The _attributes (g, i, m, s and x accepted)
|
flags = The _attributes (g, i, m, s and x accepted)
|
||||||
+/
|
+/
|
||||||
public enum ctRegex(alias pattern, alias flags=[]) = ctRegexImpl!(pattern, flags).nr;
|
public enum ctRegex(alias pattern, alias flags=[]) = ctRegexImpl!(pattern, flags).wrapper;
|
||||||
|
|
||||||
enum isRegexFor(RegEx, R) = is(RegEx == Regex!(BasicElementOf!R))
|
enum isRegexFor(RegEx, R) = is(Unqual!RegEx == Regex!(BasicElementOf!R)) || is(RegEx : const(Regex!(BasicElementOf!R)))
|
||||||
|| is(RegEx == StaticRegex!(BasicElementOf!R));
|
|| is(Unqual!RegEx == StaticRegex!(BasicElementOf!R));
|
||||||
|
|
||||||
|
|
||||||
/++
|
/++
|
||||||
|
@ -462,10 +469,10 @@ enum isRegexFor(RegEx, R) = is(RegEx == Regex!(BasicElementOf!R))
|
||||||
|
|
||||||
First element of range is the whole match.
|
First element of range is the whole match.
|
||||||
+/
|
+/
|
||||||
@trusted public struct Captures(R, DIndex = size_t)
|
@trusted public struct Captures(R)
|
||||||
if (isSomeString!R)
|
if (isSomeString!R)
|
||||||
{//@trusted because of union inside
|
{//@trusted because of union inside
|
||||||
alias DataIndex = DIndex;
|
alias DataIndex = size_t;
|
||||||
alias String = R;
|
alias String = R;
|
||||||
private:
|
private:
|
||||||
import std.conv : text;
|
import std.conv : text;
|
||||||
|
@ -480,9 +487,9 @@ private:
|
||||||
}
|
}
|
||||||
uint _f, _b;
|
uint _f, _b;
|
||||||
uint _refcount; // ref count or SMALL MASK + num groups
|
uint _refcount; // ref count or SMALL MASK + num groups
|
||||||
NamedGroup[] _names;
|
const(NamedGroup)[] _names;
|
||||||
|
|
||||||
this()(R input, uint n, NamedGroup[] named)
|
this(R input, uint n, const(NamedGroup)[] named)
|
||||||
{
|
{
|
||||||
_input = input;
|
_input = input;
|
||||||
_names = named;
|
_names = named;
|
||||||
|
@ -491,11 +498,11 @@ private:
|
||||||
_f = 0;
|
_f = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
this(alias Engine)(ref RegexMatch!(R,Engine) rmatch)
|
this(ref RegexMatch!R rmatch)
|
||||||
{
|
{
|
||||||
_input = rmatch._input;
|
_input = rmatch._input;
|
||||||
_names = rmatch._engine.re.dict;
|
_names = rmatch._engine.pattern.dict;
|
||||||
immutable n = rmatch._engine.re.ngroup;
|
immutable n = rmatch._engine.pattern.ngroup;
|
||||||
newMatches(n);
|
newMatches(n);
|
||||||
_b = n;
|
_b = n;
|
||||||
_f = 0;
|
_f = 0;
|
||||||
|
@ -693,58 +700,38 @@ public:
|
||||||
|
|
||||||
Effectively it's a forward range of Captures!R, produced
|
Effectively it's a forward range of Captures!R, produced
|
||||||
by lazily searching for matches in a given input.
|
by lazily searching for matches in a given input.
|
||||||
|
|
||||||
$(D alias Engine) specifies an engine type to use during matching,
|
|
||||||
and is automatically deduced in a call to $(D match)/$(D bmatch).
|
|
||||||
+/
|
+/
|
||||||
@trusted public struct RegexMatch(R, alias Engine = ThompsonMatcher)
|
@trusted public struct RegexMatch(R)
|
||||||
if (isSomeString!R)
|
if (isSomeString!R)
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
import core.stdc.stdlib : malloc, free;
|
|
||||||
alias Char = BasicElementOf!R;
|
alias Char = BasicElementOf!R;
|
||||||
alias EngineType = Engine!Char;
|
Matcher!Char _engine;
|
||||||
EngineType _engine;
|
const MatcherFactory!Char _factory;
|
||||||
R _input;
|
R _input;
|
||||||
Captures!(R,EngineType.DataIndex) _captures;
|
Captures!R _captures;
|
||||||
void[] _memory;//is ref-counted
|
|
||||||
|
|
||||||
this(RegEx)(R input, RegEx prog)
|
this(RegEx)(R input, RegEx prog)
|
||||||
{
|
{
|
||||||
import std.exception : enforce;
|
import std.exception : enforce;
|
||||||
_input = input;
|
_input = input;
|
||||||
immutable size = EngineType.initialMemory(prog)+size_t.sizeof;
|
if (prog.factory is null) _factory = defaultFactory!Char(prog);
|
||||||
_memory = (enforce(malloc(size), "malloc failed")[0 .. size]);
|
else _factory = prog.factory;
|
||||||
scope(failure) free(_memory.ptr);
|
_engine = _factory.create(prog, input);
|
||||||
*cast(size_t*)_memory.ptr = 1;
|
assert(_engine.refCount == 1);
|
||||||
_engine = EngineType(prog, Input!Char(input), _memory[size_t.sizeof..$]);
|
_captures = Captures!R(this);
|
||||||
static if (is(RegEx == StaticRegex!(BasicElementOf!R)))
|
|
||||||
_engine.nativeFn = prog.nativeFn;
|
|
||||||
_captures = Captures!(R,EngineType.DataIndex)(this);
|
|
||||||
_captures._nMatch = _engine.match(_captures.matches);
|
_captures._nMatch = _engine.match(_captures.matches);
|
||||||
debug(std_regex_allocation) writefln("RefCount (ctor): %x %d", _memory.ptr, counter);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@property ref size_t counter(){ return *cast(size_t*)_memory.ptr; }
|
|
||||||
public:
|
public:
|
||||||
this(this)
|
this(this)
|
||||||
{
|
{
|
||||||
if (_memory.ptr)
|
if (_engine) _factory.incRef(_engine);
|
||||||
{
|
|
||||||
++counter;
|
|
||||||
debug(std_regex_allocation) writefln("RefCount (postblit): %x %d",
|
|
||||||
_memory.ptr, *cast(size_t*)_memory.ptr);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
~this()
|
~this()
|
||||||
{
|
{
|
||||||
if (_memory.ptr && --*cast(size_t*)_memory.ptr == 0)
|
if (_engine) _factory.decRef(_engine);
|
||||||
{
|
|
||||||
debug(std_regex_allocation) writefln("RefCount (dtor): %x %d",
|
|
||||||
_memory.ptr, *cast(size_t*)_memory.ptr);
|
|
||||||
free(cast(void*)_memory.ptr);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
///Shorthands for front.pre, front.post, front.hit.
|
///Shorthands for front.pre, front.post, front.hit.
|
||||||
|
@ -786,19 +773,18 @@ public:
|
||||||
void popFront()
|
void popFront()
|
||||||
{
|
{
|
||||||
import std.exception : enforce;
|
import std.exception : enforce;
|
||||||
if (counter != 1)
|
// CoW - if refCount is not 1, we are aliased by somebody else
|
||||||
{//do cow magic first
|
if (_engine.refCount != 1)
|
||||||
counter--;//we abandon this reference
|
{
|
||||||
immutable size = EngineType.initialMemory(_engine.re)+size_t.sizeof;
|
// we create a new engine & abandon this reference
|
||||||
_memory = (enforce(malloc(size), "malloc failed")[0 .. size]);
|
auto old = _engine;
|
||||||
_engine = _engine.dupTo(_memory[size_t.sizeof .. size]);
|
_engine = _factory.dup(old, _input);
|
||||||
counter = 1;//points to new chunk
|
_factory.decRef(old);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!_captures.unique)
|
if (!_captures.unique)
|
||||||
{
|
{
|
||||||
// has external references - allocate new space
|
// has external references - allocate new space
|
||||||
_captures.newMatches(_engine.re.ngroup);
|
_captures.newMatches(_engine.pattern.ngroup);
|
||||||
}
|
}
|
||||||
_captures._nMatch = _engine.match(_captures.matches);
|
_captures._nMatch = _engine.match(_captures.matches);
|
||||||
}
|
}
|
||||||
|
@ -814,39 +800,30 @@ public:
|
||||||
|
|
||||||
/// Same as .front, provided for compatibility with original std.regex.
|
/// Same as .front, provided for compatibility with original std.regex.
|
||||||
@property auto captures() inout { return _captures; }
|
@property auto captures() inout { return _captures; }
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private @trusted auto matchOnce(alias Engine, RegEx, R)(R input, RegEx re)
|
private @trusted auto matchOnce(RegEx, R)(R input, const RegEx prog)
|
||||||
{
|
{
|
||||||
import core.stdc.stdlib : malloc, free;
|
|
||||||
import std.exception : enforce;
|
|
||||||
alias Char = BasicElementOf!R;
|
alias Char = BasicElementOf!R;
|
||||||
alias EngineType = Engine!Char;
|
auto factory = prog.factory is null ? defaultFactory!Char(prog) : prog.factory;
|
||||||
|
auto engine = factory.create(prog, input);
|
||||||
size_t size = EngineType.initialMemory(re);
|
scope(exit) factory.decRef(engine); // destroys the engine
|
||||||
void[] memory = enforce(malloc(size), "malloc failed")[0 .. size];
|
auto captures = Captures!R(input, prog.ngroup, prog.dict);
|
||||||
scope(exit) free(memory.ptr);
|
|
||||||
auto captures = Captures!(R, EngineType.DataIndex)(input, re.ngroup, re.dict);
|
|
||||||
auto engine = EngineType(re, Input!Char(input), memory);
|
|
||||||
static if (is(RegEx == StaticRegex!(BasicElementOf!R)))
|
|
||||||
engine.nativeFn = re.nativeFn;
|
|
||||||
captures._nMatch = engine.match(captures.matches);
|
captures._nMatch = engine.match(captures.matches);
|
||||||
return captures;
|
return captures;
|
||||||
}
|
}
|
||||||
|
|
||||||
private auto matchMany(alias Engine, RegEx, R)(R input, RegEx re)
|
private auto matchMany(RegEx, R)(R input, RegEx re) @safe
|
||||||
{
|
{
|
||||||
re.flags |= RegexOption.global;
|
return RegexMatch!R(input, re.withFlags(re.flags | RegexOption.global));
|
||||||
return RegexMatch!(R, Engine)(input, re);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@system unittest
|
@system unittest
|
||||||
{
|
{
|
||||||
//sanity checks for new API
|
//sanity checks for new API
|
||||||
auto re = regex("abc");
|
auto re = regex("abc");
|
||||||
assert(!"abc".matchOnce!(ThompsonMatcher)(re).empty);
|
assert(!"abc".matchOnce(re).empty);
|
||||||
assert("abc".matchOnce!(ThompsonMatcher)(re)[0] == "abc");
|
assert("abc".matchOnce(re)[0] == "abc");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -938,25 +915,16 @@ if (isSomeString!R && isRegexFor!(RegEx, R))
|
||||||
+/
|
+/
|
||||||
|
|
||||||
public auto match(R, RegEx)(R input, RegEx re)
|
public auto match(R, RegEx)(R input, RegEx re)
|
||||||
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
|
if (isSomeString!R && isRegexFor!(RegEx,R))
|
||||||
{
|
{
|
||||||
import std.regex.internal.thompson : ThompsonMatcher;
|
return RegexMatch!(Unqual!(typeof(input)))(input, re);
|
||||||
return RegexMatch!(Unqual!(typeof(input)),ThompsonMatcher)(input, re);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
///ditto
|
///ditto
|
||||||
public auto match(R, String)(R input, String re)
|
public auto match(R, String)(R input, String re)
|
||||||
if (isSomeString!R && isSomeString!String)
|
if (isSomeString!R && isSomeString!String)
|
||||||
{
|
{
|
||||||
import std.regex.internal.thompson : ThompsonMatcher;
|
return RegexMatch!(Unqual!(typeof(input)))(input, regex(re));
|
||||||
return RegexMatch!(Unqual!(typeof(input)),ThompsonMatcher)(input, regex(re));
|
|
||||||
}
|
|
||||||
|
|
||||||
public auto match(R, RegEx)(R input, RegEx re)
|
|
||||||
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
|
|
||||||
{
|
|
||||||
import std.regex.internal.backtracking : BacktrackingMatcher;
|
|
||||||
return RegexMatch!(Unqual!(typeof(input)),BacktrackingMatcher!true)(input, re);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/++
|
/++
|
||||||
|
@ -978,33 +946,23 @@ if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
|
||||||
if there was a match, otherwise an empty $(LREF Captures) object.
|
if there was a match, otherwise an empty $(LREF Captures) object.
|
||||||
+/
|
+/
|
||||||
public auto matchFirst(R, RegEx)(R input, RegEx re)
|
public auto matchFirst(R, RegEx)(R input, RegEx re)
|
||||||
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
|
if (isSomeString!R && isRegexFor!(RegEx, R))
|
||||||
{
|
{
|
||||||
import std.regex.internal.thompson : ThompsonMatcher;
|
return matchOnce(input, re);
|
||||||
return matchOnce!ThompsonMatcher(input, re);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
///ditto
|
///ditto
|
||||||
public auto matchFirst(R, String)(R input, String re)
|
public auto matchFirst(R, String)(R input, String re)
|
||||||
if (isSomeString!R && isSomeString!String)
|
if (isSomeString!R && isSomeString!String)
|
||||||
{
|
{
|
||||||
import std.regex.internal.thompson : ThompsonMatcher;
|
return matchOnce(input, regex(re));
|
||||||
return matchOnce!ThompsonMatcher(input, regex(re));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
///ditto
|
///ditto
|
||||||
public auto matchFirst(R, String)(R input, String[] re...)
|
public auto matchFirst(R, String)(R input, String[] re...)
|
||||||
if (isSomeString!R && isSomeString!String)
|
if (isSomeString!R && isSomeString!String)
|
||||||
{
|
{
|
||||||
import std.regex.internal.thompson : ThompsonMatcher;
|
return matchOnce(input, regex(re));
|
||||||
return matchOnce!ThompsonMatcher(input, regex(re));
|
|
||||||
}
|
|
||||||
|
|
||||||
public auto matchFirst(R, RegEx)(R input, RegEx re)
|
|
||||||
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
|
|
||||||
{
|
|
||||||
import std.regex.internal.backtracking : BacktrackingMatcher;
|
|
||||||
return matchOnce!(BacktrackingMatcher!true)(input, re);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/++
|
/++
|
||||||
|
@ -1029,33 +987,23 @@ if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
|
||||||
after the first match was found or an empty one if not present.
|
after the first match was found or an empty one if not present.
|
||||||
+/
|
+/
|
||||||
public auto matchAll(R, RegEx)(R input, RegEx re)
|
public auto matchAll(R, RegEx)(R input, RegEx re)
|
||||||
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
|
if (isSomeString!R && isRegexFor!(RegEx, R))
|
||||||
{
|
{
|
||||||
import std.regex.internal.thompson : ThompsonMatcher;
|
return matchMany(input, re);
|
||||||
return matchMany!ThompsonMatcher(input, re);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
///ditto
|
///ditto
|
||||||
public auto matchAll(R, String)(R input, String re)
|
public auto matchAll(R, String)(R input, String re)
|
||||||
if (isSomeString!R && isSomeString!String)
|
if (isSomeString!R && isSomeString!String)
|
||||||
{
|
{
|
||||||
import std.regex.internal.thompson : ThompsonMatcher;
|
return matchMany(input, regex(re));
|
||||||
return matchMany!ThompsonMatcher(input, regex(re));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
///ditto
|
///ditto
|
||||||
public auto matchAll(R, String)(R input, String[] re...)
|
public auto matchAll(R, String)(R input, String[] re...)
|
||||||
if (isSomeString!R && isSomeString!String)
|
if (isSomeString!R && isSomeString!String)
|
||||||
{
|
{
|
||||||
import std.regex.internal.thompson : ThompsonMatcher;
|
return matchMany(input, regex(re));
|
||||||
return matchMany!ThompsonMatcher(input, regex(re));
|
|
||||||
}
|
|
||||||
|
|
||||||
public auto matchAll(R, RegEx)(R input, RegEx re)
|
|
||||||
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
|
|
||||||
{
|
|
||||||
import std.regex.internal.backtracking : BacktrackingMatcher;
|
|
||||||
return matchMany!(BacktrackingMatcher!true)(input, re);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// another set of tests just to cover the new API
|
// another set of tests just to cover the new API
|
||||||
|
@ -1119,25 +1067,16 @@ if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
|
||||||
|
|
||||||
+/
|
+/
|
||||||
public auto bmatch(R, RegEx)(R input, RegEx re)
|
public auto bmatch(R, RegEx)(R input, RegEx re)
|
||||||
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
|
if (isSomeString!R && isRegexFor!(RegEx, R))
|
||||||
{
|
{
|
||||||
import std.regex.internal.backtracking : BacktrackingMatcher;
|
return RegexMatch!(Unqual!(typeof(input)))(input, re);
|
||||||
return RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false)(input, re);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
///ditto
|
///ditto
|
||||||
public auto bmatch(R, String)(R input, String re)
|
public auto bmatch(R, String)(R input, String re)
|
||||||
if (isSomeString!R && isSomeString!String)
|
if (isSomeString!R && isSomeString!String)
|
||||||
{
|
{
|
||||||
import std.regex.internal.backtracking : BacktrackingMatcher;
|
return RegexMatch!(Unqual!(typeof(input)))(input, regex(re));
|
||||||
return RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false)(input, regex(re));
|
|
||||||
}
|
|
||||||
|
|
||||||
public auto bmatch(R, RegEx)(R input, RegEx re)
|
|
||||||
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
|
|
||||||
{
|
|
||||||
import std.regex.internal.backtracking : BacktrackingMatcher;
|
|
||||||
return RegexMatch!(Unqual!(typeof(input)),BacktrackingMatcher!true)(input, re);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// produces replacement string from format using captures for substitution
|
// produces replacement string from format using captures for substitution
|
||||||
|
@ -1530,7 +1469,7 @@ private:
|
||||||
@trusted this(Range input, RegEx separator)
|
@trusted this(Range input, RegEx separator)
|
||||||
{//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted
|
{//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted
|
||||||
_input = input;
|
_input = input;
|
||||||
separator.flags |= RegexOption.global;
|
const re = separator.withFlags(separator.flags | RegexOption.global);
|
||||||
if (_input.empty)
|
if (_input.empty)
|
||||||
{
|
{
|
||||||
//there is nothing to match at all, make _offset > 0
|
//there is nothing to match at all, make _offset > 0
|
||||||
|
@ -1538,7 +1477,7 @@ private:
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_match = Rx(_input, separator);
|
_match = Rx(_input, re);
|
||||||
|
|
||||||
static if (keepSeparators)
|
static if (keepSeparators)
|
||||||
if (_match.pre.empty)
|
if (_match.pre.empty)
|
||||||
|
|
104
std/uni.d
104
std/uni.d
|
@ -2184,6 +2184,12 @@ pure:
|
||||||
return Intervals!(typeof(data))(data);
|
return Intervals!(typeof(data))(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
package @property const(CodepointInterval)[] intervals() const
|
||||||
|
{
|
||||||
|
import std.array : array;
|
||||||
|
return Intervals!(typeof(data[]))(data[]).array;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Tests the presence of code point $(D val) in this set.
|
Tests the presence of code point $(D val) in this set.
|
||||||
*/
|
*/
|
||||||
|
@ -2619,52 +2625,9 @@ public:
|
||||||
assert((set & set.inverted).empty);
|
assert((set & set.inverted).empty);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
package static string toSourceCode(const(CodepointInterval)[] range, string funcName)
|
||||||
Generates string with D source code of unary function with name of
|
|
||||||
$(D funcName) taking a single $(D dchar) argument. If $(D funcName) is empty
|
|
||||||
the code is adjusted to be a lambda function.
|
|
||||||
|
|
||||||
The function generated tests if the $(CODEPOINT) passed
|
|
||||||
belongs to this set or not. The result is to be used with string mixin.
|
|
||||||
The intended usage area is aggressive optimization via meta programming
|
|
||||||
in parser generators and the like.
|
|
||||||
|
|
||||||
Note: Use with care for relatively small or regular sets. It
|
|
||||||
could end up being slower then just using multi-staged tables.
|
|
||||||
|
|
||||||
Example:
|
|
||||||
---
|
|
||||||
import std.stdio;
|
|
||||||
|
|
||||||
// construct set directly from [a, b$RPAREN intervals
|
|
||||||
auto set = CodepointSet(10, 12, 45, 65, 100, 200);
|
|
||||||
writeln(set);
|
|
||||||
writeln(set.toSourceCode("func"));
|
|
||||||
---
|
|
||||||
|
|
||||||
The above outputs something along the lines of:
|
|
||||||
---
|
|
||||||
bool func(dchar ch) @safe pure nothrow @nogc
|
|
||||||
{
|
|
||||||
if (ch < 45)
|
|
||||||
{
|
|
||||||
if (ch == 10 || ch == 11) return true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
else if (ch < 65) return true;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (ch < 100) return false;
|
|
||||||
if (ch < 200) return true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
---
|
|
||||||
*/
|
|
||||||
string toSourceCode(string funcName="")
|
|
||||||
{
|
{
|
||||||
import std.algorithm.searching : countUntil;
|
import std.algorithm.searching : countUntil;
|
||||||
import std.array : array;
|
|
||||||
import std.format : format;
|
import std.format : format;
|
||||||
enum maxBinary = 3;
|
enum maxBinary = 3;
|
||||||
static string linearScope(R)(R ivals, string indent)
|
static string linearScope(R)(R ivals, string indent)
|
||||||
|
@ -2746,7 +2709,6 @@ public:
|
||||||
|
|
||||||
string code = format("bool %s(dchar ch) @safe pure nothrow @nogc\n",
|
string code = format("bool %s(dchar ch) @safe pure nothrow @nogc\n",
|
||||||
funcName.empty ? "function" : funcName);
|
funcName.empty ? "function" : funcName);
|
||||||
auto range = byInterval.array();
|
|
||||||
// special case first bisection to be on ASCII vs beyond
|
// special case first bisection to be on ASCII vs beyond
|
||||||
auto tillAscii = countUntil!"a[0] > 0x80"(range);
|
auto tillAscii = countUntil!"a[0] > 0x80"(range);
|
||||||
if (tillAscii <= 0) // everything is ASCII or nothing is ascii (-1 & 0)
|
if (tillAscii <= 0) // everything is ASCII or nothing is ascii (-1 & 0)
|
||||||
|
@ -2756,6 +2718,55 @@ public:
|
||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
Generates string with D source code of unary function with name of
|
||||||
|
$(D funcName) taking a single $(D dchar) argument. If $(D funcName) is empty
|
||||||
|
the code is adjusted to be a lambda function.
|
||||||
|
|
||||||
|
The function generated tests if the $(CODEPOINT) passed
|
||||||
|
belongs to this set or not. The result is to be used with string mixin.
|
||||||
|
The intended usage area is aggressive optimization via meta programming
|
||||||
|
in parser generators and the like.
|
||||||
|
|
||||||
|
Note: Use with care for relatively small or regular sets. It
|
||||||
|
could end up being slower then just using multi-staged tables.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
---
|
||||||
|
import std.stdio;
|
||||||
|
|
||||||
|
// construct set directly from [a, b$RPAREN intervals
|
||||||
|
auto set = CodepointSet(10, 12, 45, 65, 100, 200);
|
||||||
|
writeln(set);
|
||||||
|
writeln(set.toSourceCode("func"));
|
||||||
|
---
|
||||||
|
|
||||||
|
The above outputs something along the lines of:
|
||||||
|
---
|
||||||
|
bool func(dchar ch) @safe pure nothrow @nogc
|
||||||
|
{
|
||||||
|
if (ch < 45)
|
||||||
|
{
|
||||||
|
if (ch == 10 || ch == 11) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else if (ch < 65) return true;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (ch < 100) return false;
|
||||||
|
if (ch < 200) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
---
|
||||||
|
*/
|
||||||
|
string toSourceCode(string funcName="")
|
||||||
|
{
|
||||||
|
import std.array : array;
|
||||||
|
auto range = byInterval.array();
|
||||||
|
return toSourceCode(range, funcName);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
True if this set doesn't contain any $(CODEPOINTS).
|
True if this set doesn't contain any $(CODEPOINTS).
|
||||||
*/
|
*/
|
||||||
|
@ -2802,6 +2813,7 @@ private:
|
||||||
|
|
||||||
//may break sorted property - but we need std.sort to access it
|
//may break sorted property - but we need std.sort to access it
|
||||||
//hence package protection attribute
|
//hence package protection attribute
|
||||||
|
static if (hasAssignableElements!Range)
|
||||||
package @property void front(CodepointInterval val)
|
package @property void front(CodepointInterval val)
|
||||||
{
|
{
|
||||||
slice[start] = val.a;
|
slice[start] = val.a;
|
||||||
|
@ -2816,6 +2828,7 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
//ditto about package
|
//ditto about package
|
||||||
|
static if (hasAssignableElements!Range)
|
||||||
package @property void back(CodepointInterval val)
|
package @property void back(CodepointInterval val)
|
||||||
{
|
{
|
||||||
slice[end-2] = val.a;
|
slice[end-2] = val.a;
|
||||||
|
@ -2840,6 +2853,7 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
//ditto about package
|
//ditto about package
|
||||||
|
static if (hasAssignableElements!Range)
|
||||||
package void opIndexAssign(CodepointInterval val, size_t idx)
|
package void opIndexAssign(CodepointInterval val, size_t idx)
|
||||||
{
|
{
|
||||||
slice[start+idx*2] = val.a;
|
slice[start+idx*2] = val.a;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue