mirror of
https://github.com/dlang/phobos.git
synced 2025-04-27 21:51:40 +03:00
Merge pull request #5722 from DmitryOlshansky/regex-matcher-interfaces
std.regex: major internal redesign, also fixes issue 13532 merged-on-behalf-of: Andrei Alexandrescu <andralex@users.noreply.github.com>
This commit is contained in:
commit
ad489989ec
8 changed files with 1140 additions and 976 deletions
File diff suppressed because it is too large
Load diff
|
@ -200,7 +200,7 @@ bool isAtomIR(IR i)
|
|||
IR pairedIR(IR i)
|
||||
{
|
||||
assert(isStartIR(i) || isEndIR(i));
|
||||
return cast(IR)(i ^ 0b11);
|
||||
return cast(IR) (i ^ 0b11);
|
||||
}
|
||||
|
||||
//encoded IR instruction
|
||||
|
@ -423,6 +423,134 @@ struct Group(DataIndex)
|
|||
writeln("\t", disassemble(slice, pc, dict));
|
||||
}
|
||||
|
||||
// Encapsulates memory management, explicit ref counting
|
||||
// and the exact type of engine created
|
||||
// there is a single instance per engine combination type x Char
|
||||
// In future may also maintain a (TLS?) cache of memory
|
||||
interface MatcherFactory(Char)
{
    // Produce a new engine for the given pattern, reading from `input`.
    Matcher!Char create(const Regex!Char, in Char[] input) const @safe;

    // Produce a new engine reading from `input`, derived from the existing engine `m`.
    Matcher!Char dup(Matcher!Char m, in Char[] input) const @safe;

    // Register one more reference to `m`; the result is a reference count.
    size_t incRef(Matcher!Char m) const @safe;

    // Drop one reference to `m`; the result is a reference count.
    // Deallocation of the engine is the factory's job (see Matcher.refCount).
    size_t decRef(Matcher!Char m) const @safe;
}
|
||||
|
||||
// Only memory management, no compile-time vs run-time specialities
|
||||
abstract class GenericFactory(alias EngineType, Char) : MatcherFactory!Char
{
    import core.stdc.stdlib : malloc, free;
    import core.memory : GC;

    // Size in bytes of one EngineType!Char instance; the instance occupies
    // the first classSize bytes of every block this factory allocates.
    enum classSize = __traits(classInstanceSize, EngineType!Char);

    // Build an engine inside `memory`; supplied by the concrete factory.
    Matcher!Char construct(const Regex!Char re, in Char[] input, void[] memory) const;

    // Allocate one malloc'd block (instance + engine working memory),
    // register the instance part with the GC and construct the engine in it.
    // Throws if malloc fails; on any failure the block is unregistered and freed.
    override Matcher!Char create(const Regex!Char re, in Char[] input) const @trusted
    {
        immutable size = EngineType!Char.initialMemory(re) + classSize;
        auto memory = enforce(malloc(size), "malloc failed")[0 .. size];
        scope(failure) free(memory.ptr);
        GC.addRange(memory.ptr, classSize);
        // Scope guards run in reverse order: the range is removed before the
        // block is freed, so the GC is never left scanning released memory.
        scope(failure) GC.removeRange(memory.ptr);
        auto engine = construct(re, input, memory);
        assert(engine.refCount == 1);
        assert(cast(void*) engine == memory.ptr);
        return engine;
    }

    // Allocate a fresh block and construct an engine for the same pattern as
    // `engine` over `input`, then let `engine` copy its state into it.
    // Same failure handling as create().
    override Matcher!Char dup(Matcher!Char engine, in Char[] input) const @trusted
    {
        immutable size = EngineType!Char.initialMemory(engine.pattern) + classSize;
        auto memory = enforce(malloc(size), "malloc failed")[0 .. size];
        scope(failure) free(memory.ptr);
        // Register before constructing, exactly as create() does.
        GC.addRange(memory.ptr, classSize);
        scope(failure) GC.removeRange(memory.ptr);
        auto copy = construct(engine.pattern, input, memory);
        // The bytes after the instance are handed to dupTo as the copy's arena.
        engine.dupTo(copy, memory[classSize .. size]);
        assert(copy.refCount == 1);
        return copy;
    }

    // Increment the engine's reference count and return the new value.
    override size_t incRef(Matcher!Char m) const
    {
        return ++m.refCount;
    }

    // Decrement the engine's reference count and return the new value.
    // When it reaches zero the block is unregistered from the GC and freed;
    // `m` must not be used afterwards.
    override size_t decRef(Matcher!Char m) const @trusted
    {
        assert(m.refCount != 0);
        auto cnt = --m.refCount;
        if (cnt == 0)
        {
            void* ptr = cast(void*) m;
            GC.removeRange(ptr);
            free(ptr);
        }
        return cnt;
    }
}
|
||||
|
||||
// A factory for run-time engines
|
||||
class RuntimeFactory(alias EngineType, Char) : GenericFactory!(EngineType, Char)
{
    // Emplace an EngineType!Char into the first classSize bytes of `memory`,
    // passing the remaining bytes to the engine's constructor.
    override EngineType!Char construct(const Regex!Char re, in Char[] input, void[] memory) const
    {
        import std.conv : emplace;
        auto instanceBytes = memory[0 .. classSize];
        auto arena = memory[classSize .. $];
        return emplace!(EngineType!Char)(instanceBytes, re, Input!Char(input), arena);
    }
}
|
||||
|
||||
// A factory for compile-time engine
|
||||
class CtfeFactory(alias EngineType, Char, alias func) : GenericFactory!(EngineType, Char)
{
    // Emplace an EngineType!Char into the first classSize bytes of `memory`;
    // the constructor additionally receives the address of `func`, and the
    // remaining bytes of `memory`.
    override EngineType!Char construct(const Regex!Char re, in Char[] input, void[] memory) const
    {
        import std.conv : emplace;
        auto instanceBytes = memory[0 .. classSize];
        auto arena = memory[classSize .. $];
        return emplace!(EngineType!Char)(instanceBytes, re, &func, Input!Char(input), arena);
    }
}
|
||||
|
||||
// A workaround for R-T enum re = regex(...)
|
||||
template defaultFactory(Char)
{
    // Pick a run-time factory for `re`: the backtracking engine when any
    // entry of re.backrefed is non-zero, the Thompson engine otherwise.
    // Each factory is created on first request and then reused.
    @property MatcherFactory!Char defaultFactory(const Regex!Char re)
    {
        import std.algorithm.searching : canFind;
        import std.regex.internal.backtracking : BacktrackingMatcher;
        import std.regex.internal.thompson : ThompsonMatcher;
        static MatcherFactory!Char backtrackingFactory;
        static MatcherFactory!Char thompsonFactory;

        immutable usesBackrefs = re.backrefed.canFind!"a != 0";
        if (!usesBackrefs)
        {
            if (thompsonFactory is null)
                thompsonFactory = new RuntimeFactory!(ThompsonMatcher, Char);
            return thompsonFactory;
        }
        if (backtrackingFactory is null)
            backtrackingFactory = new RuntimeFactory!(BacktrackingMatcher, Char);
        return backtrackingFactory;
    }
}
|
||||
|
||||
// Defining it as an interface has the undesired side-effect:
|
||||
// casting any class to an interface silently adjusts pointer to point to a nested vtbl
|
||||
abstract class Matcher(Char)
{
    // Get a (next) match, using `matches` as the capture-group storage.
    abstract int match(Group!size_t[] matches);

    // Access to the engine's internal reference count. The engine only
    // stores the count; freeing the engine is done by MatcherFactory.
    abstract @property ref size_t refCount() @safe;

    // Copy this engine's internal state into `m`, which is to use the
    // memory arena `memory`.
    abstract void dupTo(Matcher!Char m, void[] memory);

    // The pattern this engine was loaded with.
    abstract @property ref const(Regex!Char) pattern() @safe;
}
|
||||
|
||||
/++
|
||||
$(D Regex) object holds regular expression pattern in compiled form.
|
||||
Instances of this object are constructed via calls to $(D regex).
|
||||
|
@ -443,11 +571,11 @@ struct Regex(Char)
|
|||
static struct NamedGroupRange
|
||||
{
|
||||
private:
|
||||
NamedGroup[] groups;
|
||||
const(NamedGroup)[] groups;
|
||||
size_t start;
|
||||
size_t end;
|
||||
public:
|
||||
this(NamedGroup[] g, size_t s, size_t e)
|
||||
this(const(NamedGroup)[] g, size_t s, size_t e)
|
||||
{
|
||||
assert(s <= e);
|
||||
assert(e <= g.length);
|
||||
|
@ -485,7 +613,7 @@ struct Regex(Char)
|
|||
|
||||
package(std.regex):
|
||||
import std.regex.internal.kickstart : Kickstart; //TODO: get rid of this dependency
|
||||
NamedGroup[] dict; // maps name -> user group number
|
||||
const(NamedGroup)[] dict; // maps name -> user group number
|
||||
uint ngroup; // number of internal groups
|
||||
uint maxCounterDepth; // max depth of nested {n,m} repetitions
|
||||
uint hotspotTableSize; // number of entries in merge table
|
||||
|
@ -495,6 +623,35 @@ package(std.regex):
|
|||
public const(BitTable)[] filters; // bloom filters for conditional loops
|
||||
uint[] backrefed; // bit array of backreferenced submatches
|
||||
Kickstart!Char kickstart;
|
||||
MatcherFactory!Char factory; // produces optimal matcher for this pattern
|
||||
|
||||
// Returns a copy of this regex whose `factory` field is set to the argument.
const(Regex) withFactory(MatcherFactory!Char factory) pure const @trusted
{
    // cast() strips const so the field of the copy can be assigned
    auto patched = cast() this;
    patched.factory = factory;
    return patched;
}
|
||||
|
||||
// Returns a copy of this regex whose `flags` field is set to `newFlags`.
const(Regex) withFlags(uint newFlags) pure const @trusted
{
    // cast() strips const so the field of the copy can be assigned
    auto patched = cast() this;
    patched.flags = newFlags;
    return patched;
}
|
||||
|
||||
// Returns a copy of this regex whose `ir` is a freshly duplicated copy of `code`.
const(Regex) withCode(const(Bytecode)[] code) pure const @trusted
{
    // cast() strips const so the field of the copy can be assigned
    auto patched = cast() this;
    patched.ir = code.dup; // TODO: sidestep const instead?
    return patched;
}
|
||||
|
||||
// Returns a copy of this regex whose `ngroup` field is set to `nGroup`.
const(Regex) withNGroup(uint nGroup) pure const @trusted
{
    // cast() strips const so the field of the copy can be assigned
    auto patched = cast() this;
    patched.ngroup = nGroup;
    return patched;
}
|
||||
|
||||
//bit access helper
|
||||
uint isBackref(uint n)
|
||||
|
@ -537,26 +694,6 @@ package(std.regex):
|
|||
|
||||
}
|
||||
|
||||
//@@@BUG@@@ (unreduced) - public makes it inaccessible in std.regex.package (!)
|
||||
/*public*/ struct StaticRegex(Char)
|
||||
{
|
||||
package(std.regex):
|
||||
import std.regex.internal.backtracking : BacktrackingMatcher;
|
||||
alias Matcher = BacktrackingMatcher!(true);
|
||||
alias MatchFn = bool function(ref Matcher!Char) @trusted;
|
||||
MatchFn nativeFn;
|
||||
public:
|
||||
Regex!Char _regex;
|
||||
alias _regex this;
|
||||
this(Regex!Char re, MatchFn fn)
|
||||
{
|
||||
_regex = re;
|
||||
nativeFn = fn;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// The stuff below this point is temporarily part of IR module
|
||||
// but may need better place in the future (all internals)
|
||||
package(std.regex):
|
||||
|
@ -593,7 +730,7 @@ if (is(Char :dchar))
|
|||
// True when _index equals the length of _origin.
@property bool atEnd()
{
    return _origin.length == _index;
}
|
||||
bool search(Kickstart)(ref Kickstart kick, ref dchar res, ref size_t pos)
|
||||
bool search(Kickstart)(ref const Kickstart kick, ref dchar res, ref size_t pos)
|
||||
{
|
||||
size_t idx = kick.search(_origin, _index);
|
||||
_index = idx;
|
||||
|
@ -676,7 +813,7 @@ template BackLooper(E)
|
|||
}
|
||||
|
||||
//
|
||||
@trusted uint lookupNamedGroup(String)(NamedGroup[] dict, String name)
|
||||
@trusted uint lookupNamedGroup(String)(const(NamedGroup)[] dict, String name)
|
||||
{//equal is @system?
|
||||
import std.algorithm.comparison : equal;
|
||||
import std.algorithm.iteration : map;
|
||||
|
|
|
@ -393,7 +393,7 @@ public:
|
|||
// has a useful trait: if supplied with valid UTF indexes,
|
||||
// returns only valid UTF indexes
|
||||
// (that given the haystack in question is valid UTF string)
|
||||
@trusted size_t search(const(Char)[] haystack, size_t idx)
|
||||
@trusted size_t search(const(Char)[] haystack, size_t idx) const
|
||||
{//@BUG: apparently assumes little endian machines
|
||||
import core.stdc.string : memchr;
|
||||
import std.conv : text;
|
||||
|
|
|
@ -12,7 +12,11 @@ static import std.ascii;
|
|||
// package relevant info from parser into a regex object
|
||||
auto makeRegex(S, CG)(Parser!(S, CG) p)
|
||||
{
|
||||
Regex!(BasicElementOf!S) re;
|
||||
import std.regex.internal.backtracking : BacktrackingMatcher;
|
||||
import std.regex.internal.thompson : ThompsonMatcher;
|
||||
import std.algorithm.searching : canFind;
|
||||
alias Char = BasicElementOf!S;
|
||||
Regex!Char re;
|
||||
auto g = p.g;
|
||||
with(re)
|
||||
{
|
||||
|
@ -25,6 +29,12 @@ auto makeRegex(S, CG)(Parser!(S, CG) p)
|
|||
matchers = g.matchers;
|
||||
backrefed = g.backrefed;
|
||||
re.postprocess();
|
||||
// check if we have backreferences, if so - use backtracking
|
||||
if (__ctfe) factory = null; // allows us to use the awful enum re = regex(...);
|
||||
else if (re.backrefed.canFind!"a != 0")
|
||||
factory = new RuntimeFactory!(BacktrackingMatcher, Char);
|
||||
else
|
||||
factory = new RuntimeFactory!(ThompsonMatcher, Char);
|
||||
debug(std_regex_parser)
|
||||
{
|
||||
__ctfe || print();
|
||||
|
|
|
@ -518,11 +518,11 @@ alias Sequence(int B, int E) = staticIota!(B, E);
|
|||
{
|
||||
import std.algorithm.comparison : equal;
|
||||
auto rtr = regex("a|b|c");
|
||||
enum ctr = regex("a|b|c");
|
||||
static ctr = regex("a|b|c");
|
||||
assert(equal(rtr.ir,ctr.ir));
|
||||
//CTFE parser BUG is triggered by group
|
||||
//in the middle of alternation (at least not first and not last)
|
||||
enum testCT = regex(`abc|(edf)|xyz`);
|
||||
static testCT = regex(`abc|(edf)|xyz`);
|
||||
auto testRT = regex(`abc|(edf)|xyz`);
|
||||
assert(equal(testCT.ir,testRT.ir));
|
||||
}
|
||||
|
@ -996,6 +996,36 @@ alias Sequence(int B, int E) = staticIota!(B, E);
|
|||
assertThrown(regex(`^((x)(?=\1))`));
|
||||
}
|
||||
|
||||
// bugzilla 13532
|
||||
version(none) // TODO: revisit once we have proper benchmark framework
@safe unittest
{
    // Compares matching speed of a ctRegex held in an `enum` against the
    // same ctRegex held in an `immutable static`.
    import std.conv : to;
    import std.datetime.stopwatch : StopWatch, AutoStart;
    import std.math : abs;
    enum enumRe = ctRegex!`[0-9][0-9]`;
    immutable static staticRe = ctRegex!`[0-9][0-9]`;
    immutable iterations = 1_000_000;
    size_t enumResult = 0, staticResult = 0;
    auto sw = StopWatch(AutoStart.yes);
    foreach (_; 0 .. iterations)
    {
        enumResult += matchFirst("12345678", enumRe).length;
    }
    // Each timing is named after the regex it measured, so the ratio below
    // really is enum / static as the assert message states.
    const enumTime = sw.peek();
    sw.reset();
    foreach (_; 0 .. iterations)
    {
        staticResult += matchFirst("12345678", staticRe).length;
    }
    const staticTime = sw.peek();
    assert(enumResult == staticResult);
    auto ratio = 1.0 * enumTime.total!"usecs" / staticTime.total!"usecs";
    // enum is faster, or it is slower by less than 75%
    assert(ratio < 1.0 || abs(ratio - 1.0) < 0.75,
        "enum regex to static regex ratio "~to!string(ratio));
}
|
||||
|
||||
// bugzilla 14504
|
||||
@safe unittest
|
||||
{
|
||||
|
|
|
@ -89,7 +89,7 @@ struct ThreadList(DataIndex)
|
|||
template ThompsonOps(E, S, bool withInput:true)
|
||||
{
|
||||
@trusted:
|
||||
static bool op(IR code:IR.End)(E* e, S* state)
|
||||
static bool op(IR code:IR.End)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -105,7 +105,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.Wordboundary)(E* e, S* state)
|
||||
static bool op(IR code:IR.Wordboundary)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -137,7 +137,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.Notwordboundary)(E* e, S* state)
|
||||
static bool op(IR code:IR.Notwordboundary)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -167,7 +167,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.Bof)(E* e, S* state)
|
||||
static bool op(IR code:IR.Bof)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -183,7 +183,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.Bol)(E* e, S* state)
|
||||
static bool op(IR code:IR.Bol)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -203,7 +203,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.Eof)(E* e, S* state)
|
||||
static bool op(IR code:IR.Eof)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -219,7 +219,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.Eol)(E* e, S* state)
|
||||
static bool op(IR code:IR.Eol)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -240,42 +240,42 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.InfiniteStart)(E* e, S* state)
|
||||
static bool op(IR code:IR.InfiniteStart)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteStart);
|
||||
return op!(IR.InfiniteEnd)(e,state);
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.InfiniteBloomStart)(E* e, S* state)
|
||||
static bool op(IR code:IR.InfiniteBloomStart)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteBloomStart);
|
||||
return op!(IR.InfiniteBloomEnd)(e,state);
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.InfiniteQStart)(E* e, S* state)
|
||||
static bool op(IR code:IR.InfiniteQStart)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteQStart);
|
||||
return op!(IR.InfiniteQEnd)(e,state);
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.RepeatStart)(E* e, S* state)
|
||||
static bool op(IR code:IR.RepeatStart)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
t.pc += re.ir[t.pc].data + IRL!(IR.RepeatStart);
|
||||
return op!(IR.RepeatEnd)(e,state);
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.RepeatQStart)(E* e, S* state)
|
||||
static bool op(IR code:IR.RepeatQStart)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
t.pc += re.ir[t.pc].data + IRL!(IR.RepeatQStart);
|
||||
return op!(IR.RepeatQEnd)(e,state);
|
||||
}
|
||||
|
||||
static bool op(IR code)(E* e, S* state)
|
||||
static bool op(IR code)(E e, S* state)
|
||||
if (code == IR.RepeatEnd || code == IR.RepeatQEnd)
|
||||
{
|
||||
with(e) with(state)
|
||||
|
@ -330,7 +330,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code)(E* e, S* state)
|
||||
static bool op(IR code)(E e, S* state)
|
||||
if (code == IR.InfiniteEnd || code == IR.InfiniteQEnd)
|
||||
{
|
||||
with(e) with(state)
|
||||
|
@ -365,7 +365,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code)(E* e, S* state)
|
||||
static bool op(IR code)(E e, S* state)
|
||||
if (code == IR.InfiniteBloomEnd)
|
||||
{
|
||||
with(e) with(state)
|
||||
|
@ -394,7 +394,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.OrEnd)(E* e, S* state)
|
||||
static bool op(IR code:IR.OrEnd)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -415,7 +415,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.OrStart)(E* e, S* state)
|
||||
static bool op(IR code:IR.OrStart)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -424,7 +424,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.Option)(E* e, S* state)
|
||||
static bool op(IR code:IR.Option)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -439,7 +439,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.GotoEndOr)(E* e, S* state)
|
||||
static bool op(IR code:IR.GotoEndOr)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -448,7 +448,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.GroupStart)(E* e, S* state)
|
||||
static bool op(IR code:IR.GroupStart)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -458,7 +458,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
return true;
|
||||
}
|
||||
}
|
||||
static bool op(IR code:IR.GroupEnd)(E* e, S* state)
|
||||
static bool op(IR code:IR.GroupEnd)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -469,7 +469,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.Backref)(E* e, S* state)
|
||||
static bool op(IR code:IR.Backref)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -506,7 +506,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
|
||||
|
||||
static bool op(IR code)(E* e, S* state)
|
||||
static bool op(IR code)(E e, S* state)
|
||||
if (code == IR.LookbehindStart || code == IR.NeglookbehindStart)
|
||||
{
|
||||
with(e) with(state)
|
||||
|
@ -516,10 +516,9 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
uint end = t.pc + len + IRL!(IR.LookbehindEnd) + IRL!(IR.LookbehindStart);
|
||||
bool positive = re.ir[t.pc].code == IR.LookbehindStart;
|
||||
static if (Stream.isLoopback)
|
||||
auto matcher = fwdMatcher(t.pc, end, subCounters.get(t.pc, 0));
|
||||
auto matcher = fwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
|
||||
else
|
||||
auto matcher = bwdMatcher(t.pc, end, subCounters.get(t.pc, 0));
|
||||
matcher.re.ngroup = me - ms;
|
||||
auto matcher = bwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
|
||||
matcher.backrefed = backrefed.empty ? t.matches : backrefed;
|
||||
//backMatch
|
||||
auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookbehindStart));
|
||||
|
@ -534,7 +533,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code)(E* e, S* state)
|
||||
static bool op(IR code)(E e, S* state)
|
||||
if (code == IR.LookaheadStart || code == IR.NeglookaheadStart)
|
||||
{
|
||||
with(e) with(state)
|
||||
|
@ -545,10 +544,9 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
uint end = t.pc+len+IRL!(IR.LookaheadEnd)+IRL!(IR.LookaheadStart);
|
||||
bool positive = re.ir[t.pc].code == IR.LookaheadStart;
|
||||
static if (Stream.isLoopback)
|
||||
auto matcher = bwdMatcher(t.pc, end, subCounters.get(t.pc, 0));
|
||||
auto matcher = bwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
|
||||
else
|
||||
auto matcher = fwdMatcher(t.pc, end, subCounters.get(t.pc, 0));
|
||||
matcher.re.ngroup = me - ms;
|
||||
auto matcher = fwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
|
||||
matcher.backrefed = backrefed.empty ? t.matches : backrefed;
|
||||
auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookaheadStart));
|
||||
freelist = matcher.freelist;
|
||||
|
@ -564,7 +562,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code)(E* e, S* state)
|
||||
static bool op(IR code)(E e, S* state)
|
||||
if (code == IR.LookaheadEnd || code == IR.NeglookaheadEnd ||
|
||||
code == IR.LookbehindEnd || code == IR.NeglookbehindEnd)
|
||||
{
|
||||
|
@ -579,13 +577,13 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.Nop)(E* e, S* state)
|
||||
static bool op(IR code:IR.Nop)(E e, S* state)
|
||||
{
|
||||
with(state) t.pc += IRL!(IR.Nop);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.OrChar)(E* e, S* state)
|
||||
static bool op(IR code:IR.OrChar)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -607,7 +605,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.Char)(E* e, S* state)
|
||||
static bool op(IR code:IR.Char)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -623,7 +621,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.Any)(E* e, S* state)
|
||||
static bool op(IR code:IR.Any)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -634,7 +632,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.CodepointSet)(E* e, S* state)
|
||||
static bool op(IR code:IR.CodepointSet)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -652,7 +650,7 @@ template ThompsonOps(E, S, bool withInput:true)
|
|||
}
|
||||
}
|
||||
|
||||
static bool op(IR code:IR.Trie)(E* e, S* state)
|
||||
static bool op(IR code:IR.Trie)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -676,7 +674,7 @@ template ThompsonOps(E,S, bool withInput:false)
|
|||
{
|
||||
@trusted:
|
||||
// can't match these without input
|
||||
static bool op(IR code)(E* e, S* state)
|
||||
static bool op(IR code)(E e, S* state)
|
||||
if (code == IR.Char || code == IR.OrChar || code == IR.CodepointSet
|
||||
|| code == IR.Trie || code == IR.Char || code == IR.Any)
|
||||
{
|
||||
|
@ -684,7 +682,7 @@ template ThompsonOps(E,S, bool withInput:false)
|
|||
}
|
||||
|
||||
// special case of zero-width backref
|
||||
static bool op(IR code:IR.Backref)(E* e, S* state)
|
||||
static bool op(IR code:IR.Backref)(E e, S* state)
|
||||
{
|
||||
with(e) with(state)
|
||||
{
|
||||
|
@ -702,7 +700,7 @@ template ThompsonOps(E,S, bool withInput:false)
|
|||
}
|
||||
|
||||
// forward all control flow to normal versions
|
||||
static bool op(IR code)(E* e, S* state)
|
||||
static bool op(IR code)(E e, S* state)
|
||||
if (code != IR.Char && code != IR.OrChar && code != IR.CodepointSet
|
||||
&& code != IR.Trie && code != IR.Char && code != IR.Any && code != IR.Backref)
|
||||
{
|
||||
|
@ -714,19 +712,19 @@ template ThompsonOps(E,S, bool withInput:false)
|
|||
Thompson matcher does all matching in lockstep,
|
||||
never looking at the same char twice
|
||||
+/
|
||||
@trusted struct ThompsonMatcher(Char, StreamType = Input!Char)
|
||||
@trusted class ThompsonMatcher(Char, StreamType = Input!Char): Matcher!Char
|
||||
if (is(Char : dchar))
|
||||
{
|
||||
alias DataIndex = Stream.DataIndex;
|
||||
alias Stream = StreamType;
|
||||
alias OpFunc = bool function(ThompsonMatcher*, State*);
|
||||
alias OpFunc = bool function(ThompsonMatcher, State*);
|
||||
alias BackMatcher = ThompsonMatcher!(Char, BackLooper!(Stream));
|
||||
alias OpBackFunc = bool function(BackMatcher*, BackMatcher.State*);
|
||||
alias OpBackFunc = bool function(BackMatcher, BackMatcher.State*);
|
||||
Thread!DataIndex* freelist;
|
||||
ThreadList!DataIndex clist, nlist;
|
||||
DataIndex[] merge;
|
||||
Group!DataIndex[] backrefed;
|
||||
Regex!Char re; //regex program
|
||||
const Regex!Char re; //regex program
|
||||
Stream s;
|
||||
dchar front;
|
||||
DataIndex index;
|
||||
|
@ -737,16 +735,18 @@ if (is(Char : dchar))
|
|||
OpBackFunc[] opCacheBackTrue; // ditto
|
||||
OpBackFunc[] opCacheBackFalse; // ditto
|
||||
size_t threadSize;
|
||||
size_t _refCount;
|
||||
int matched;
|
||||
bool exhausted;
|
||||
|
||||
final:
|
||||
static struct State
|
||||
{
|
||||
Thread!DataIndex* t;
|
||||
ThreadList!DataIndex worklist;
|
||||
Group!DataIndex[] matches;
|
||||
|
||||
bool popState(E)(E* e)
|
||||
bool popState(E)(E e)
|
||||
{
|
||||
with(e)
|
||||
{
|
||||
|
@ -784,6 +784,10 @@ if (is(Char : dchar))
|
|||
//true if it's end of input
|
||||
// True when `index` equals the stream's lastIndex and the stream itself reports atEnd.
@property bool atEnd()
{
    if (index != s.lastIndex)
        return false;
    return s.atEnd;
}
|
||||
|
||||
// Matcher interface: reference to this engine's _refCount field.
override @property ref size_t refCount() @safe
{
    return _refCount;
}
|
||||
|
||||
// Matcher interface: the regex program (`re`) this engine runs.
override @property ref const(Regex!Char) pattern() @safe
{
    return re;
}
|
||||
|
||||
bool next()
|
||||
{
|
||||
if (!s.nextChar(front, index))
|
||||
|
@ -843,19 +847,28 @@ if (is(Char : dchar))
|
|||
}
|
||||
}
|
||||
|
||||
this()(Regex!Char program, Stream stream, void[] memory)
|
||||
this()(const Regex!Char program, Stream stream, void[] memory)
|
||||
{
|
||||
// We are emplace'd to malloced memory w/o blitting T.init over it,
|
||||
// make sure we initialize all fields explicitly
|
||||
_refCount = 1;
|
||||
subCounters = null;
|
||||
backrefed = null;
|
||||
exhausted = false;
|
||||
matched = 0;
|
||||
re = program;
|
||||
s = stream;
|
||||
initExternalMemory(memory);
|
||||
genCounter = 0;
|
||||
}
|
||||
|
||||
this(ref ThompsonMatcher matcher, size_t lo, size_t hi, Stream stream)
|
||||
this(ThompsonMatcher matcher, size_t lo, size_t hi, uint nGroup, Stream stream)
|
||||
{
|
||||
_refCount = 1;
|
||||
subCounters = matcher.subCounters;
|
||||
s = stream;
|
||||
re = matcher.re;
|
||||
re.ir = re.ir[lo .. hi];
|
||||
auto code = matcher.re.ir[lo .. hi];
|
||||
re = matcher.re.withCode(code).withNGroup(nGroup);
|
||||
threadSize = matcher.threadSize;
|
||||
merge = matcher.merge;
|
||||
freelist = matcher.freelist;
|
||||
|
@ -867,11 +880,13 @@ if (is(Char : dchar))
|
|||
index = matcher.index;
|
||||
}
|
||||
|
||||
this(ref BackMatcher matcher, size_t lo, size_t hi, Stream stream)
|
||||
this(BackMatcher matcher, size_t lo, size_t hi, uint nGroup, Stream stream)
|
||||
{
|
||||
_refCount = 1;
|
||||
subCounters = matcher.subCounters;
|
||||
s = stream;
|
||||
re = matcher.re;
|
||||
re.ir = re.ir[lo .. hi];
|
||||
auto code = matcher.re.ir[lo .. hi];
|
||||
re = matcher.re.withCode(code).withNGroup(nGroup);
|
||||
threadSize = matcher.threadSize;
|
||||
merge = matcher.merge;
|
||||
freelist = matcher.freelist;
|
||||
|
@ -883,31 +898,35 @@ if (is(Char : dchar))
|
|||
index = matcher.index;
|
||||
}
|
||||
|
||||
auto fwdMatcher()(size_t lo, size_t hi, size_t counter)
|
||||
auto fwdMatcher()(size_t lo, size_t hi, uint nGroup, size_t counter)
|
||||
{
|
||||
auto m = ThompsonMatcher!(Char, Stream)(this, lo, hi, s);
|
||||
auto m = new ThompsonMatcher!(Char, Stream)(this, lo, hi, nGroup, s);
|
||||
m.genCounter = counter;
|
||||
return m;
|
||||
}
|
||||
|
||||
auto bwdMatcher()(size_t lo, size_t hi, size_t counter)
|
||||
auto bwdMatcher()(size_t lo, size_t hi, uint nGroup, size_t counter)
|
||||
{
|
||||
alias BackLooper = typeof(s.loopBack(index));
|
||||
auto m = ThompsonMatcher!(Char, BackLooper)(this, lo, hi, s.loopBack(index));
|
||||
auto m = new ThompsonMatcher!(Char, BackLooper)(this, lo, hi, nGroup, s.loopBack(index));
|
||||
m.genCounter = counter;
|
||||
m.next();
|
||||
return m;
|
||||
}
|
||||
|
||||
auto dupTo(void[] memory)
|
||||
override void dupTo(Matcher!Char engine, void[] memory)
|
||||
{
|
||||
typeof(this) tmp = this;//bitblit
|
||||
tmp.initExternalMemory(memory);
|
||||
tmp.genCounter = 0;
|
||||
return tmp;
|
||||
auto thompson = cast(ThompsonMatcher) engine;
|
||||
thompson.s = s;
|
||||
thompson.subCounters = null;
|
||||
thompson.front = front;
|
||||
thompson.index = index;
|
||||
thompson.matched = matched;
|
||||
thompson.exhausted = exhausted;
|
||||
thompson.initExternalMemory(memory);
|
||||
}
|
||||
|
||||
int match(Group!DataIndex[] matches)
|
||||
override int match(Group!DataIndex[] matches)
|
||||
{
|
||||
debug(std_regex_matcher)
|
||||
writeln("------------------------------------------");
|
||||
|
@ -1052,9 +1071,9 @@ if (is(Char : dchar))
|
|||
{
|
||||
debug(std_regex_matcher) writeln("---- Evaluating thread");
|
||||
static if (withInput)
|
||||
while (opCacheTrue.ptr[state.t.pc](&this, state)){}
|
||||
while (opCacheTrue.ptr[state.t.pc](this, state)){}
|
||||
else
|
||||
while (opCacheFalse.ptr[state.t.pc](&this, state)){}
|
||||
while (opCacheFalse.ptr[state.t.pc](this, state)){}
|
||||
}
|
||||
enum uint RestartPc = uint.max;
|
||||
//match the input, evaluating IR without searching
|
||||
|
|
|
@ -298,7 +298,6 @@ module std.regex;
|
|||
|
||||
import std.range.primitives, std.traits;
|
||||
import std.regex.internal.ir;
|
||||
import std.regex.internal.thompson; //TODO: get rid of this dependency
|
||||
import std.typecons; // : Flag, Yes, No;
|
||||
|
||||
/++
|
||||
|
@ -339,10 +338,9 @@ public alias Regex(Char) = std.regex.internal.ir.Regex!(Char);
|
|||
A $(D StaticRegex) is $(D Regex) object that contains D code specially
|
||||
generated at compile-time to speed up matching.
|
||||
|
||||
Implicitly convertible to normal $(D Regex),
|
||||
however doing so will result in losing this additional capability.
|
||||
No longer used, kept as alias to Regex for backwards compatibility.
|
||||
+/
|
||||
public alias StaticRegex(Char) = std.regex.internal.ir.StaticRegex!(Char);
|
||||
public alias StaticRegex = Regex;
|
||||
|
||||
/++
|
||||
Compile regular expression pattern for the later execution.
|
||||
|
@ -428,16 +426,25 @@ if (isSomeString!(S))
|
|||
template ctRegexImpl(alias pattern, string flags=[])
|
||||
{
|
||||
import std.regex.internal.backtracking, std.regex.internal.parser;
|
||||
enum r = regex(pattern, flags);
|
||||
static immutable r = cast(immutable) regex(pattern, flags);
|
||||
alias Char = BasicElementOf!(typeof(pattern));
|
||||
enum source = ctGenRegExCode(r);
|
||||
alias Matcher = BacktrackingMatcher!(true);
|
||||
@trusted bool func(ref Matcher!Char matcher)
|
||||
@trusted bool func(BacktrackingMatcher!Char matcher)
|
||||
{
|
||||
debug(std_regex_ctr) pragma(msg, source);
|
||||
cast(void) matcher;
|
||||
mixin(source);
|
||||
}
|
||||
enum nr = StaticRegex!Char(r, &func);
|
||||
static immutable staticRe =
|
||||
cast(immutable) r.withFactory(new CtfeFactory!(BacktrackingMatcher, Char, func));
|
||||
struct Wrapper
|
||||
{
|
||||
// allow code that expects mutable Regex to still work
|
||||
// we stay "logically const"
|
||||
@trusted @property auto getRe() const { return cast() staticRe; }
|
||||
alias getRe this;
|
||||
}
|
||||
enum wrapper = Wrapper();
|
||||
}
|
||||
|
||||
/++
|
||||
|
@ -450,10 +457,10 @@ template ctRegexImpl(alias pattern, string flags=[])
|
|||
pattern = Regular expression
|
||||
flags = The _attributes (g, i, m, s and x accepted)
|
||||
+/
|
||||
public enum ctRegex(alias pattern, alias flags=[]) = ctRegexImpl!(pattern, flags).nr;
|
||||
public enum ctRegex(alias pattern, alias flags=[]) = ctRegexImpl!(pattern, flags).wrapper;
|
||||
|
||||
enum isRegexFor(RegEx, R) = is(RegEx == Regex!(BasicElementOf!R))
|
||||
|| is(RegEx == StaticRegex!(BasicElementOf!R));
|
||||
enum isRegexFor(RegEx, R) = is(Unqual!RegEx == Regex!(BasicElementOf!R)) || is(RegEx : const(Regex!(BasicElementOf!R)))
|
||||
|| is(Unqual!RegEx == StaticRegex!(BasicElementOf!R));
|
||||
|
||||
|
||||
/++
|
||||
|
@ -462,10 +469,10 @@ enum isRegexFor(RegEx, R) = is(RegEx == Regex!(BasicElementOf!R))
|
|||
|
||||
First element of range is the whole match.
|
||||
+/
|
||||
@trusted public struct Captures(R, DIndex = size_t)
|
||||
@trusted public struct Captures(R)
|
||||
if (isSomeString!R)
|
||||
{//@trusted because of union inside
|
||||
alias DataIndex = DIndex;
|
||||
alias DataIndex = size_t;
|
||||
alias String = R;
|
||||
private:
|
||||
import std.conv : text;
|
||||
|
@ -480,9 +487,9 @@ private:
|
|||
}
|
||||
uint _f, _b;
|
||||
uint _refcount; // ref count or SMALL MASK + num groups
|
||||
NamedGroup[] _names;
|
||||
const(NamedGroup)[] _names;
|
||||
|
||||
this()(R input, uint n, NamedGroup[] named)
|
||||
this(R input, uint n, const(NamedGroup)[] named)
|
||||
{
|
||||
_input = input;
|
||||
_names = named;
|
||||
|
@ -491,11 +498,11 @@ private:
|
|||
_f = 0;
|
||||
}
|
||||
|
||||
this(alias Engine)(ref RegexMatch!(R,Engine) rmatch)
|
||||
this(ref RegexMatch!R rmatch)
|
||||
{
|
||||
_input = rmatch._input;
|
||||
_names = rmatch._engine.re.dict;
|
||||
immutable n = rmatch._engine.re.ngroup;
|
||||
_names = rmatch._engine.pattern.dict;
|
||||
immutable n = rmatch._engine.pattern.ngroup;
|
||||
newMatches(n);
|
||||
_b = n;
|
||||
_f = 0;
|
||||
|
@ -693,58 +700,38 @@ public:
|
|||
|
||||
Effectively it's a forward range of Captures!R, produced
|
||||
by lazily searching for matches in a given input.
|
||||
|
||||
$(D alias Engine) specifies an engine type to use during matching,
|
||||
and is automatically deduced in a call to $(D match)/$(D bmatch).
|
||||
+/
|
||||
@trusted public struct RegexMatch(R, alias Engine = ThompsonMatcher)
|
||||
@trusted public struct RegexMatch(R)
|
||||
if (isSomeString!R)
|
||||
{
|
||||
private:
|
||||
import core.stdc.stdlib : malloc, free;
|
||||
alias Char = BasicElementOf!R;
|
||||
alias EngineType = Engine!Char;
|
||||
EngineType _engine;
|
||||
Matcher!Char _engine;
|
||||
const MatcherFactory!Char _factory;
|
||||
R _input;
|
||||
Captures!(R,EngineType.DataIndex) _captures;
|
||||
void[] _memory;//is ref-counted
|
||||
Captures!R _captures;
|
||||
|
||||
this(RegEx)(R input, RegEx prog)
|
||||
{
|
||||
import std.exception : enforce;
|
||||
_input = input;
|
||||
immutable size = EngineType.initialMemory(prog)+size_t.sizeof;
|
||||
_memory = (enforce(malloc(size), "malloc failed")[0 .. size]);
|
||||
scope(failure) free(_memory.ptr);
|
||||
*cast(size_t*)_memory.ptr = 1;
|
||||
_engine = EngineType(prog, Input!Char(input), _memory[size_t.sizeof..$]);
|
||||
static if (is(RegEx == StaticRegex!(BasicElementOf!R)))
|
||||
_engine.nativeFn = prog.nativeFn;
|
||||
_captures = Captures!(R,EngineType.DataIndex)(this);
|
||||
if (prog.factory is null) _factory = defaultFactory!Char(prog);
|
||||
else _factory = prog.factory;
|
||||
_engine = _factory.create(prog, input);
|
||||
assert(_engine.refCount == 1);
|
||||
_captures = Captures!R(this);
|
||||
_captures._nMatch = _engine.match(_captures.matches);
|
||||
debug(std_regex_allocation) writefln("RefCount (ctor): %x %d", _memory.ptr, counter);
|
||||
}
|
||||
|
||||
@property ref size_t counter(){ return *cast(size_t*)_memory.ptr; }
|
||||
public:
|
||||
this(this)
|
||||
{
|
||||
if (_memory.ptr)
|
||||
{
|
||||
++counter;
|
||||
debug(std_regex_allocation) writefln("RefCount (postblit): %x %d",
|
||||
_memory.ptr, *cast(size_t*)_memory.ptr);
|
||||
}
|
||||
if (_engine) _factory.incRef(_engine);
|
||||
}
|
||||
|
||||
~this()
|
||||
{
|
||||
if (_memory.ptr && --*cast(size_t*)_memory.ptr == 0)
|
||||
{
|
||||
debug(std_regex_allocation) writefln("RefCount (dtor): %x %d",
|
||||
_memory.ptr, *cast(size_t*)_memory.ptr);
|
||||
free(cast(void*)_memory.ptr);
|
||||
}
|
||||
if (_engine) _factory.decRef(_engine);
|
||||
}
|
||||
|
||||
///Shorthands for front.pre, front.post, front.hit.
|
||||
|
@ -786,19 +773,18 @@ public:
|
|||
void popFront()
|
||||
{
|
||||
import std.exception : enforce;
|
||||
if (counter != 1)
|
||||
{//do cow magic first
|
||||
counter--;//we abandon this reference
|
||||
immutable size = EngineType.initialMemory(_engine.re)+size_t.sizeof;
|
||||
_memory = (enforce(malloc(size), "malloc failed")[0 .. size]);
|
||||
_engine = _engine.dupTo(_memory[size_t.sizeof .. size]);
|
||||
counter = 1;//points to new chunk
|
||||
// CoW - if refCount is not 1, we are aliased by somebody else
|
||||
if (_engine.refCount != 1)
|
||||
{
|
||||
// we create a new engine & abandon this reference
|
||||
auto old = _engine;
|
||||
_engine = _factory.dup(old, _input);
|
||||
_factory.decRef(old);
|
||||
}
|
||||
|
||||
if (!_captures.unique)
|
||||
{
|
||||
// has external references - allocate new space
|
||||
_captures.newMatches(_engine.re.ngroup);
|
||||
_captures.newMatches(_engine.pattern.ngroup);
|
||||
}
|
||||
_captures._nMatch = _engine.match(_captures.matches);
|
||||
}
|
||||
|
@ -814,39 +800,30 @@ public:
|
|||
|
||||
/// Same as .front, provided for compatibility with original std.regex.
|
||||
@property auto captures() inout { return _captures; }
|
||||
|
||||
}
|
||||
|
||||
private @trusted auto matchOnce(alias Engine, RegEx, R)(R input, RegEx re)
|
||||
private @trusted auto matchOnce(RegEx, R)(R input, const RegEx prog)
|
||||
{
|
||||
import core.stdc.stdlib : malloc, free;
|
||||
import std.exception : enforce;
|
||||
alias Char = BasicElementOf!R;
|
||||
alias EngineType = Engine!Char;
|
||||
|
||||
size_t size = EngineType.initialMemory(re);
|
||||
void[] memory = enforce(malloc(size), "malloc failed")[0 .. size];
|
||||
scope(exit) free(memory.ptr);
|
||||
auto captures = Captures!(R, EngineType.DataIndex)(input, re.ngroup, re.dict);
|
||||
auto engine = EngineType(re, Input!Char(input), memory);
|
||||
static if (is(RegEx == StaticRegex!(BasicElementOf!R)))
|
||||
engine.nativeFn = re.nativeFn;
|
||||
auto factory = prog.factory is null ? defaultFactory!Char(prog) : prog.factory;
|
||||
auto engine = factory.create(prog, input);
|
||||
scope(exit) factory.decRef(engine); // destroys the engine
|
||||
auto captures = Captures!R(input, prog.ngroup, prog.dict);
|
||||
captures._nMatch = engine.match(captures.matches);
|
||||
return captures;
|
||||
}
|
||||
|
||||
private auto matchMany(alias Engine, RegEx, R)(R input, RegEx re)
|
||||
private auto matchMany(RegEx, R)(R input, RegEx re) @safe
|
||||
{
|
||||
re.flags |= RegexOption.global;
|
||||
return RegexMatch!(R, Engine)(input, re);
|
||||
return RegexMatch!R(input, re.withFlags(re.flags | RegexOption.global));
|
||||
}
|
||||
|
||||
@system unittest
|
||||
{
|
||||
//sanity checks for new API
|
||||
auto re = regex("abc");
|
||||
assert(!"abc".matchOnce!(ThompsonMatcher)(re).empty);
|
||||
assert("abc".matchOnce!(ThompsonMatcher)(re)[0] == "abc");
|
||||
assert(!"abc".matchOnce(re).empty);
|
||||
assert("abc".matchOnce(re)[0] == "abc");
|
||||
}
|
||||
|
||||
|
||||
|
@ -938,25 +915,16 @@ if (isSomeString!R && isRegexFor!(RegEx, R))
|
|||
+/
|
||||
|
||||
public auto match(R, RegEx)(R input, RegEx re)
|
||||
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
|
||||
if (isSomeString!R && isRegexFor!(RegEx,R))
|
||||
{
|
||||
import std.regex.internal.thompson : ThompsonMatcher;
|
||||
return RegexMatch!(Unqual!(typeof(input)),ThompsonMatcher)(input, re);
|
||||
return RegexMatch!(Unqual!(typeof(input)))(input, re);
|
||||
}
|
||||
|
||||
///ditto
|
||||
public auto match(R, String)(R input, String re)
|
||||
if (isSomeString!R && isSomeString!String)
|
||||
{
|
||||
import std.regex.internal.thompson : ThompsonMatcher;
|
||||
return RegexMatch!(Unqual!(typeof(input)),ThompsonMatcher)(input, regex(re));
|
||||
}
|
||||
|
||||
public auto match(R, RegEx)(R input, RegEx re)
|
||||
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
|
||||
{
|
||||
import std.regex.internal.backtracking : BacktrackingMatcher;
|
||||
return RegexMatch!(Unqual!(typeof(input)),BacktrackingMatcher!true)(input, re);
|
||||
return RegexMatch!(Unqual!(typeof(input)))(input, regex(re));
|
||||
}
|
||||
|
||||
/++
|
||||
|
@ -978,33 +946,23 @@ if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
|
|||
if there was a match, otherwise an empty $(LREF Captures) object.
|
||||
+/
|
||||
public auto matchFirst(R, RegEx)(R input, RegEx re)
|
||||
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
|
||||
if (isSomeString!R && isRegexFor!(RegEx, R))
|
||||
{
|
||||
import std.regex.internal.thompson : ThompsonMatcher;
|
||||
return matchOnce!ThompsonMatcher(input, re);
|
||||
return matchOnce(input, re);
|
||||
}
|
||||
|
||||
///ditto
|
||||
public auto matchFirst(R, String)(R input, String re)
|
||||
if (isSomeString!R && isSomeString!String)
|
||||
{
|
||||
import std.regex.internal.thompson : ThompsonMatcher;
|
||||
return matchOnce!ThompsonMatcher(input, regex(re));
|
||||
return matchOnce(input, regex(re));
|
||||
}
|
||||
|
||||
///ditto
|
||||
public auto matchFirst(R, String)(R input, String[] re...)
|
||||
if (isSomeString!R && isSomeString!String)
|
||||
{
|
||||
import std.regex.internal.thompson : ThompsonMatcher;
|
||||
return matchOnce!ThompsonMatcher(input, regex(re));
|
||||
}
|
||||
|
||||
public auto matchFirst(R, RegEx)(R input, RegEx re)
|
||||
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
|
||||
{
|
||||
import std.regex.internal.backtracking : BacktrackingMatcher;
|
||||
return matchOnce!(BacktrackingMatcher!true)(input, re);
|
||||
return matchOnce(input, regex(re));
|
||||
}
|
||||
|
||||
/++
|
||||
|
@ -1029,33 +987,23 @@ if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
|
|||
after the first match was found or an empty one if not present.
|
||||
+/
|
||||
public auto matchAll(R, RegEx)(R input, RegEx re)
|
||||
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
|
||||
if (isSomeString!R && isRegexFor!(RegEx, R))
|
||||
{
|
||||
import std.regex.internal.thompson : ThompsonMatcher;
|
||||
return matchMany!ThompsonMatcher(input, re);
|
||||
return matchMany(input, re);
|
||||
}
|
||||
|
||||
///ditto
|
||||
public auto matchAll(R, String)(R input, String re)
|
||||
if (isSomeString!R && isSomeString!String)
|
||||
{
|
||||
import std.regex.internal.thompson : ThompsonMatcher;
|
||||
return matchMany!ThompsonMatcher(input, regex(re));
|
||||
return matchMany(input, regex(re));
|
||||
}
|
||||
|
||||
///ditto
|
||||
public auto matchAll(R, String)(R input, String[] re...)
|
||||
if (isSomeString!R && isSomeString!String)
|
||||
{
|
||||
import std.regex.internal.thompson : ThompsonMatcher;
|
||||
return matchMany!ThompsonMatcher(input, regex(re));
|
||||
}
|
||||
|
||||
public auto matchAll(R, RegEx)(R input, RegEx re)
|
||||
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
|
||||
{
|
||||
import std.regex.internal.backtracking : BacktrackingMatcher;
|
||||
return matchMany!(BacktrackingMatcher!true)(input, re);
|
||||
return matchMany(input, regex(re));
|
||||
}
|
||||
|
||||
// another set of tests just to cover the new API
|
||||
|
@ -1119,25 +1067,16 @@ if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
|
|||
|
||||
+/
|
||||
public auto bmatch(R, RegEx)(R input, RegEx re)
|
||||
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
|
||||
if (isSomeString!R && isRegexFor!(RegEx, R))
|
||||
{
|
||||
import std.regex.internal.backtracking : BacktrackingMatcher;
|
||||
return RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false)(input, re);
|
||||
return RegexMatch!(Unqual!(typeof(input)))(input, re);
|
||||
}
|
||||
|
||||
///ditto
|
||||
public auto bmatch(R, String)(R input, String re)
|
||||
if (isSomeString!R && isSomeString!String)
|
||||
{
|
||||
import std.regex.internal.backtracking : BacktrackingMatcher;
|
||||
return RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false)(input, regex(re));
|
||||
}
|
||||
|
||||
public auto bmatch(R, RegEx)(R input, RegEx re)
|
||||
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
|
||||
{
|
||||
import std.regex.internal.backtracking : BacktrackingMatcher;
|
||||
return RegexMatch!(Unqual!(typeof(input)),BacktrackingMatcher!true)(input, re);
|
||||
return RegexMatch!(Unqual!(typeof(input)))(input, regex(re));
|
||||
}
|
||||
|
||||
// produces replacement string from format using captures for substitution
|
||||
|
@ -1530,7 +1469,7 @@ private:
|
|||
@trusted this(Range input, RegEx separator)
|
||||
{//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted
|
||||
_input = input;
|
||||
separator.flags |= RegexOption.global;
|
||||
const re = separator.withFlags(separator.flags | RegexOption.global);
|
||||
if (_input.empty)
|
||||
{
|
||||
//there is nothing to match at all, make _offset > 0
|
||||
|
@ -1538,7 +1477,7 @@ private:
|
|||
}
|
||||
else
|
||||
{
|
||||
_match = Rx(_input, separator);
|
||||
_match = Rx(_input, re);
|
||||
|
||||
static if (keepSeparators)
|
||||
if (_match.pre.empty)
|
||||
|
|
104
std/uni.d
104
std/uni.d
|
@ -2184,6 +2184,12 @@ pure:
|
|||
return Intervals!(typeof(data))(data);
|
||||
}
|
||||
|
||||
package @property const(CodepointInterval)[] intervals() const
|
||||
{
|
||||
import std.array : array;
|
||||
return Intervals!(typeof(data[]))(data[]).array;
|
||||
}
|
||||
|
||||
/**
|
||||
Tests the presence of code point $(D val) in this set.
|
||||
*/
|
||||
|
@ -2619,52 +2625,9 @@ public:
|
|||
assert((set & set.inverted).empty);
|
||||
}
|
||||
|
||||
/**
|
||||
Generates string with D source code of unary function with name of
|
||||
$(D funcName) taking a single $(D dchar) argument. If $(D funcName) is empty
|
||||
the code is adjusted to be a lambda function.
|
||||
|
||||
The function generated tests if the $(CODEPOINT) passed
|
||||
belongs to this set or not. The result is to be used with string mixin.
|
||||
The intended usage area is aggressive optimization via meta programming
|
||||
in parser generators and the like.
|
||||
|
||||
Note: Use with care for relatively small or regular sets. It
|
||||
could end up being slower than just using multi-staged tables.
|
||||
|
||||
Example:
|
||||
---
|
||||
import std.stdio;
|
||||
|
||||
// construct set directly from [a, b$RPAREN intervals
|
||||
auto set = CodepointSet(10, 12, 45, 65, 100, 200);
|
||||
writeln(set);
|
||||
writeln(set.toSourceCode("func"));
|
||||
---
|
||||
|
||||
The above outputs something along the lines of:
|
||||
---
|
||||
bool func(dchar ch) @safe pure nothrow @nogc
|
||||
{
|
||||
if (ch < 45)
|
||||
{
|
||||
if (ch == 10 || ch == 11) return true;
|
||||
return false;
|
||||
}
|
||||
else if (ch < 65) return true;
|
||||
else
|
||||
{
|
||||
if (ch < 100) return false;
|
||||
if (ch < 200) return true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
---
|
||||
*/
|
||||
string toSourceCode(string funcName="")
|
||||
package static string toSourceCode(const(CodepointInterval)[] range, string funcName)
|
||||
{
|
||||
import std.algorithm.searching : countUntil;
|
||||
import std.array : array;
|
||||
import std.format : format;
|
||||
enum maxBinary = 3;
|
||||
static string linearScope(R)(R ivals, string indent)
|
||||
|
@ -2746,7 +2709,6 @@ public:
|
|||
|
||||
string code = format("bool %s(dchar ch) @safe pure nothrow @nogc\n",
|
||||
funcName.empty ? "function" : funcName);
|
||||
auto range = byInterval.array();
|
||||
// special case first bisection to be on ASCII vs beyond
|
||||
auto tillAscii = countUntil!"a[0] > 0x80"(range);
|
||||
if (tillAscii <= 0) // everything is ASCII or nothing is ascii (-1 & 0)
|
||||
|
@ -2756,6 +2718,55 @@ public:
|
|||
return code;
|
||||
}
|
||||
|
||||
/**
|
||||
Generates string with D source code of unary function with name of
|
||||
$(D funcName) taking a single $(D dchar) argument. If $(D funcName) is empty
|
||||
the code is adjusted to be a lambda function.
|
||||
|
||||
The function generated tests if the $(CODEPOINT) passed
|
||||
belongs to this set or not. The result is to be used with string mixin.
|
||||
The intended usage area is aggressive optimization via meta programming
|
||||
in parser generators and the like.
|
||||
|
||||
Note: Use with care for relatively small or regular sets. It
|
||||
could end up being slower than just using multi-staged tables.
|
||||
|
||||
Example:
|
||||
---
|
||||
import std.stdio;
|
||||
|
||||
// construct set directly from [a, b$RPAREN intervals
|
||||
auto set = CodepointSet(10, 12, 45, 65, 100, 200);
|
||||
writeln(set);
|
||||
writeln(set.toSourceCode("func"));
|
||||
---
|
||||
|
||||
The above outputs something along the lines of:
|
||||
---
|
||||
bool func(dchar ch) @safe pure nothrow @nogc
|
||||
{
|
||||
if (ch < 45)
|
||||
{
|
||||
if (ch == 10 || ch == 11) return true;
|
||||
return false;
|
||||
}
|
||||
else if (ch < 65) return true;
|
||||
else
|
||||
{
|
||||
if (ch < 100) return false;
|
||||
if (ch < 200) return true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
---
|
||||
*/
|
||||
string toSourceCode(string funcName="")
|
||||
{
|
||||
import std.array : array;
|
||||
auto range = byInterval.array();
|
||||
return toSourceCode(range, funcName);
|
||||
}
|
||||
|
||||
/**
|
||||
True if this set doesn't contain any $(CODEPOINTS).
|
||||
*/
|
||||
|
@ -2802,6 +2813,7 @@ private:
|
|||
|
||||
//may break sorted property - but we need std.sort to access it
|
||||
//hence package protection attribute
|
||||
static if (hasAssignableElements!Range)
|
||||
package @property void front(CodepointInterval val)
|
||||
{
|
||||
slice[start] = val.a;
|
||||
|
@ -2816,6 +2828,7 @@ private:
|
|||
}
|
||||
|
||||
//ditto about package
|
||||
static if (hasAssignableElements!Range)
|
||||
package @property void back(CodepointInterval val)
|
||||
{
|
||||
slice[end-2] = val.a;
|
||||
|
@ -2840,6 +2853,7 @@ private:
|
|||
}
|
||||
|
||||
//ditto about package
|
||||
static if (hasAssignableElements!Range)
|
||||
package void opIndexAssign(CodepointInterval val, size_t idx)
|
||||
{
|
||||
slice[start+idx*2] = val.a;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue