Merge pull request #5722 from DmitryOlshansky/regex-matcher-interfaces

std.regex: major internal redesign, also fixes issue 13532
merged-on-behalf-of: Andrei Alexandrescu <andralex@users.noreply.github.com>
This commit is contained in:
The Dlang Bot 2017-10-16 20:16:33 +02:00 committed by GitHub
commit ad489989ec
8 changed files with 1140 additions and 976 deletions

View file

@ -13,9 +13,7 @@ import std.regex.internal.ir;
BacktrackingMatcher implements backtracking scheme of matching BacktrackingMatcher implements backtracking scheme of matching
regular expressions. regular expressions.
+/ +/
template BacktrackingMatcher(bool CTregex) @trusted class BacktrackingMatcher(Char, Stream = Input!Char) : Matcher!Char
{
@trusted struct BacktrackingMatcher(Char, Stream = Input!Char)
if (is(Char : dchar)) if (is(Char : dchar))
{ {
alias DataIndex = Stream.DataIndex; alias DataIndex = Stream.DataIndex;
@ -29,19 +27,17 @@ template BacktrackingMatcher(bool CTregex)
enum initialStack = 1 << 11; // items in a block of segmented stack enum initialStack = 1 << 11; // items in a block of segmented stack
alias String = const(Char)[]; alias String = const(Char)[];
alias RegEx = Regex!Char; alias RegEx = Regex!Char;
alias MatchFn = bool function (ref BacktrackingMatcher!(Char, Stream)); alias MatchFn = bool function (BacktrackingMatcher);
RegEx re; //regex program const RegEx re; // regex program
static if (CTregex)
MatchFn nativeFn; // native code for that program MatchFn nativeFn; // native code for that program
// Stream state // Stream state
Stream s; Stream s;
DataIndex index; DataIndex index;
dchar front; dchar front;
bool exhausted; bool exhausted;
//backtracking machine state // Backtracking machine state
uint pc, counter; uint pc, counter;
DataIndex lastState = 0; //top of state stack DataIndex lastState = 0; // Top of state stack
static if (!CTregex)
uint infiniteNesting; uint infiniteNesting;
size_t[] memory; size_t[] memory;
Trace[] merge; Trace[] merge;
@ -69,6 +65,11 @@ template BacktrackingMatcher(bool CTregex)
} }
//local slice of matches, global for backref //local slice of matches, global for backref
Group!DataIndex[] matches, backrefed; Group!DataIndex[] matches, backrefed;
size_t _refCount;
final:
override @property ref size_t refCount() { return _refCount; }
override @property ref const(RegEx) pattern(){ return re; }
static if (__traits(hasMember,Stream, "search")) static if (__traits(hasMember,Stream, "search"))
{ {
@ -153,49 +154,64 @@ template BacktrackingMatcher(bool CTregex)
memory = memory[2..$]; memory = memory[2..$];
} }
void initialize(ref RegEx program, Stream stream, void[] memBlock) void initialize(ref const RegEx program, Stream stream, void[] memBlock)
{ {
re = program;
s = stream; s = stream;
exhausted = false; exhausted = false;
initExternalMemory(memBlock); initExternalMemory(memBlock);
backrefed = null; backrefed = null;
} }
auto dupTo(void[] memory) override void dupTo(Matcher!Char m, void[] memBlock)
{ {
typeof(this) tmp = this; auto backtracking = cast(BacktrackingMatcher) m;
tmp.initExternalMemory(memory); backtracking.s = s;
return tmp; backtracking.front = front;
backtracking.index = index;
backtracking.exhausted = exhausted;
backtracking.initExternalMemory(memBlock);
} }
this(ref RegEx program, Stream stream, void[] memBlock, dchar ch, DataIndex idx) this(ref const RegEx program, Stream stream, void[] memBlock, dchar ch, DataIndex idx)
{ {
_refCount = 1;
re = program;
nativeFn = null;
initialize(program, stream, memBlock); initialize(program, stream, memBlock);
front = ch; front = ch;
index = idx; index = idx;
} }
this(ref RegEx program, Stream stream, void[] memBlock) this(ref const RegEx program, MatchFn func, Stream stream, void[] memBlock)
{ {
_refCount = 1;
re = program;
initialize(program, stream, memBlock);
nativeFn = func;
next();
}
this(ref const RegEx program, Stream stream, void[] memBlock)
{
_refCount = 1;
re = program;
nativeFn = null;
initialize(program, stream, memBlock); initialize(program, stream, memBlock);
next(); next();
} }
auto fwdMatcher(ref BacktrackingMatcher matcher, void[] memBlock) auto fwdMatcher(ref const RegEx re, void[] memBlock)
{ {
alias BackMatcherTempl = .BacktrackingMatcher!(CTregex); alias BackMatcher = BacktrackingMatcher!(Char, Stream);
alias BackMatcher = BackMatcherTempl!(Char, Stream); auto fwdMatcher = new BackMatcher(re, s, memBlock, front, index);
auto fwdMatcher = BackMatcher(matcher.re, s, memBlock, front, index);
return fwdMatcher; return fwdMatcher;
} }
auto bwdMatcher(ref BacktrackingMatcher matcher, void[] memBlock) auto bwdMatcher(ref const RegEx re, void[] memBlock)
{ {
alias BackMatcherTempl = .BacktrackingMatcher!(CTregex); alias BackMatcher = BacktrackingMatcher!(Char, typeof(s.loopBack(index)));
alias BackMatcher = BackMatcherTempl!(Char, typeof(s.loopBack(index)));
auto fwdMatcher = auto fwdMatcher =
BackMatcher(matcher.re, s.loopBack(index), memBlock); new BackMatcher(re, s.loopBack(index), memBlock);
return fwdMatcher; return fwdMatcher;
} }
@ -219,7 +235,7 @@ template BacktrackingMatcher(bool CTregex)
} }
//lookup next match, fill matches with indices into input //lookup next match, fill matches with indices into input
int match(Group!DataIndex[] matches) override int match(Group!DataIndex[] matches)
{ {
debug(std_regex_matcher) debug(std_regex_matcher)
{ {
@ -293,7 +309,7 @@ template BacktrackingMatcher(bool CTregex)
+/ +/
int matchImpl() int matchImpl()
{ {
static if (CTregex && is(typeof(nativeFn(this)))) if (nativeFn)
{ {
debug(std_regex_ctr) writeln("using C-T matcher"); debug(std_regex_ctr) writeln("using C-T matcher");
return nativeFn(this); return nativeFn(this);
@ -303,6 +319,7 @@ template BacktrackingMatcher(bool CTregex)
pc = 0; pc = 0;
counter = 0; counter = 0;
lastState = 0; lastState = 0;
infiniteNesting = 0;
matches[] = Group!DataIndex.init; matches[] = Group!DataIndex.init;
auto start = s._index; auto start = s._index;
debug(std_regex_matcher) debug(std_regex_matcher)
@ -580,19 +597,19 @@ template BacktrackingMatcher(bool CTregex)
immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw; immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw;
auto mem = malloc(initialMemory(re))[0 .. initialMemory(re)]; auto mem = malloc(initialMemory(re))[0 .. initialMemory(re)];
scope(exit) free(mem.ptr); scope(exit) free(mem.ptr);
auto slicedRe = re.withCode(re.ir[
pc+IRL!(IR.LookaheadStart) .. pc+IRL!(IR.LookaheadStart)+len+IRL!(IR.LookaheadEnd)
]);
static if (Stream.isLoopback) static if (Stream.isLoopback)
{ {
auto matcher = bwdMatcher(this, mem); auto matcher = bwdMatcher(slicedRe, mem);
} }
else else
{ {
auto matcher = fwdMatcher(this, mem); auto matcher = fwdMatcher(slicedRe, mem);
} }
matcher.matches = matches[ms .. me]; matcher.matches = matches[ms .. me];
matcher.backrefed = backrefed.empty ? matches : backrefed; matcher.backrefed = backrefed.empty ? matches : backrefed;
matcher.re.ir = re.ir[
pc+IRL!(IR.LookaheadStart) .. pc+IRL!(IR.LookaheadStart)+len+IRL!(IR.LookaheadEnd)
];
immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookaheadStart); immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookaheadStart);
s.reset(save); s.reset(save);
next(); next();
@ -609,20 +626,20 @@ template BacktrackingMatcher(bool CTregex)
immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw; immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw;
auto mem = malloc(initialMemory(re))[0 .. initialMemory(re)]; auto mem = malloc(initialMemory(re))[0 .. initialMemory(re)];
scope(exit) free(mem.ptr); scope(exit) free(mem.ptr);
auto slicedRe = re.withCode(re.ir[
pc + IRL!(IR.LookbehindStart) .. pc + IRL!(IR.LookbehindStart) + len + IRL!(IR.LookbehindEnd)
]);
static if (Stream.isLoopback) static if (Stream.isLoopback)
{ {
alias Matcher = BacktrackingMatcher!(Char, Stream); alias Matcher = BacktrackingMatcher!(Char, Stream);
auto matcher = Matcher(re, s, mem, front, index); auto matcher = new Matcher(slicedRe, s, mem, front, index);
} }
else else
{ {
alias Matcher = BacktrackingMatcher!(Char, typeof(s.loopBack(index))); alias Matcher = BacktrackingMatcher!(Char, typeof(s.loopBack(index)));
auto matcher = Matcher(re, s.loopBack(index), mem); auto matcher = new Matcher(slicedRe, s.loopBack(index), mem);
} }
matcher.matches = matches[ms .. me]; matcher.matches = matches[ms .. me];
matcher.re.ir = re.ir[
pc + IRL!(IR.LookbehindStart) .. pc + IRL!(IR.LookbehindStart) + len + IRL!(IR.LookbehindEnd)
];
matcher.backrefed = backrefed.empty ? matches : backrefed; matcher.backrefed = backrefed.empty ? matches : backrefed;
immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookbehindStart); immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookbehindStart);
if (!match) if (!match)
@ -715,9 +732,6 @@ template BacktrackingMatcher(bool CTregex)
val[0..$] = (cast(T*)&memory[lastState])[0 .. val.length]; val[0..$] = (cast(T*)&memory[lastState])[0 .. val.length];
debug(std_regex_matcher) writeln("pop array SP= ", lastState); debug(std_regex_matcher) writeln("pop array SP= ", lastState);
} }
static if (!CTregex)
{
//helper function, saves engine state //helper function, saves engine state
void pushState(uint pc, uint counter) void pushState(uint pc, uint counter)
{ {
@ -763,8 +777,6 @@ template BacktrackingMatcher(bool CTregex)
return true; return true;
} }
} }
}
}
//very shitty string formatter, $$ replaced with next argument converted to string //very shitty string formatter, $$ replaced with next argument converted to string
@trusted string ctSub( U...)(string format, U args) @trusted string ctSub( U...)(string format, U args)
@ -805,7 +817,7 @@ struct CtContext
//to mark the portion of matches to save //to mark the portion of matches to save
int match, total_matches; int match, total_matches;
int reserved; int reserved;
CodepointSet[] charsets; const(CodepointInterval)[][] charsets;
//state of codegenerator //state of codegenerator
@ -815,12 +827,15 @@ struct CtContext
int addr; int addr;
} }
this(Char)(Regex!Char re) this(Char)(const Regex!Char re)
{ {
match = 1; match = 1;
reserved = 1; //first match is skipped reserved = 1; //first match is skipped
total_matches = re.ngroup; total_matches = re.ngroup;
charsets = re.charsets; foreach (ref set; re.charsets)
{
charsets ~= set.intervals;
}
} }
CtContext lookaround(uint s, uint e) CtContext lookaround(uint s, uint e)
@ -876,7 +891,7 @@ struct CtContext
} }
// //
CtState ctGenBlock(Bytecode[] ir, int addr) CtState ctGenBlock(const(Bytecode)[] ir, int addr)
{ {
CtState result; CtState result;
result.addr = addr; result.addr = addr;
@ -890,7 +905,7 @@ struct CtContext
} }
// //
CtState ctGenGroup(ref Bytecode[] ir, int addr) CtState ctGenGroup(ref const(Bytecode)[] ir, int addr)
{ {
import std.algorithm.comparison : max; import std.algorithm.comparison : max;
auto bailOut = "goto L_backtrack;"; auto bailOut = "goto L_backtrack;";
@ -932,10 +947,10 @@ struct CtContext
immutable len = ir[0].data; immutable len = ir[0].data;
immutable behind = ir[0].code == IR.LookbehindStart || ir[0].code == IR.NeglookbehindStart; immutable behind = ir[0].code == IR.LookbehindStart || ir[0].code == IR.NeglookbehindStart;
immutable negative = ir[0].code == IR.NeglookaheadStart || ir[0].code == IR.NeglookbehindStart; immutable negative = ir[0].code == IR.NeglookaheadStart || ir[0].code == IR.NeglookbehindStart;
string fwdType = "typeof(fwdMatcher(matcher, []))"; string fwdType = "typeof(fwdMatcher(re, []))";
string bwdType = "typeof(bwdMatcher(matcher, []))"; string bwdType = "typeof(bwdMatcher(re, []))";
string fwdCreate = "fwdMatcher(matcher, mem)"; string fwdCreate = "fwdMatcher(re, mem)";
string bwdCreate = "bwdMatcher(matcher, mem)"; string bwdCreate = "bwdMatcher(re, mem)";
immutable start = IRL!(IR.LookbehindStart); immutable start = IRL!(IR.LookbehindStart);
immutable end = IRL!(IR.LookbehindStart)+len+IRL!(IR.LookaheadEnd); immutable end = IRL!(IR.LookbehindStart)+len+IRL!(IR.LookaheadEnd);
CtContext context = lookaround(ir[1].raw, ir[2].raw); //split off new context CtContext context = lookaround(ir[1].raw, ir[2].raw); //split off new context
@ -946,7 +961,7 @@ struct CtContext
alias Lookaround = $$; alias Lookaround = $$;
else else
alias Lookaround = $$; alias Lookaround = $$;
static bool matcher_$$(ref Lookaround matcher) @trusted static bool matcher_$$(Lookaround matcher) @trusted
{ {
//(neg)lookaround piece start //(neg)lookaround piece start
$$ $$
@ -992,7 +1007,7 @@ struct CtContext
} }
//generate source for bytecode contained in OrStart ... OrEnd //generate source for bytecode contained in OrStart ... OrEnd
CtState ctGenAlternation(Bytecode[] ir, int addr) CtState ctGenAlternation(const(Bytecode)[] ir, int addr)
{ {
CtState[] pieces; CtState[] pieces;
CtState r; CtState r;
@ -1032,11 +1047,11 @@ struct CtContext
// generate fixup code for instruction in ir, // generate fixup code for instruction in ir,
// fixup means it has an alternative way for control flow // fixup means it has an alternative way for control flow
string ctGenFixupCode(Bytecode[] ir, int addr, int fixup) string ctGenFixupCode(const(Bytecode)[] ir, int addr, int fixup)
{ {
return ctGenFixupCode(ir, addr, fixup); // call ref Bytecode[] version return ctGenFixupCode(ir, addr, fixup); // call ref Bytecode[] version
} }
string ctGenFixupCode(ref Bytecode[] ir, int addr, int fixup) string ctGenFixupCode(ref const(Bytecode)[] ir, int addr, int fixup)
{ {
string r; string r;
string testCode; string testCode;
@ -1190,7 +1205,7 @@ struct CtContext
} }
string ctQuickTest(Bytecode[] ir, int id) string ctQuickTest(const(Bytecode)[] ir, int id)
{ {
uint pc = 0; uint pc = 0;
while (pc < ir.length && ir[pc].isAtom) while (pc < ir.length && ir[pc].isAtom)
@ -1217,7 +1232,7 @@ struct CtContext
} }
//process & generate source for simple bytecodes at front of ir using address addr //process & generate source for simple bytecodes at front of ir using address addr
CtState ctGenAtom(ref Bytecode[] ir, int addr) CtState ctGenAtom(ref const(Bytecode)[] ir, int addr)
{ {
CtState result; CtState result;
result.code = ctAtomCode(ir, addr); result.code = ctAtomCode(ir, addr);
@ -1227,7 +1242,7 @@ struct CtContext
} }
//D code for atom at ir using address addr, addr < 0 means quickTest //D code for atom at ir using address addr, addr < 0 means quickTest
string ctAtomCode(Bytecode[] ir, int addr) string ctAtomCode(const(Bytecode)[] ir, int addr)
{ {
string code; string code;
string bailOut, nextInstr; string bailOut, nextInstr;
@ -1282,7 +1297,7 @@ struct CtContext
if (charsets.length) if (charsets.length)
{ {
string name = `func_`~to!string(addr+1); string name = `func_`~to!string(addr+1);
string funcCode = charsets[ir[0].data].toSourceCode(name); string funcCode = CodepointSet.toSourceCode(charsets[ir[0].data], name);
code ~= ctSub( ` code ~= ctSub( `
static $$ static $$
if (atEnd || !$$(front)) if (atEnd || !$$(front))
@ -1298,7 +1313,7 @@ struct CtContext
$$`, ir[0].data, bailOut, addr >= 0 ? "next();" :"", nextInstr); $$`, ir[0].data, bailOut, addr >= 0 ? "next();" :"", nextInstr);
break; break;
case IR.Trie: case IR.Trie:
if (charsets.length && charsets[ir[0].data].byInterval.length <= 8) if (charsets.length && charsets[ir[0].data].length <= 8)
goto case IR.CodepointSet; goto case IR.CodepointSet;
code ~= ctSub( ` code ~= ctSub( `
if (atEnd || !re.matchers[$$][front]) if (atEnd || !re.matchers[$$][front])
@ -1439,7 +1454,7 @@ struct CtContext
} }
//generate D code for the whole regex //generate D code for the whole regex
public string ctGenRegEx(Bytecode[] ir) public string ctGenRegEx(const(Bytecode)[] ir)
{ {
auto bdy = ctGenBlock(ir, 0); auto bdy = ctGenBlock(ir, 0);
auto r = ` auto r = `
@ -1488,7 +1503,7 @@ struct CtContext
} }
string ctGenRegExCode(Char)(Regex!Char re) string ctGenRegExCode(Char)(const Regex!Char re)
{ {
auto context = CtContext(re); auto context = CtContext(re);
return context.ctGenRegEx(re.ir); return context.ctGenRegEx(re.ir);

View file

@ -423,6 +423,134 @@ struct Group(DataIndex)
writeln("\t", disassemble(slice, pc, dict)); writeln("\t", disassemble(slice, pc, dict));
} }
// Factory interface through which matching engines (Matcher!Char) are
// created, duplicated and reference-counted.
// Encapsulates memory management, explicit ref counting
// and the exact type of engine created
// there is a single instance per engine combination type x Char
// In future may also maintain a (TLS?) cache of memory
interface MatcherFactory(Char)
{
@safe:
// Create a new engine for the given pattern over `input`
Matcher!Char create(const Regex!Char, in Char[] input) const;
// Create a new engine over `input` and copy the state of `m` into it
Matcher!Char dup(Matcher!Char m, in Char[] input) const;
// Increment the reference count of `m`; returns the updated count
size_t incRef(Matcher!Char m) const;
// Decrement the reference count of `m`; returns the updated count.
// The GenericFactory implementation in this file deallocates the engine
// once the count reaches zero.
size_t decRef(Matcher!Char m) const;
}
// Only memory management, no compile-time vs run-time specialities.
//
// Every engine lives in one malloc'ed block of
// `EngineType!Char.initialMemory(pattern) + classSize` bytes. The engine
// object is expected at the very start of the block (asserted in `create`),
// and the first `classSize` bytes are registered with the GC via
// GC.addRange. `decRef` unregisters and frees the block when the reference
// count drops to zero.
abstract class GenericFactory(alias EngineType, Char) : MatcherFactory!Char
{
    import core.stdc.stdlib : malloc, free;
    import core.memory : GC;
    // Size in bytes of the engine's class instance
    enum classSize = __traits(classInstanceSize, EngineType!Char);

    // Build an engine for `re` over `input` inside `memory`;
    // supplied by the concrete factories.
    Matcher!Char construct(const Regex!Char re, in Char[] input, void[] memory) const;

    // Allocate a block and construct a fresh engine for `re` over `input`.
    // Throws if malloc fails (via enforce).
    override Matcher!Char create(const Regex!Char re, in Char[] input) const @trusted
    {
        immutable size = EngineType!Char.initialMemory(re) + classSize;
        auto memory = enforce(malloc(size), "malloc failed")[0 .. size];
        scope(failure) free(memory.ptr);
        GC.addRange(memory.ptr, classSize);
        // Scope guards run in reverse order of declaration: on failure the
        // range is unregistered first and only then is the block freed, so
        // the GC is never left with a registered range over freed memory.
        scope(failure) GC.removeRange(memory.ptr);
        auto engine = construct(re, input, memory);
        assert(engine.refCount == 1);
        assert(cast(void*) engine == memory.ptr);
        return engine;
    }

    // Allocate a block, construct a new engine for `engine.pattern` over
    // `input` and copy the state of `engine` into it via `dupTo`.
    // Throws if malloc fails (via enforce).
    override Matcher!Char dup(Matcher!Char engine, in Char[] input) const @trusted
    {
        immutable size = EngineType!Char.initialMemory(engine.pattern) + classSize;
        auto memory = enforce(malloc(size), "malloc failed")[0 .. size];
        scope(failure) free(memory.ptr);
        auto copy = construct(engine.pattern, input, memory);
        GC.addRange(memory.ptr, classSize);
        // Undo the registration before the block is freed if dupTo throws
        scope(failure) GC.removeRange(memory.ptr);
        engine.dupTo(copy, memory[classSize .. size]);
        assert(copy.refCount == 1);
        return copy;
    }

    // Increment the reference count of `m`; returns the new count
    override size_t incRef(Matcher!Char m) const
    {
        return ++m.refCount;
    }

    // Decrement the reference count of `m`; when it reaches zero the block
    // holding the engine is unregistered from the GC and freed.
    // Returns the new count.
    override size_t decRef(Matcher!Char m) const @trusted
    {
        assert(m.refCount != 0);
        auto cnt = --m.refCount;
        if (cnt == 0)
        {
            void* ptr = cast(void*) m;
            GC.removeRange(ptr);
            free(ptr);
        }
        return cnt;
    }
}
// A factory for run-time engines: the engine is constructed without a
// native match function.
class RuntimeFactory(alias EngineType, Char) : GenericFactory!(EngineType, Char)
{
    // Emplace the engine into the first `classSize` bytes of `memory` and
    // pass the remainder of the block to its constructor.
    override EngineType!Char construct(const Regex!Char re, in Char[] input, void[] memory) const
    {
        import std.conv : emplace;
        alias Engine = EngineType!Char;
        void[] instanceStorage = memory[0 .. classSize];
        void[] arena = memory[classSize .. $];
        return emplace!Engine(instanceStorage, re, Input!Char(input), arena);
    }
}
// A factory for compile-time engine: the engine is constructed with the
// address of `func` as an extra constructor argument.
class CtfeFactory(alias EngineType, Char, alias func) : GenericFactory!(EngineType, Char)
{
    // Emplace the engine into the first `classSize` bytes of `memory` and
    // pass the remainder of the block to its constructor.
    override EngineType!Char construct(const Regex!Char re, in Char[] input, void[] memory) const
    {
        import std.conv : emplace;
        alias Engine = EngineType!Char;
        void[] instanceStorage = memory[0 .. classSize];
        void[] arena = memory[classSize .. $];
        return emplace!Engine(instanceStorage, re, &func, Input!Char(input), arena);
    }
}
// A workaround for R-T enum re = regex(...)
// Selects a run-time factory from the pattern's `backrefed` table: the
// backtracking engine if any entry is non-zero, the Thompson engine otherwise.
// Each factory is created on first use and then reused.
template defaultFactory(Char)
{
    @property MatcherFactory!Char defaultFactory(const Regex!Char re)
    {
        import std.regex.internal.backtracking : BacktrackingMatcher;
        import std.regex.internal.thompson : ThompsonMatcher;
        import std.algorithm.searching : canFind;
        static MatcherFactory!Char backtrackingFactory;
        static MatcherFactory!Char thompsonFactory;

        immutable usesBackrefs = re.backrefed.canFind!"a != 0";
        if (usesBackrefs)
        {
            if (backtrackingFactory is null)
                backtrackingFactory = new RuntimeFactory!(BacktrackingMatcher, Char);
            return backtrackingFactory;
        }

        if (thompsonFactory is null)
            thompsonFactory = new RuntimeFactory!(ThompsonMatcher, Char);
        return thompsonFactory;
    }
}
// Common base class of the matching engines handled by MatcherFactory.
// Defining it as an interface has the undesired side-effect:
// casting any class to an interface silently adjusts pointer to point to a nested vtbl
abstract class Matcher(Char)
{
abstract:
// Get a (next) match
// NOTE(review): the meaning of the int result is not visible here; callers
// of matchImpl in the backtracking engine treat non-zero as a match - confirm
int match(Group!size_t[] matches);
// This only maintains internal ref-count,
// deallocation happens inside MatcherFactory
@property ref size_t refCount() @safe;
// Copy internal state to another engine, using memory arena 'memory'
void dupTo(Matcher!Char m, void[] memory);
// The pattern loaded
@property ref const(Regex!Char) pattern() @safe;
}
/++ /++
$(D Regex) object holds regular expression pattern in compiled form. $(D Regex) object holds regular expression pattern in compiled form.
Instances of this object are constructed via calls to $(D regex). Instances of this object are constructed via calls to $(D regex).
@ -443,11 +571,11 @@ struct Regex(Char)
static struct NamedGroupRange static struct NamedGroupRange
{ {
private: private:
NamedGroup[] groups; const(NamedGroup)[] groups;
size_t start; size_t start;
size_t end; size_t end;
public: public:
this(NamedGroup[] g, size_t s, size_t e) this(const(NamedGroup)[] g, size_t s, size_t e)
{ {
assert(s <= e); assert(s <= e);
assert(e <= g.length); assert(e <= g.length);
@ -485,7 +613,7 @@ struct Regex(Char)
package(std.regex): package(std.regex):
import std.regex.internal.kickstart : Kickstart; //TODO: get rid of this dependency import std.regex.internal.kickstart : Kickstart; //TODO: get rid of this dependency
NamedGroup[] dict; // maps name -> user group number const(NamedGroup)[] dict; // maps name -> user group number
uint ngroup; // number of internal groups uint ngroup; // number of internal groups
uint maxCounterDepth; // max depth of nested {n,m} repetitions uint maxCounterDepth; // max depth of nested {n,m} repetitions
uint hotspotTableSize; // number of entries in merge table uint hotspotTableSize; // number of entries in merge table
@ -495,6 +623,35 @@ package(std.regex):
public const(BitTable)[] filters; // bloom filters for conditional loops public const(BitTable)[] filters; // bloom filters for conditional loops
uint[] backrefed; // bit array of backreferenced submatches uint[] backrefed; // bit array of backreferenced submatches
Kickstart!Char kickstart; Kickstart!Char kickstart;
MatcherFactory!Char factory; // produces optimal matcher for this pattern
// Returns a const copy of this pattern whose `factory` field is replaced
// by the given one; `this` is left unmodified.
const(Regex) withFactory(MatcherFactory!Char factory) pure const @trusted
{
    auto copy = cast() this; // drop const so the local copy can be adjusted
    copy.factory = factory;
    return copy;
}
// Returns a const copy of this pattern whose `flags` field is set to
// `newFlags`; `this` is left unmodified.
const(Regex) withFlags(uint newFlags) pure const @trusted
{
    auto copy = cast() this; // drop const so the local copy can be adjusted
    copy.flags = newFlags;
    return copy;
}
// Returns a const copy of this pattern whose bytecode (`ir`) is a fresh
// duplicate of `code`; `this` is left unmodified.
const(Regex) withCode(const(Bytecode)[] code) pure const @trusted
{
    auto copy = cast() this; // drop const so the local copy can be adjusted
    copy.ir = code.dup; // TODO: sidestep const instead?
    return copy;
}
// Returns a const copy of this pattern whose `ngroup` field is set to
// `nGroup`; `this` is left unmodified.
const(Regex) withNGroup(uint nGroup) pure const @trusted
{
    auto copy = cast() this; // drop const so the local copy can be adjusted
    copy.ngroup = nGroup;
    return copy;
}
//bit access helper //bit access helper
uint isBackref(uint n) uint isBackref(uint n)
@ -537,26 +694,6 @@ package(std.regex):
} }
//@@@BUG@@@ (unreduced) - public makes it inaccessible in std.regex.package (!)
/*public*/ struct StaticRegex(Char)
{
package(std.regex):
import std.regex.internal.backtracking : BacktrackingMatcher;
alias Matcher = BacktrackingMatcher!(true);
alias MatchFn = bool function(ref Matcher!Char) @trusted;
MatchFn nativeFn;
public:
Regex!Char _regex;
alias _regex this;
this(Regex!Char re, MatchFn fn)
{
_regex = re;
nativeFn = fn;
}
}
// The stuff below this point is temporarily part of IR module // The stuff below this point is temporarily part of IR module
// but may need better place in the future (all internals) // but may need better place in the future (all internals)
package(std.regex): package(std.regex):
@ -593,7 +730,7 @@ if (is(Char :dchar))
@property bool atEnd(){ @property bool atEnd(){
return _index == _origin.length; return _index == _origin.length;
} }
bool search(Kickstart)(ref Kickstart kick, ref dchar res, ref size_t pos) bool search(Kickstart)(ref const Kickstart kick, ref dchar res, ref size_t pos)
{ {
size_t idx = kick.search(_origin, _index); size_t idx = kick.search(_origin, _index);
_index = idx; _index = idx;
@ -676,7 +813,7 @@ template BackLooper(E)
} }
// //
@trusted uint lookupNamedGroup(String)(NamedGroup[] dict, String name) @trusted uint lookupNamedGroup(String)(const(NamedGroup)[] dict, String name)
{//equal is @system? {//equal is @system?
import std.algorithm.comparison : equal; import std.algorithm.comparison : equal;
import std.algorithm.iteration : map; import std.algorithm.iteration : map;

View file

@ -393,7 +393,7 @@ public:
// has a useful trait: if supplied with valid UTF indexes, // has a useful trait: if supplied with valid UTF indexes,
// returns only valid UTF indexes // returns only valid UTF indexes
// (that given the haystack in question is valid UTF string) // (that given the haystack in question is valid UTF string)
@trusted size_t search(const(Char)[] haystack, size_t idx) @trusted size_t search(const(Char)[] haystack, size_t idx) const
{//@BUG: apparently assumes little endian machines {//@BUG: apparently assumes little endian machines
import core.stdc.string : memchr; import core.stdc.string : memchr;
import std.conv : text; import std.conv : text;

View file

@ -12,7 +12,11 @@ static import std.ascii;
// package relevant info from parser into a regex object // package relevant info from parser into a regex object
auto makeRegex(S, CG)(Parser!(S, CG) p) auto makeRegex(S, CG)(Parser!(S, CG) p)
{ {
Regex!(BasicElementOf!S) re; import std.regex.internal.backtracking : BacktrackingMatcher;
import std.regex.internal.thompson : ThompsonMatcher;
import std.algorithm.searching : canFind;
alias Char = BasicElementOf!S;
Regex!Char re;
auto g = p.g; auto g = p.g;
with(re) with(re)
{ {
@ -25,6 +29,12 @@ auto makeRegex(S, CG)(Parser!(S, CG) p)
matchers = g.matchers; matchers = g.matchers;
backrefed = g.backrefed; backrefed = g.backrefed;
re.postprocess(); re.postprocess();
// check if we have backreferences, if so - use backtracking
if (__ctfe) factory = null; // allows us to use the awful enum re = regex(...);
else if (re.backrefed.canFind!"a != 0")
factory = new RuntimeFactory!(BacktrackingMatcher, Char);
else
factory = new RuntimeFactory!(ThompsonMatcher, Char);
debug(std_regex_parser) debug(std_regex_parser)
{ {
__ctfe || print(); __ctfe || print();

View file

@ -518,11 +518,11 @@ alias Sequence(int B, int E) = staticIota!(B, E);
{ {
import std.algorithm.comparison : equal; import std.algorithm.comparison : equal;
auto rtr = regex("a|b|c"); auto rtr = regex("a|b|c");
enum ctr = regex("a|b|c"); static ctr = regex("a|b|c");
assert(equal(rtr.ir,ctr.ir)); assert(equal(rtr.ir,ctr.ir));
//CTFE parser BUG is triggered by group //CTFE parser BUG is triggered by group
//in the middle of alternation (at least not first and not last) //in the middle of alternation (at least not first and not last)
enum testCT = regex(`abc|(edf)|xyz`); static testCT = regex(`abc|(edf)|xyz`);
auto testRT = regex(`abc|(edf)|xyz`); auto testRT = regex(`abc|(edf)|xyz`);
assert(equal(testCT.ir,testRT.ir)); assert(equal(testCT.ir,testRT.ir));
} }
@ -996,6 +996,36 @@ alias Sequence(int B, int E) = staticIota!(B, E);
assertThrown(regex(`^((x)(?=\1))`)); assertThrown(regex(`^((x)(?=\1))`));
} }
// bugzilla 13532
// Compares the time of matching with an `enum` ctRegex against the time of
// matching with a `static` one; currently disabled via version(none).
version(none) // TODO: revisit once we have proper benchmark framework
@safe unittest
{
    import std.datetime.stopwatch : StopWatch, AutoStart;
    import std.math : abs;
    import std.conv : to;
    enum re1 = ctRegex!`[0-9][0-9]`;
    immutable static re2 = ctRegex!`[0-9][0-9]`;
    immutable iterations = 1_000_000;
    size_t result1 = 0, result2 = 0;
    auto sw = StopWatch(AutoStart.yes);
    // First pass: the enum regex (re1)
    foreach (_; 0 .. iterations)
    {
        result1 += matchFirst("12345678", re1).length;
    }
    const enumTime = sw.peek();
    sw.reset();
    // Second pass: the static regex (re2)
    foreach (_; 0 .. iterations)
    {
        result2 += matchFirst("12345678", re2).length;
    }
    const staticTime = sw.peek();
    assert(result1 == result2);
    auto ratio = 1.0 * enumTime.total!"usecs" / staticTime.total!"usecs";
    // enum is faster, or it is slower by less than 75%
    assert(ratio < 1.0 || abs(ratio - 1.0) < 0.75,
        "enum regex to static regex ratio "~to!string(ratio));
}
// bugzilla 14504 // bugzilla 14504
@safe unittest @safe unittest
{ {

View file

@ -89,7 +89,7 @@ struct ThreadList(DataIndex)
template ThompsonOps(E, S, bool withInput:true) template ThompsonOps(E, S, bool withInput:true)
{ {
@trusted: @trusted:
static bool op(IR code:IR.End)(E* e, S* state) static bool op(IR code:IR.End)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -105,7 +105,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code:IR.Wordboundary)(E* e, S* state) static bool op(IR code:IR.Wordboundary)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -137,7 +137,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code:IR.Notwordboundary)(E* e, S* state) static bool op(IR code:IR.Notwordboundary)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -167,7 +167,7 @@ template ThompsonOps(E, S, bool withInput:true)
return true; return true;
} }
static bool op(IR code:IR.Bof)(E* e, S* state) static bool op(IR code:IR.Bof)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -183,7 +183,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code:IR.Bol)(E* e, S* state) static bool op(IR code:IR.Bol)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -203,7 +203,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code:IR.Eof)(E* e, S* state) static bool op(IR code:IR.Eof)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -219,7 +219,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code:IR.Eol)(E* e, S* state) static bool op(IR code:IR.Eol)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -240,42 +240,42 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code:IR.InfiniteStart)(E* e, S* state) static bool op(IR code:IR.InfiniteStart)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteStart); t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteStart);
return op!(IR.InfiniteEnd)(e,state); return op!(IR.InfiniteEnd)(e,state);
} }
static bool op(IR code:IR.InfiniteBloomStart)(E* e, S* state) static bool op(IR code:IR.InfiniteBloomStart)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteBloomStart); t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteBloomStart);
return op!(IR.InfiniteBloomEnd)(e,state); return op!(IR.InfiniteBloomEnd)(e,state);
} }
static bool op(IR code:IR.InfiniteQStart)(E* e, S* state) static bool op(IR code:IR.InfiniteQStart)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteQStart); t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteQStart);
return op!(IR.InfiniteQEnd)(e,state); return op!(IR.InfiniteQEnd)(e,state);
} }
static bool op(IR code:IR.RepeatStart)(E* e, S* state) static bool op(IR code:IR.RepeatStart)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
t.pc += re.ir[t.pc].data + IRL!(IR.RepeatStart); t.pc += re.ir[t.pc].data + IRL!(IR.RepeatStart);
return op!(IR.RepeatEnd)(e,state); return op!(IR.RepeatEnd)(e,state);
} }
static bool op(IR code:IR.RepeatQStart)(E* e, S* state) static bool op(IR code:IR.RepeatQStart)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
t.pc += re.ir[t.pc].data + IRL!(IR.RepeatQStart); t.pc += re.ir[t.pc].data + IRL!(IR.RepeatQStart);
return op!(IR.RepeatQEnd)(e,state); return op!(IR.RepeatQEnd)(e,state);
} }
static bool op(IR code)(E* e, S* state) static bool op(IR code)(E e, S* state)
if (code == IR.RepeatEnd || code == IR.RepeatQEnd) if (code == IR.RepeatEnd || code == IR.RepeatQEnd)
{ {
with(e) with(state) with(e) with(state)
@ -330,7 +330,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code)(E* e, S* state) static bool op(IR code)(E e, S* state)
if (code == IR.InfiniteEnd || code == IR.InfiniteQEnd) if (code == IR.InfiniteEnd || code == IR.InfiniteQEnd)
{ {
with(e) with(state) with(e) with(state)
@ -365,7 +365,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code)(E* e, S* state) static bool op(IR code)(E e, S* state)
if (code == IR.InfiniteBloomEnd) if (code == IR.InfiniteBloomEnd)
{ {
with(e) with(state) with(e) with(state)
@ -394,7 +394,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code:IR.OrEnd)(E* e, S* state) static bool op(IR code:IR.OrEnd)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -415,7 +415,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code:IR.OrStart)(E* e, S* state) static bool op(IR code:IR.OrStart)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -424,7 +424,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code:IR.Option)(E* e, S* state) static bool op(IR code:IR.Option)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -439,7 +439,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code:IR.GotoEndOr)(E* e, S* state) static bool op(IR code:IR.GotoEndOr)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -448,7 +448,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code:IR.GroupStart)(E* e, S* state) static bool op(IR code:IR.GroupStart)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -458,7 +458,7 @@ template ThompsonOps(E, S, bool withInput:true)
return true; return true;
} }
} }
static bool op(IR code:IR.GroupEnd)(E* e, S* state) static bool op(IR code:IR.GroupEnd)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -469,7 +469,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code:IR.Backref)(E* e, S* state) static bool op(IR code:IR.Backref)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -506,7 +506,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
static bool op(IR code)(E* e, S* state) static bool op(IR code)(E e, S* state)
if (code == IR.LookbehindStart || code == IR.NeglookbehindStart) if (code == IR.LookbehindStart || code == IR.NeglookbehindStart)
{ {
with(e) with(state) with(e) with(state)
@ -516,10 +516,9 @@ template ThompsonOps(E, S, bool withInput:true)
uint end = t.pc + len + IRL!(IR.LookbehindEnd) + IRL!(IR.LookbehindStart); uint end = t.pc + len + IRL!(IR.LookbehindEnd) + IRL!(IR.LookbehindStart);
bool positive = re.ir[t.pc].code == IR.LookbehindStart; bool positive = re.ir[t.pc].code == IR.LookbehindStart;
static if (Stream.isLoopback) static if (Stream.isLoopback)
auto matcher = fwdMatcher(t.pc, end, subCounters.get(t.pc, 0)); auto matcher = fwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
else else
auto matcher = bwdMatcher(t.pc, end, subCounters.get(t.pc, 0)); auto matcher = bwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
matcher.re.ngroup = me - ms;
matcher.backrefed = backrefed.empty ? t.matches : backrefed; matcher.backrefed = backrefed.empty ? t.matches : backrefed;
//backMatch //backMatch
auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookbehindStart)); auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookbehindStart));
@ -534,7 +533,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code)(E* e, S* state) static bool op(IR code)(E e, S* state)
if (code == IR.LookaheadStart || code == IR.NeglookaheadStart) if (code == IR.LookaheadStart || code == IR.NeglookaheadStart)
{ {
with(e) with(state) with(e) with(state)
@ -545,10 +544,9 @@ template ThompsonOps(E, S, bool withInput:true)
uint end = t.pc+len+IRL!(IR.LookaheadEnd)+IRL!(IR.LookaheadStart); uint end = t.pc+len+IRL!(IR.LookaheadEnd)+IRL!(IR.LookaheadStart);
bool positive = re.ir[t.pc].code == IR.LookaheadStart; bool positive = re.ir[t.pc].code == IR.LookaheadStart;
static if (Stream.isLoopback) static if (Stream.isLoopback)
auto matcher = bwdMatcher(t.pc, end, subCounters.get(t.pc, 0)); auto matcher = bwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
else else
auto matcher = fwdMatcher(t.pc, end, subCounters.get(t.pc, 0)); auto matcher = fwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
matcher.re.ngroup = me - ms;
matcher.backrefed = backrefed.empty ? t.matches : backrefed; matcher.backrefed = backrefed.empty ? t.matches : backrefed;
auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookaheadStart)); auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookaheadStart));
freelist = matcher.freelist; freelist = matcher.freelist;
@ -564,7 +562,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code)(E* e, S* state) static bool op(IR code)(E e, S* state)
if (code == IR.LookaheadEnd || code == IR.NeglookaheadEnd || if (code == IR.LookaheadEnd || code == IR.NeglookaheadEnd ||
code == IR.LookbehindEnd || code == IR.NeglookbehindEnd) code == IR.LookbehindEnd || code == IR.NeglookbehindEnd)
{ {
@ -579,13 +577,13 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code:IR.Nop)(E* e, S* state) static bool op(IR code:IR.Nop)(E e, S* state)
{ {
with(state) t.pc += IRL!(IR.Nop); with(state) t.pc += IRL!(IR.Nop);
return true; return true;
} }
static bool op(IR code:IR.OrChar)(E* e, S* state) static bool op(IR code:IR.OrChar)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -607,7 +605,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code:IR.Char)(E* e, S* state) static bool op(IR code:IR.Char)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -623,7 +621,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code:IR.Any)(E* e, S* state) static bool op(IR code:IR.Any)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -634,7 +632,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code:IR.CodepointSet)(E* e, S* state) static bool op(IR code:IR.CodepointSet)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -652,7 +650,7 @@ template ThompsonOps(E, S, bool withInput:true)
} }
} }
static bool op(IR code:IR.Trie)(E* e, S* state) static bool op(IR code:IR.Trie)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -676,7 +674,7 @@ template ThompsonOps(E,S, bool withInput:false)
{ {
@trusted: @trusted:
// can't match these without input // can't match these without input
static bool op(IR code)(E* e, S* state) static bool op(IR code)(E e, S* state)
if (code == IR.Char || code == IR.OrChar || code == IR.CodepointSet if (code == IR.Char || code == IR.OrChar || code == IR.CodepointSet
|| code == IR.Trie || code == IR.Char || code == IR.Any) || code == IR.Trie || code == IR.Char || code == IR.Any)
{ {
@ -684,7 +682,7 @@ template ThompsonOps(E,S, bool withInput:false)
} }
// special case of zero-width backref // special case of zero-width backref
static bool op(IR code:IR.Backref)(E* e, S* state) static bool op(IR code:IR.Backref)(E e, S* state)
{ {
with(e) with(state) with(e) with(state)
{ {
@ -702,7 +700,7 @@ template ThompsonOps(E,S, bool withInput:false)
} }
// forward all control flow to normal versions // forward all control flow to normal versions
static bool op(IR code)(E* e, S* state) static bool op(IR code)(E e, S* state)
if (code != IR.Char && code != IR.OrChar && code != IR.CodepointSet if (code != IR.Char && code != IR.OrChar && code != IR.CodepointSet
&& code != IR.Trie && code != IR.Char && code != IR.Any && code != IR.Backref) && code != IR.Trie && code != IR.Char && code != IR.Any && code != IR.Backref)
{ {
@ -714,19 +712,19 @@ template ThompsonOps(E,S, bool withInput:false)
Thompson matcher does all matching in lockstep, Thompson matcher does all matching in lockstep,
never looking at the same char twice never looking at the same char twice
+/ +/
@trusted struct ThompsonMatcher(Char, StreamType = Input!Char) @trusted class ThompsonMatcher(Char, StreamType = Input!Char): Matcher!Char
if (is(Char : dchar)) if (is(Char : dchar))
{ {
alias DataIndex = Stream.DataIndex; alias DataIndex = Stream.DataIndex;
alias Stream = StreamType; alias Stream = StreamType;
alias OpFunc = bool function(ThompsonMatcher*, State*); alias OpFunc = bool function(ThompsonMatcher, State*);
alias BackMatcher = ThompsonMatcher!(Char, BackLooper!(Stream)); alias BackMatcher = ThompsonMatcher!(Char, BackLooper!(Stream));
alias OpBackFunc = bool function(BackMatcher*, BackMatcher.State*); alias OpBackFunc = bool function(BackMatcher, BackMatcher.State*);
Thread!DataIndex* freelist; Thread!DataIndex* freelist;
ThreadList!DataIndex clist, nlist; ThreadList!DataIndex clist, nlist;
DataIndex[] merge; DataIndex[] merge;
Group!DataIndex[] backrefed; Group!DataIndex[] backrefed;
Regex!Char re; //regex program const Regex!Char re; //regex program
Stream s; Stream s;
dchar front; dchar front;
DataIndex index; DataIndex index;
@ -737,16 +735,18 @@ if (is(Char : dchar))
OpBackFunc[] opCacheBackTrue; // ditto OpBackFunc[] opCacheBackTrue; // ditto
OpBackFunc[] opCacheBackFalse; // ditto OpBackFunc[] opCacheBackFalse; // ditto
size_t threadSize; size_t threadSize;
size_t _refCount;
int matched; int matched;
bool exhausted; bool exhausted;
final:
static struct State static struct State
{ {
Thread!DataIndex* t; Thread!DataIndex* t;
ThreadList!DataIndex worklist; ThreadList!DataIndex worklist;
Group!DataIndex[] matches; Group!DataIndex[] matches;
bool popState(E)(E* e) bool popState(E)(E e)
{ {
with(e) with(e)
{ {
@ -784,6 +784,10 @@ if (is(Char : dchar))
//true if it's end of input //true if it's end of input
@property bool atEnd(){ return index == s.lastIndex && s.atEnd; } @property bool atEnd(){ return index == s.lastIndex && s.atEnd; }
override @property ref size_t refCount() @safe { return _refCount; }
override @property ref const(Regex!Char) pattern() @safe { return re; }
bool next() bool next()
{ {
if (!s.nextChar(front, index)) if (!s.nextChar(front, index))
@ -843,19 +847,28 @@ if (is(Char : dchar))
} }
} }
this()(Regex!Char program, Stream stream, void[] memory) this()(const Regex!Char program, Stream stream, void[] memory)
{ {
// We are emplace'd to malloced memory w/o blitting T.init over it;
// make sure we initialize all fields explicitly
_refCount = 1;
subCounters = null;
backrefed = null;
exhausted = false;
matched = 0;
re = program; re = program;
s = stream; s = stream;
initExternalMemory(memory); initExternalMemory(memory);
genCounter = 0; genCounter = 0;
} }
this(ref ThompsonMatcher matcher, size_t lo, size_t hi, Stream stream) this(ThompsonMatcher matcher, size_t lo, size_t hi, uint nGroup, Stream stream)
{ {
_refCount = 1;
subCounters = matcher.subCounters;
s = stream; s = stream;
re = matcher.re; auto code = matcher.re.ir[lo .. hi];
re.ir = re.ir[lo .. hi]; re = matcher.re.withCode(code).withNGroup(nGroup);
threadSize = matcher.threadSize; threadSize = matcher.threadSize;
merge = matcher.merge; merge = matcher.merge;
freelist = matcher.freelist; freelist = matcher.freelist;
@ -867,11 +880,13 @@ if (is(Char : dchar))
index = matcher.index; index = matcher.index;
} }
this(ref BackMatcher matcher, size_t lo, size_t hi, Stream stream) this(BackMatcher matcher, size_t lo, size_t hi, uint nGroup, Stream stream)
{ {
_refCount = 1;
subCounters = matcher.subCounters;
s = stream; s = stream;
re = matcher.re; auto code = matcher.re.ir[lo .. hi];
re.ir = re.ir[lo .. hi]; re = matcher.re.withCode(code).withNGroup(nGroup);
threadSize = matcher.threadSize; threadSize = matcher.threadSize;
merge = matcher.merge; merge = matcher.merge;
freelist = matcher.freelist; freelist = matcher.freelist;
@ -883,31 +898,35 @@ if (is(Char : dchar))
index = matcher.index; index = matcher.index;
} }
auto fwdMatcher()(size_t lo, size_t hi, size_t counter) auto fwdMatcher()(size_t lo, size_t hi, uint nGroup, size_t counter)
{ {
auto m = ThompsonMatcher!(Char, Stream)(this, lo, hi, s); auto m = new ThompsonMatcher!(Char, Stream)(this, lo, hi, nGroup, s);
m.genCounter = counter; m.genCounter = counter;
return m; return m;
} }
auto bwdMatcher()(size_t lo, size_t hi, size_t counter) auto bwdMatcher()(size_t lo, size_t hi, uint nGroup, size_t counter)
{ {
alias BackLooper = typeof(s.loopBack(index)); alias BackLooper = typeof(s.loopBack(index));
auto m = ThompsonMatcher!(Char, BackLooper)(this, lo, hi, s.loopBack(index)); auto m = new ThompsonMatcher!(Char, BackLooper)(this, lo, hi, nGroup, s.loopBack(index));
m.genCounter = counter; m.genCounter = counter;
m.next(); m.next();
return m; return m;
} }
auto dupTo(void[] memory) override void dupTo(Matcher!Char engine, void[] memory)
{ {
typeof(this) tmp = this;//bitblit auto thompson = cast(ThompsonMatcher) engine;
tmp.initExternalMemory(memory); thompson.s = s;
tmp.genCounter = 0; thompson.subCounters = null;
return tmp; thompson.front = front;
thompson.index = index;
thompson.matched = matched;
thompson.exhausted = exhausted;
thompson.initExternalMemory(memory);
} }
int match(Group!DataIndex[] matches) override int match(Group!DataIndex[] matches)
{ {
debug(std_regex_matcher) debug(std_regex_matcher)
writeln("------------------------------------------"); writeln("------------------------------------------");
@ -1052,9 +1071,9 @@ if (is(Char : dchar))
{ {
debug(std_regex_matcher) writeln("---- Evaluating thread"); debug(std_regex_matcher) writeln("---- Evaluating thread");
static if (withInput) static if (withInput)
while (opCacheTrue.ptr[state.t.pc](&this, state)){} while (opCacheTrue.ptr[state.t.pc](this, state)){}
else else
while (opCacheFalse.ptr[state.t.pc](&this, state)){} while (opCacheFalse.ptr[state.t.pc](this, state)){}
} }
enum uint RestartPc = uint.max; enum uint RestartPc = uint.max;
//match the input, evaluating IR without searching //match the input, evaluating IR without searching

View file

@ -298,7 +298,6 @@ module std.regex;
import std.range.primitives, std.traits; import std.range.primitives, std.traits;
import std.regex.internal.ir; import std.regex.internal.ir;
import std.regex.internal.thompson; //TODO: get rid of this dependency
import std.typecons; // : Flag, Yes, No; import std.typecons; // : Flag, Yes, No;
/++ /++
@ -339,10 +338,9 @@ public alias Regex(Char) = std.regex.internal.ir.Regex!(Char);
A $(D StaticRegex) is a $(D Regex) object that contains D code specially A $(D StaticRegex) is a $(D Regex) object that contains D code specially
generated at compile-time to speed up matching. generated at compile-time to speed up matching.
Implicitly convertible to normal $(D Regex), No longer used, kept as alias to Regex for backwards compatibility.
however doing so will result in losing this additional capability.
+/ +/
public alias StaticRegex(Char) = std.regex.internal.ir.StaticRegex!(Char); public alias StaticRegex = Regex;
/++ /++
Compile regular expression pattern for the later execution. Compile regular expression pattern for the later execution.
@ -428,16 +426,25 @@ if (isSomeString!(S))
template ctRegexImpl(alias pattern, string flags=[]) template ctRegexImpl(alias pattern, string flags=[])
{ {
import std.regex.internal.backtracking, std.regex.internal.parser; import std.regex.internal.backtracking, std.regex.internal.parser;
enum r = regex(pattern, flags); static immutable r = cast(immutable) regex(pattern, flags);
alias Char = BasicElementOf!(typeof(pattern)); alias Char = BasicElementOf!(typeof(pattern));
enum source = ctGenRegExCode(r); enum source = ctGenRegExCode(r);
alias Matcher = BacktrackingMatcher!(true); @trusted bool func(BacktrackingMatcher!Char matcher)
@trusted bool func(ref Matcher!Char matcher)
{ {
debug(std_regex_ctr) pragma(msg, source); debug(std_regex_ctr) pragma(msg, source);
cast(void) matcher;
mixin(source); mixin(source);
} }
enum nr = StaticRegex!Char(r, &func); static immutable staticRe =
cast(immutable) r.withFactory(new CtfeFactory!(BacktrackingMatcher, Char, func));
struct Wrapper
{
// allow code that expects mutable Regex to still work
// we stay "logically const"
@trusted @property auto getRe() const { return cast() staticRe; }
alias getRe this;
}
enum wrapper = Wrapper();
} }
/++ /++
@ -450,10 +457,10 @@ template ctRegexImpl(alias pattern, string flags=[])
pattern = Regular expression pattern = Regular expression
flags = The _attributes (g, i, m, s and x accepted) flags = The _attributes (g, i, m, s and x accepted)
+/ +/
public enum ctRegex(alias pattern, alias flags=[]) = ctRegexImpl!(pattern, flags).nr; public enum ctRegex(alias pattern, alias flags=[]) = ctRegexImpl!(pattern, flags).wrapper;
enum isRegexFor(RegEx, R) = is(RegEx == Regex!(BasicElementOf!R)) enum isRegexFor(RegEx, R) = is(Unqual!RegEx == Regex!(BasicElementOf!R)) || is(RegEx : const(Regex!(BasicElementOf!R)))
|| is(RegEx == StaticRegex!(BasicElementOf!R)); || is(Unqual!RegEx == StaticRegex!(BasicElementOf!R));
/++ /++
@ -462,10 +469,10 @@ enum isRegexFor(RegEx, R) = is(RegEx == Regex!(BasicElementOf!R))
First element of range is the whole match. First element of range is the whole match.
+/ +/
@trusted public struct Captures(R, DIndex = size_t) @trusted public struct Captures(R)
if (isSomeString!R) if (isSomeString!R)
{//@trusted because of union inside {//@trusted because of union inside
alias DataIndex = DIndex; alias DataIndex = size_t;
alias String = R; alias String = R;
private: private:
import std.conv : text; import std.conv : text;
@ -480,9 +487,9 @@ private:
} }
uint _f, _b; uint _f, _b;
uint _refcount; // ref count or SMALL MASK + num groups uint _refcount; // ref count or SMALL MASK + num groups
NamedGroup[] _names; const(NamedGroup)[] _names;
this()(R input, uint n, NamedGroup[] named) this(R input, uint n, const(NamedGroup)[] named)
{ {
_input = input; _input = input;
_names = named; _names = named;
@ -491,11 +498,11 @@ private:
_f = 0; _f = 0;
} }
this(alias Engine)(ref RegexMatch!(R,Engine) rmatch) this(ref RegexMatch!R rmatch)
{ {
_input = rmatch._input; _input = rmatch._input;
_names = rmatch._engine.re.dict; _names = rmatch._engine.pattern.dict;
immutable n = rmatch._engine.re.ngroup; immutable n = rmatch._engine.pattern.ngroup;
newMatches(n); newMatches(n);
_b = n; _b = n;
_f = 0; _f = 0;
@ -693,58 +700,38 @@ public:
Effectively it's a forward range of Captures!R, produced Effectively it's a forward range of Captures!R, produced
by lazily searching for matches in a given input. by lazily searching for matches in a given input.
$(D alias Engine) specifies an engine type to use during matching,
and is automatically deduced in a call to $(D match)/$(D bmatch).
+/ +/
@trusted public struct RegexMatch(R, alias Engine = ThompsonMatcher) @trusted public struct RegexMatch(R)
if (isSomeString!R) if (isSomeString!R)
{ {
private: private:
import core.stdc.stdlib : malloc, free;
alias Char = BasicElementOf!R; alias Char = BasicElementOf!R;
alias EngineType = Engine!Char; Matcher!Char _engine;
EngineType _engine; const MatcherFactory!Char _factory;
R _input; R _input;
Captures!(R,EngineType.DataIndex) _captures; Captures!R _captures;
void[] _memory;//is ref-counted
this(RegEx)(R input, RegEx prog) this(RegEx)(R input, RegEx prog)
{ {
import std.exception : enforce; import std.exception : enforce;
_input = input; _input = input;
immutable size = EngineType.initialMemory(prog)+size_t.sizeof; if (prog.factory is null) _factory = defaultFactory!Char(prog);
_memory = (enforce(malloc(size), "malloc failed")[0 .. size]); else _factory = prog.factory;
scope(failure) free(_memory.ptr); _engine = _factory.create(prog, input);
*cast(size_t*)_memory.ptr = 1; assert(_engine.refCount == 1);
_engine = EngineType(prog, Input!Char(input), _memory[size_t.sizeof..$]); _captures = Captures!R(this);
static if (is(RegEx == StaticRegex!(BasicElementOf!R)))
_engine.nativeFn = prog.nativeFn;
_captures = Captures!(R,EngineType.DataIndex)(this);
_captures._nMatch = _engine.match(_captures.matches); _captures._nMatch = _engine.match(_captures.matches);
debug(std_regex_allocation) writefln("RefCount (ctor): %x %d", _memory.ptr, counter);
} }
@property ref size_t counter(){ return *cast(size_t*)_memory.ptr; }
public: public:
this(this) this(this)
{ {
if (_memory.ptr) if (_engine) _factory.incRef(_engine);
{
++counter;
debug(std_regex_allocation) writefln("RefCount (postblit): %x %d",
_memory.ptr, *cast(size_t*)_memory.ptr);
}
} }
~this() ~this()
{ {
if (_memory.ptr && --*cast(size_t*)_memory.ptr == 0) if (_engine) _factory.decRef(_engine);
{
debug(std_regex_allocation) writefln("RefCount (dtor): %x %d",
_memory.ptr, *cast(size_t*)_memory.ptr);
free(cast(void*)_memory.ptr);
}
} }
///Shorthands for front.pre, front.post, front.hit. ///Shorthands for front.pre, front.post, front.hit.
@ -786,19 +773,18 @@ public:
void popFront() void popFront()
{ {
import std.exception : enforce; import std.exception : enforce;
if (counter != 1) // CoW - if refCount is not 1, we are aliased by somebody else
{//do cow magic first if (_engine.refCount != 1)
counter--;//we abandon this reference {
immutable size = EngineType.initialMemory(_engine.re)+size_t.sizeof; // we create a new engine & abandon this reference
_memory = (enforce(malloc(size), "malloc failed")[0 .. size]); auto old = _engine;
_engine = _engine.dupTo(_memory[size_t.sizeof .. size]); _engine = _factory.dup(old, _input);
counter = 1;//points to new chunk _factory.decRef(old);
} }
if (!_captures.unique) if (!_captures.unique)
{ {
// has external references - allocate new space // has external references - allocate new space
_captures.newMatches(_engine.re.ngroup); _captures.newMatches(_engine.pattern.ngroup);
} }
_captures._nMatch = _engine.match(_captures.matches); _captures._nMatch = _engine.match(_captures.matches);
} }
@ -814,39 +800,30 @@ public:
/// Same as .front, provided for compatibility with original std.regex. /// Same as .front, provided for compatibility with original std.regex.
@property auto captures() inout { return _captures; } @property auto captures() inout { return _captures; }
} }
private @trusted auto matchOnce(alias Engine, RegEx, R)(R input, RegEx re) private @trusted auto matchOnce(RegEx, R)(R input, const RegEx prog)
{ {
import core.stdc.stdlib : malloc, free;
import std.exception : enforce;
alias Char = BasicElementOf!R; alias Char = BasicElementOf!R;
alias EngineType = Engine!Char; auto factory = prog.factory is null ? defaultFactory!Char(prog) : prog.factory;
auto engine = factory.create(prog, input);
size_t size = EngineType.initialMemory(re); scope(exit) factory.decRef(engine); // destroys the engine
void[] memory = enforce(malloc(size), "malloc failed")[0 .. size]; auto captures = Captures!R(input, prog.ngroup, prog.dict);
scope(exit) free(memory.ptr);
auto captures = Captures!(R, EngineType.DataIndex)(input, re.ngroup, re.dict);
auto engine = EngineType(re, Input!Char(input), memory);
static if (is(RegEx == StaticRegex!(BasicElementOf!R)))
engine.nativeFn = re.nativeFn;
captures._nMatch = engine.match(captures.matches); captures._nMatch = engine.match(captures.matches);
return captures; return captures;
} }
private auto matchMany(alias Engine, RegEx, R)(R input, RegEx re) private auto matchMany(RegEx, R)(R input, RegEx re) @safe
{ {
re.flags |= RegexOption.global; return RegexMatch!R(input, re.withFlags(re.flags | RegexOption.global));
return RegexMatch!(R, Engine)(input, re);
} }
@system unittest @system unittest
{ {
//sanity checks for new API //sanity checks for new API
auto re = regex("abc"); auto re = regex("abc");
assert(!"abc".matchOnce!(ThompsonMatcher)(re).empty); assert(!"abc".matchOnce(re).empty);
assert("abc".matchOnce!(ThompsonMatcher)(re)[0] == "abc"); assert("abc".matchOnce(re)[0] == "abc");
} }
@ -938,25 +915,16 @@ if (isSomeString!R && isRegexFor!(RegEx, R))
+/ +/
public auto match(R, RegEx)(R input, RegEx re) public auto match(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R))) if (isSomeString!R && isRegexFor!(RegEx,R))
{ {
import std.regex.internal.thompson : ThompsonMatcher; return RegexMatch!(Unqual!(typeof(input)))(input, re);
return RegexMatch!(Unqual!(typeof(input)),ThompsonMatcher)(input, re);
} }
///ditto ///ditto
public auto match(R, String)(R input, String re) public auto match(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String) if (isSomeString!R && isSomeString!String)
{ {
import std.regex.internal.thompson : ThompsonMatcher; return RegexMatch!(Unqual!(typeof(input)))(input, regex(re));
return RegexMatch!(Unqual!(typeof(input)),ThompsonMatcher)(input, regex(re));
}
public auto match(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
{
import std.regex.internal.backtracking : BacktrackingMatcher;
return RegexMatch!(Unqual!(typeof(input)),BacktrackingMatcher!true)(input, re);
} }
/++ /++
@ -978,33 +946,23 @@ if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
if there was a match, otherwise an empty $(LREF Captures) object. if there was a match, otherwise an empty $(LREF Captures) object.
+/ +/
public auto matchFirst(R, RegEx)(R input, RegEx re) public auto matchFirst(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R))) if (isSomeString!R && isRegexFor!(RegEx, R))
{ {
import std.regex.internal.thompson : ThompsonMatcher; return matchOnce(input, re);
return matchOnce!ThompsonMatcher(input, re);
} }
///ditto ///ditto
public auto matchFirst(R, String)(R input, String re) public auto matchFirst(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String) if (isSomeString!R && isSomeString!String)
{ {
import std.regex.internal.thompson : ThompsonMatcher; return matchOnce(input, regex(re));
return matchOnce!ThompsonMatcher(input, regex(re));
} }
///ditto ///ditto
public auto matchFirst(R, String)(R input, String[] re...) public auto matchFirst(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String) if (isSomeString!R && isSomeString!String)
{ {
import std.regex.internal.thompson : ThompsonMatcher; return matchOnce(input, regex(re));
return matchOnce!ThompsonMatcher(input, regex(re));
}
public auto matchFirst(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
{
import std.regex.internal.backtracking : BacktrackingMatcher;
return matchOnce!(BacktrackingMatcher!true)(input, re);
} }
/++ /++
@ -1029,33 +987,23 @@ if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
after the first match was found or an empty one if not present. after the first match was found or an empty one if not present.
+/ +/
public auto matchAll(R, RegEx)(R input, RegEx re) public auto matchAll(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R))) if (isSomeString!R && isRegexFor!(RegEx, R))
{ {
import std.regex.internal.thompson : ThompsonMatcher; return matchMany(input, re);
return matchMany!ThompsonMatcher(input, re);
} }
///ditto ///ditto
public auto matchAll(R, String)(R input, String re) public auto matchAll(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String) if (isSomeString!R && isSomeString!String)
{ {
import std.regex.internal.thompson : ThompsonMatcher; return matchMany(input, regex(re));
return matchMany!ThompsonMatcher(input, regex(re));
} }
///ditto ///ditto
public auto matchAll(R, String)(R input, String[] re...) public auto matchAll(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String) if (isSomeString!R && isSomeString!String)
{ {
import std.regex.internal.thompson : ThompsonMatcher; return matchMany(input, regex(re));
return matchMany!ThompsonMatcher(input, regex(re));
}
public auto matchAll(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
{
import std.regex.internal.backtracking : BacktrackingMatcher;
return matchMany!(BacktrackingMatcher!true)(input, re);
} }
// another set of tests just to cover the new API // another set of tests just to cover the new API
@ -1119,25 +1067,16 @@ if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
+/ +/
public auto bmatch(R, RegEx)(R input, RegEx re) public auto bmatch(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R))) if (isSomeString!R && isRegexFor!(RegEx, R))
{ {
import std.regex.internal.backtracking : BacktrackingMatcher; return RegexMatch!(Unqual!(typeof(input)))(input, re);
return RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false)(input, re);
} }
///ditto ///ditto
public auto bmatch(R, String)(R input, String re) public auto bmatch(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String) if (isSomeString!R && isSomeString!String)
{ {
import std.regex.internal.backtracking : BacktrackingMatcher; return RegexMatch!(Unqual!(typeof(input)))(input, regex(re));
return RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false)(input, regex(re));
}
public auto bmatch(R, RegEx)(R input, RegEx re)
if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
{
import std.regex.internal.backtracking : BacktrackingMatcher;
return RegexMatch!(Unqual!(typeof(input)),BacktrackingMatcher!true)(input, re);
} }
// produces replacement string from format using captures for substitution // produces replacement string from format using captures for substitution
@ -1530,7 +1469,7 @@ private:
@trusted this(Range input, RegEx separator) @trusted this(Range input, RegEx separator)
{//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted {//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted
_input = input; _input = input;
separator.flags |= RegexOption.global; const re = separator.withFlags(separator.flags | RegexOption.global);
if (_input.empty) if (_input.empty)
{ {
//there is nothing to match at all, make _offset > 0 //there is nothing to match at all, make _offset > 0
@ -1538,7 +1477,7 @@ private:
} }
else else
{ {
_match = Rx(_input, separator); _match = Rx(_input, re);
static if (keepSeparators) static if (keepSeparators)
if (_match.pre.empty) if (_match.pre.empty)

104
std/uni.d
View file

@ -2184,6 +2184,12 @@ pure:
return Intervals!(typeof(data))(data); return Intervals!(typeof(data))(data);
} }
package @property const(CodepointInterval)[] intervals() const
{
import std.array : array;
return Intervals!(typeof(data[]))(data[]).array;
}
/** /**
Tests the presence of code point $(D val) in this set. Tests the presence of code point $(D val) in this set.
*/ */
@ -2619,52 +2625,9 @@ public:
assert((set & set.inverted).empty); assert((set & set.inverted).empty);
} }
/** package static string toSourceCode(const(CodepointInterval)[] range, string funcName)
Generates string with D source code of unary function with name of
$(D funcName) taking a single $(D dchar) argument. If $(D funcName) is empty
the code is adjusted to be a lambda function.
The function generated tests if the $(CODEPOINT) passed
belongs to this set or not. The result is to be used with string mixin.
The intended usage area is aggressive optimization via meta programming
in parser generators and the like.
Note: Use with care for relatively small or regular sets. It
could end up being slower than just using multi-staged tables.
Example:
---
import std.stdio;
// construct set directly from [a, b$RPAREN intervals
auto set = CodepointSet(10, 12, 45, 65, 100, 200);
writeln(set);
writeln(set.toSourceCode("func"));
---
The above outputs something along the lines of:
---
bool func(dchar ch) @safe pure nothrow @nogc
{
if (ch < 45)
{
if (ch == 10 || ch == 11) return true;
return false;
}
else if (ch < 65) return true;
else
{
if (ch < 100) return false;
if (ch < 200) return true;
return false;
}
}
---
*/
string toSourceCode(string funcName="")
{ {
import std.algorithm.searching : countUntil; import std.algorithm.searching : countUntil;
import std.array : array;
import std.format : format; import std.format : format;
enum maxBinary = 3; enum maxBinary = 3;
static string linearScope(R)(R ivals, string indent) static string linearScope(R)(R ivals, string indent)
@ -2746,7 +2709,6 @@ public:
string code = format("bool %s(dchar ch) @safe pure nothrow @nogc\n", string code = format("bool %s(dchar ch) @safe pure nothrow @nogc\n",
funcName.empty ? "function" : funcName); funcName.empty ? "function" : funcName);
auto range = byInterval.array();
// special case first bisection to be on ASCII vs beyond // special case first bisection to be on ASCII vs beyond
auto tillAscii = countUntil!"a[0] > 0x80"(range); auto tillAscii = countUntil!"a[0] > 0x80"(range);
if (tillAscii <= 0) // everything is ASCII or nothing is ascii (-1 & 0) if (tillAscii <= 0) // everything is ASCII or nothing is ascii (-1 & 0)
@ -2756,6 +2718,55 @@ public:
return code; return code;
} }
/**
Generates string with D source code of unary function with name of
$(D funcName) taking a single $(D dchar) argument. If $(D funcName) is empty
the code is adjusted to be a lambda function.
The function generated tests if the $(CODEPOINT) passed
belongs to this set or not. The result is to be used with string mixin.
The intended usage area is aggressive optimization via meta programming
in parser generators and the like.
Note: Use with care for relatively small or regular sets. It
could end up being slower than just using multi-staged tables.
Example:
---
import std.stdio;
// construct set directly from [a, b$RPAREN intervals
auto set = CodepointSet(10, 12, 45, 65, 100, 200);
writeln(set);
writeln(set.toSourceCode("func"));
---
The above outputs something along the lines of:
---
bool func(dchar ch) @safe pure nothrow @nogc
{
if (ch < 45)
{
if (ch == 10 || ch == 11) return true;
return false;
}
else if (ch < 65) return true;
else
{
if (ch < 100) return false;
if (ch < 200) return true;
return false;
}
}
---
*/
string toSourceCode(string funcName="")
{
    import std.array : array;
    // Snapshot this set's intervals into an array and hand it to the
    // static overload that takes the intervals explicitly.
    return toSourceCode(byInterval.array(), funcName);
}
/** /**
True if this set doesn't contain any $(CODEPOINTS). True if this set doesn't contain any $(CODEPOINTS).
*/ */
@ -2802,6 +2813,7 @@ private:
//may break sorted property - but we need std.sort to access it //may break sorted property - but we need std.sort to access it
//hence package protection attribute //hence package protection attribute
static if (hasAssignableElements!Range)
package @property void front(CodepointInterval val) package @property void front(CodepointInterval val)
{ {
slice[start] = val.a; slice[start] = val.a;
@ -2816,6 +2828,7 @@ private:
} }
//ditto about package //ditto about package
static if (hasAssignableElements!Range)
package @property void back(CodepointInterval val) package @property void back(CodepointInterval val)
{ {
slice[end-2] = val.a; slice[end-2] = val.a;
@ -2840,6 +2853,7 @@ private:
} }
//ditto about package //ditto about package
static if (hasAssignableElements!Range)
package void opIndexAssign(CodepointInterval val, size_t idx) package void opIndexAssign(CodepointInterval val, size_t idx)
{ {
slice[start+idx*2] = val.a; slice[start+idx*2] = val.a;