Merge branch 'range-based-lexer' of https://github.com/Hackerpilot/Dscanner into range-based-lexer
This commit is contained in:
commit
9b0d3d78d1
|
@ -1,232 +0,0 @@
|
|||
import std.stdio;
|
||||
import std.algorithm;
|
||||
|
||||
// Table of the fixed spellings of operators, keywords and special tokens.
// The generator's main() walks this array in order and emits one output
// line per entry. Entries that are null stand for token kinds that have no
// fixed spelling (comment, identifier, whitespace, literals, ...), as named
// by the trailing comment on each of those lines.
// NOTE(review): the entry order presumably has to match the lexer's token
// type enumeration -- confirm against that enum before reordering anything.
string[] opkwds = [
|
||||
"=", // Assign
|
||||
"@", // At
|
||||
"&", // BitAnd
|
||||
"&=", // BitAndEquals
|
||||
"|", // BitOr
|
||||
"|=", // BitOrEquals
|
||||
"~=", // CatEquals
|
||||
":", // Colon
|
||||
",", // Comma
|
||||
"--", // Decrement
|
||||
"/", // Div
|
||||
"/=", // DivEquals
|
||||
"$", // Dollar
|
||||
".", // Dot
|
||||
"==", // Equals
|
||||
"=>", // GoesTo
|
||||
">", // Greater
|
||||
">=", // GreaterEqual
|
||||
"#", // Hash
|
||||
"++", // Increment
|
||||
"{", // LBrace
|
||||
"[", // LBracket
|
||||
"<", // Less
|
||||
"<=", // LessEqual
|
||||
"<>=", // LessEqualGreater
|
||||
"<>", // LessOrGreater
|
||||
"&&", // LogicAnd
|
||||
"||", // LogicOr
|
||||
"(", // LParen
|
||||
"-", // Minus
|
||||
"-=", // MinusEquals
|
||||
"%", // Mod
|
||||
"%=", // ModEquals
|
||||
"*=", // MulEquals
|
||||
"!", // Not
|
||||
"!=", // NotEquals
|
||||
"!>", // NotGreater
|
||||
"!>=", // NotGreaterEqual
|
||||
"!<", // NotLess
|
||||
"!<=", // NotLessEqual
|
||||
"!<>", // NotLessEqualGreater
|
||||
"+", // Plus
|
||||
"+=", // PlusEquals
|
||||
"^^", // Pow
|
||||
"^^=", // PowEquals
|
||||
"}", // RBrace
|
||||
"]", // RBracket
|
||||
")", // RParen
|
||||
";", // Semicolon
|
||||
"<<", // ShiftLeft
|
||||
"<<=", // ShiftLeftEqual
|
||||
">>", // ShiftRight
|
||||
">>=", // ShiftRightEqual
|
||||
"..", // Slice
|
||||
"*", // Star
|
||||
"?", // Ternary
|
||||
"~", // Tilde
|
||||
"!<>=", // Unordered
|
||||
">>>", // UnsignedShiftRight
|
||||
">>>=", // UnsignedShiftRightEqual
|
||||
"...", // Vararg
|
||||
"^", // Xor
|
||||
"^=", // XorEquals
|
||||
"bool",
|
||||
"byte",
|
||||
"cdouble",
|
||||
"cent",
|
||||
"cfloat",
|
||||
"char",
|
||||
"creal",
|
||||
"dchar",
|
||||
"double",
|
||||
"dstring",
|
||||
"float",
|
||||
"function",
|
||||
"idouble",
|
||||
"ifloat",
|
||||
"int",
|
||||
"ireal",
|
||||
"long",
|
||||
"real",
|
||||
"short",
|
||||
"string",
|
||||
"ubyte",
|
||||
"ucent",
|
||||
"uint",
|
||||
"ulong",
|
||||
"ushort",
|
||||
"void",
|
||||
"wchar",
|
||||
"wstring",
|
||||
"align",
|
||||
"deprecated",
|
||||
"extern",
|
||||
"pragma",
|
||||
"export",
|
||||
"package",
|
||||
"private",
|
||||
"protected",
|
||||
"public",
|
||||
"abstract",
|
||||
"auto",
|
||||
"const",
|
||||
"final",
|
||||
"__gshared",
|
||||
"immutable",
|
||||
"inout",
|
||||
"scope",
|
||||
"shared",
|
||||
"static",
|
||||
"synchronized",
|
||||
"alias",
|
||||
"asm",
|
||||
"assert",
|
||||
"body",
|
||||
"break",
|
||||
"case",
|
||||
"cast",
|
||||
"catch",
|
||||
"class",
|
||||
"continue",
|
||||
"debug",
|
||||
"default",
|
||||
"delegate",
|
||||
"delete",
|
||||
"do",
|
||||
"else",
|
||||
"enum",
|
||||
"false",
|
||||
"finally",
|
||||
"foreach",
|
||||
"foreach_reverse",
|
||||
"for",
|
||||
"goto",
|
||||
"if",
|
||||
"import",
|
||||
"in",
|
||||
"interface",
|
||||
"invariant",
|
||||
"is",
|
||||
"lazy",
|
||||
"macro",
|
||||
"mixin",
|
||||
"module",
|
||||
"new",
|
||||
"nothrow",
|
||||
"null",
|
||||
"out",
|
||||
"override",
|
||||
"pure",
|
||||
"ref",
|
||||
"return",
|
||||
"struct",
|
||||
"super",
|
||||
"switch",
|
||||
"template",
|
||||
"this",
|
||||
"throw",
|
||||
"true",
|
||||
"try",
|
||||
"typedef",
|
||||
"typeid",
|
||||
"typeof",
|
||||
"union",
|
||||
"unittest",
|
||||
"version",
|
||||
"volatile",
|
||||
"while",
|
||||
"with",
|
||||
"__DATE__",
|
||||
"__EOF__",
|
||||
"__TIME__",
|
||||
"__TIMESTAMP__",
|
||||
"__VENDOR__",
|
||||
"__VERSION__",
|
||||
"__FILE__",
|
||||
"__LINE__",
|
||||
null, // Comment
|
||||
null, // Identifier
|
||||
null, // ScriptLine
|
||||
"__traits",
|
||||
"__parameters",
|
||||
"__vector",
|
||||
null, // Whitespace
|
||||
null, // SpecialTokenSequence
|
||||
null, // DoubleLiteral
|
||||
null, // FloatLiteral
|
||||
null, // IDoubleLiteral
|
||||
null, // IFloatLiteral
|
||||
null, // IntLiteral
|
||||
null, // LongLiteral
|
||||
null, // RealLiteral
|
||||
null, // IRealLiteral
|
||||
null, // UnsignedIntLiteral
|
||||
null, // UnsignedLongLiteral
|
||||
null, // DStringLiteral
|
||||
null, // StringLiteral
|
||||
null, // WStringLiteral
|
||||
];
|
||||
|
||||
// One concatenated string in which main() locates every non-null entry of
// opkwds as a substring, so that each spelling can be emitted as a slice
// `opKwdValues[a .. b]` of this single string.
// Spellings share characters where they overlap: for example "byte" is not
// listed on its own here and is found inside "ubyte", and "__FILE__" spans
// the boundary between "__VERSION__" and "FILE__".
// Any spelling added to opkwds must also be findable in this string.
immutable string opKwdValues =
|
||||
"#/=*=+=++-=--^^=~=<<=%==>>>=||=&&=,;:!<=!<>=!=!>=?...()[]{}@$"
|
||||
~ "boolcdoublecentcfloatcrealdchardstringfunctionidoubleifloatirealubyte"
|
||||
~ "ucentuintulongushortvoidwcharwstringaligndeprecatedexternpragmaexport"
|
||||
~ "packageprivateprotectedpublicabstractautoconstfinal__gsharedimmutable"
|
||||
~ "inoutscopesharedstaticsynchronizedaliasasmassertbodybreakcasecastcatch"
|
||||
~ "classcontinuedebugdefaultdelegatedeleteelseenumfalsefinally"
|
||||
~ "foreach_reversegotoimportinterfaceinvariantlazymacromixinmodule"
|
||||
~ "newnothrownulloverridepurerefreturnstructsuperswitchtemplatethistruetry"
|
||||
~ "typedeftypeidtypeofunionunittestversionvolatilewhilewith__traits"
|
||||
~ "__vector__parameters__DATE__EOF__TIME__TIMESTAMP__VENDOR__VERSION__"
|
||||
~ "FILE__LINE__";
|
||||
|
||||
// Generator entry point: writes D source text for an
// `immutable(string[]) tokenValues` array literal to standard output.
// For each entry of opkwds, in order, it prints either "\tnull," (null
// entry) or a slice expression into opKwdValues followed by the spelling
// as a trailing comment. The `args` parameter is not used.
void main(string[] args)
|
||||
{
|
||||
writeln("immutable(string[]) tokenValues = [");
|
||||
foreach (s; opkwds)
|
||||
{
|
||||
if (s is null)
|
||||
{
|
||||
writeln("\tnull,");
|
||||
continue;
|
||||
}
|
||||
// Index of the first occurrence of the spelling inside opKwdValues.
// NOTE(review): countUntil yields -1 when the needle is absent and that
// case is not checked here, so a spelling missing from opKwdValues would
// produce a bogus slice expression in the output.
auto n = opKwdValues.countUntil(s);
|
||||
writeln("\topKwdValues[", n, " .. ", n + s.length, "], // ", s);
|
||||
}
|
||||
writeln("];");
|
||||
}
|
26
main.d
26
main.d
|
@ -97,12 +97,13 @@ int main(string[] args)
|
|||
{
|
||||
string[] importDirs;
|
||||
bool sloc;
|
||||
/+bool dotComplete;+/
|
||||
/+bool json;+/
|
||||
/+bool parenComplete;+/
|
||||
bool dotComplete;
|
||||
bool parenComplete;
|
||||
bool highlight;
|
||||
bool ctags;
|
||||
bool recursiveCtags;
|
||||
bool json;
|
||||
bool declaration;
|
||||
bool recursive;
|
||||
bool format;
|
||||
bool help;
|
||||
bool tokenCount;
|
||||
|
@ -112,8 +113,9 @@ int main(string[] args)
|
|||
{
|
||||
getopt(args, "I", &importDirs,/+ "dotComplete|d", &dotComplete,+/ "sloc|l", &sloc,
|
||||
/+"json|j", &json,+/ /+"parenComplete|p", &parenComplete,+/ "highlight", &highlight,
|
||||
"ctags|c", &ctags, "recursive|r|R", &recursiveCtags, "help|h", &help,
|
||||
"tokenCount", &tokenCount, "frequencyCount", &frequencyCount);
|
||||
"ctags|c", &ctags, "recursive|r|R", &recursive, "help|h", &help,
|
||||
"tokenCount", &tokenCount, "frequencyCount", &frequencyCount,
|
||||
"declaration|e", &declaration);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
|
@ -148,7 +150,10 @@ int main(string[] args)
|
|||
config.fileName = arg;
|
||||
uint count;
|
||||
auto f = File(arg);
|
||||
ubyte[] buffer = uninitializedArray!(ubyte[])(f.size);
|
||||
import core.stdc.stdlib;
|
||||
ubyte[] buffer = (cast(ubyte*)malloc(f.size))[0..f.size];
|
||||
scope(exit) free(buffer.ptr);
|
||||
//uninitializedArray!(ubyte[])(f.size);
|
||||
foreach (t; byToken(f.rawRead(buffer), config))
|
||||
{
|
||||
if (tokenCount)
|
||||
|
@ -210,9 +215,16 @@ options:
|
|||
and methods available in the current scope that begin with the text
|
||||
before the cursor position.
|
||||
|
||||
--declaration | -e [sourceFile] cursorPosition
|
||||
Prints the absolute path to the file in which the symbol at the cursor
|
||||
position was declared, as well as its line number.
|
||||
|
||||
--highlight [sourceFile] - Syntax-highlight the given source file. The
|
||||
resulting HTML will be written to standard output.
|
||||
|
||||
--imports | -i [sourceFiles]
|
||||
Prints modules imported by the given source file.
|
||||
|
||||
-I includePath
|
||||
Include _includePath_ in the list of paths used to search for imports.
|
||||
By default dscanner will search in the current working directory as
|
||||
|
|
4262
std/d/entities.d
4262
std/d/entities.d
File diff suppressed because it is too large
Load Diff
152
std/d/lexer.d
152
std/d/lexer.d
|
@ -975,10 +975,10 @@ private:
|
|||
void lexWhitespace(bool keep)()
|
||||
{
|
||||
current.type = TokenType.whitespace;
|
||||
while (!isEoF() && isWhite())
|
||||
do
|
||||
{
|
||||
nextChar();
|
||||
}
|
||||
}while (!isEoF() && isWhite());
|
||||
static if (keep) setTokenValue();
|
||||
}
|
||||
|
||||
|
@ -2010,15 +2010,18 @@ private:
|
|||
return idx;
|
||||
}
|
||||
}
|
||||
auto chunk = buffer[0..idx];
|
||||
auto entity = cast(string)chunk in characterEntities;
|
||||
if (entity is null)
|
||||
//TODO: avoid looking up as UTF string, use raw bytes
|
||||
string chunk = cast(string)buffer[0..idx];
|
||||
auto names = assumeSorted(map!"a.name"(characterEntities));
|
||||
auto place = names.lowerBound(chunk).length;
|
||||
if (place == names.length || names[place] != chunk)
|
||||
{
|
||||
errorMessage("Invalid character entity \"&%s;\""
|
||||
.format(cast(string) chunk));
|
||||
return 1;
|
||||
}
|
||||
dest.put(cast(ubyte[]) (*entity)[0..$]);
|
||||
auto entity = characterEntities[place].value;
|
||||
dest.put(cast(ubyte[]) entity);
|
||||
return entity.length;
|
||||
default:
|
||||
errorMessage("Invalid escape sequence");
|
||||
|
@ -2099,7 +2102,7 @@ private:
|
|||
if (ch >= '[' && ch <= '^') return true;
|
||||
if (ch >= '{' && ch <= '~') return true;
|
||||
if (ch == '`') return true;
|
||||
if (isWhite()) return true; //TODO: test only long 'whites'
|
||||
if ((ch & 0x80) && isLongWhite()) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -2108,24 +2111,30 @@ private:
|
|||
auto c = src.front;
|
||||
if (c & 0x80) // multi-byte utf-8
|
||||
{
|
||||
//TODO: here and elsewhere we'd better have
|
||||
// some kind of lookahead in LexSource instead of .save
|
||||
auto r = src.save();
|
||||
if (r.front != 0xe2)
|
||||
return false;
|
||||
else
|
||||
r.popFront();
|
||||
if (r.empty || r.front != 0x80)
|
||||
return false;
|
||||
else
|
||||
r.popFront();
|
||||
if (r.empty || (r.front != 0xa8 && r.front != 0xa9))
|
||||
return false;
|
||||
return true;
|
||||
return isLongWhite();
|
||||
}
|
||||
else
|
||||
return c == 0x20 || (c >= 0x09 && c <= 0x0d);
|
||||
}
|
||||
|
||||
// Reports whether the bytes at the front of `src` are E2 80 A8 or E2 80 A9,
// i.e. the UTF-8 encodings of U+2028 (line separator) and U+2029
// (paragraph separator). Returns false for any other byte sequence or when
// the input ends before three bytes are available.
// Must only be called when the front byte has its high bit set (asserted).
// The checks run on `r`, obtained from src.save(); presumably that is an
// independent copy so `src` itself is not advanced -- confirm against the
// LexSource implementation.
bool isLongWhite()
|
||||
{
|
||||
assert(src.front & 0x80); // only non-ascii
|
||||
//TODO: here and elsewhere we'd better have
|
||||
// some kind of lookahead in LexSource instead of .save
|
||||
auto r = src.save();
|
||||
// First byte must be 0xE2.
if (r.front != 0xe2)
|
||||
return false;
|
||||
else
|
||||
r.popFront();
|
||||
// Second byte must be 0x80.
if (r.empty || r.front != 0x80)
|
||||
return false;
|
||||
else
|
||||
r.popFront();
|
||||
// Third byte selects U+2028 (0xA8) or U+2029 (0xA9).
if (r.empty || (r.front != 0xa8 && r.front != 0xa9))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void errorMessage(string s)
|
||||
{
|
||||
|
@ -3050,40 +3059,33 @@ struct StringCache
|
|||
if(isRandomAccessRange!R
|
||||
&& is(Unqual!(ElementType!R) : const(ubyte)))
|
||||
{
|
||||
size_t bucket;
|
||||
hash_t h;
|
||||
string* val = find(range, bucket, h);
|
||||
if (val !is null)
|
||||
|
||||
uint h = hash(range);
|
||||
uint bucket = h % mapSize;
|
||||
Slot *s = &index[bucket];
|
||||
//1st slot not yet initialized?
|
||||
if(s.value.ptr == null)
|
||||
{
|
||||
*s = Slot(putIntoCache(range), null, h);
|
||||
return s.value;
|
||||
}
|
||||
Slot* insSlot = s;
|
||||
for(;;)
|
||||
{
|
||||
return *val;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto s = putIntoCache(range);
|
||||
index[bucket] ~= s;
|
||||
return s;
|
||||
}
|
||||
if(s.hash == h && s.value.equal(range))
|
||||
return s.value;
|
||||
insSlot = s;
|
||||
s = s.next;
|
||||
if(s == null) break;
|
||||
}
|
||||
string str = putIntoCache(range);
|
||||
insertIntoSlot(insSlot, str, h);
|
||||
return str;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
import core.stdc.string;
|
||||
string* find(R)(R data, out size_t bucket, out hash_t h)
|
||||
{
|
||||
h = hash(data);
|
||||
bucket = h % mapSize;
|
||||
foreach (i; 0 .. index[bucket].length)
|
||||
{
|
||||
if (equal(index[bucket][i], data))
|
||||
{
|
||||
return &index[bucket][i];
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
static hash_t hash(R)(R data)
|
||||
|
||||
static uint hash(R)(R data)
|
||||
{
|
||||
uint hash = 0;
|
||||
foreach (b; data)
|
||||
|
@ -3095,28 +3097,54 @@ private:
|
|||
}
|
||||
|
||||
enum mapSize = 2048;
|
||||
string[][mapSize] index;
|
||||
|
||||
// One entry of the string cache's hash index. Each element of `index` is
// the first Slot of its bucket; further entries hang off it through `next`.
struct Slot
|
||||
{
|
||||
// The cached string held by this entry.
string value;
|
||||
// Next entry of the same bucket, or null at the end of the chain.
Slot* next;
|
||||
// Hash stored alongside the value; lookups compare it before comparing
// the string contents.
uint hash;
|
||||
};
|
||||
|
||||
// Links a new Slot holding (val, hash) directly after `tgt`.
// Storage of Slot.sizeof bytes is taken from allocateInCache, the new slot
// is initialised with a null `next`, and tgt.next is overwritten to point
// at it -- so `tgt` is expected to be the last slot of its chain.
// NOTE(review): allocateInCache requests its memory with
// GC.BlkAttr.NO_SCAN, while a Slot stores pointers (`value`, `next`);
// confirm that everything reachable only through slots stays alive.
void insertIntoSlot(Slot* tgt, string val, uint hash)
|
||||
{
|
||||
auto slice = allocateInCache(Slot.sizeof);
|
||||
auto newSlot = cast(Slot*)slice.ptr;
|
||||
*newSlot = Slot(val, null, hash);
|
||||
tgt.next = newSlot;
|
||||
}
|
||||
|
||||
Slot[mapSize] index;
|
||||
|
||||
// leave some slack for allocators/GC meta-data
|
||||
enum chunkSize = 16*1024 - size_t.sizeof*8;
|
||||
ubyte*[] chunkS;
|
||||
size_t next = chunkSize;
|
||||
|
||||
string putIntoCache(R)(R data)
|
||||
{
|
||||
import core.memory;
|
||||
|
||||
if(next + data.length > chunkSize)
|
||||
|
||||
ubyte[] allocateInCache(size_t size)
|
||||
{
|
||||
import core.memory;
|
||||
if(next + size > chunkSize)
|
||||
{
|
||||
// avoid huge strings
|
||||
if(data.length > chunkSize/4)
|
||||
return (cast(char[])data).idup;
|
||||
// avoid huge allocations
|
||||
if(size> chunkSize/4)
|
||||
{
|
||||
ubyte* p = cast(ubyte*)GC.malloc(size,
|
||||
GC.BlkAttr.NO_SCAN | GC.BlkAttr.NO_INTERIOR);
|
||||
return p[0..size];
|
||||
}
|
||||
chunkS ~= cast(ubyte*)GC.malloc(chunkSize,
|
||||
GC.BlkAttr.NO_SCAN | GC.BlkAttr.NO_INTERIOR);
|
||||
next = 0;
|
||||
}
|
||||
auto slice = chunkS[$-1][next..next+data.length];
|
||||
auto slice = chunkS[$-1][next..next+size];
|
||||
next += size;
|
||||
return slice;
|
||||
}
|
||||
|
||||
string putIntoCache(R)(R data)
|
||||
{
|
||||
auto slice = allocateInCache(data.length);
|
||||
slice[] = data[];
|
||||
next += data.length;
|
||||
return cast(string)slice;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue