Merge branch 'range-based-lexer' of https://github.com/Hackerpilot/Dscanner into range-based-lexer

Hackerpilot 2013-02-22 22:24:26 +00:00
commit 9b0d3d78d1
4 changed files with 2240 additions and 2432 deletions


@@ -1,232 +0,0 @@
import std.stdio;
import std.algorithm;
string[] opkwds = [
"=", // Assign
"@", // At
"&", // BitAnd
"&=", // BitAndEquals
"|", // BitOr
"|=", // BitOrEquals
"~=", // CatEquals
":", // Colon
",", // Comma
"--", // Decrement
"/", // Div
"/=", // DivEquals
"$", // Dollar
".", // Dot
"==", // Equals
"=>", // GoesTo
">", // Greater
">=", // GreaterEqual
"#", // Hash
"++", // Increment
"{", // LBrace
"[", // LBracket
"<", // Less
"<=", // LessEqual
"<>=", // LessEqualGreater
"<>", // LessOrGreater
"&&", // LogicAnd
"||", // LogicOr
"(", // LParen
"-", // Minus
"-=", // MinusEquals
"%", // Mod
"%=", // ModEquals
"*=", // MulEquals
"!", // Not
"!=", // NotEquals
"!>", // NotGreater
"!>=", // NotGreaterEqual
"!<", // NotLess
"!<=", // NotLessEqual
"!<>", // NotLessEqualGreater
"+", // Plus
"+=", // PlusEquals
"^^", // Pow
"^^=", // PowEquals
"}", // RBrace
"]", // RBracket
")", // RParen
";", // Semicolon
"<<", // ShiftLeft
"<<=", // ShiftLeftEqual
">>", // ShiftRight
">>=", // ShiftRightEqual
"..", // Slice
"*", // Star
"?", // Ternary
"~", // Tilde
"!<>=", // Unordered
">>>", // UnsignedShiftRight
">>>=", // UnsignedShiftRightEqual
"...", // Vararg
"^", // Xor
"^=", // XorEquals
"bool",
"byte",
"cdouble",
"cent",
"cfloat",
"char",
"creal",
"dchar",
"double",
"dstring",
"float",
"function",
"idouble",
"ifloat",
"int",
"ireal",
"long",
"real",
"short",
"string",
"ubyte",
"ucent",
"uint",
"ulong",
"ushort",
"void",
"wchar",
"wstring",
"align",
"deprecated",
"extern",
"pragma",
"export",
"package",
"private",
"protected",
"public",
"abstract",
"auto",
"const",
"final",
"__gshared",
"immutable",
"inout",
"scope",
"shared",
"static",
"synchronized",
"alias",
"asm",
"assert",
"body",
"break",
"case",
"cast",
"catch",
"class",
"continue",
"debug",
"default",
"delegate",
"delete",
"do",
"else",
"enum",
"false",
"finally",
"foreach",
"foreach_reverse",
"for",
"goto",
"if",
"import",
"in",
"interface",
"invariant",
"is",
"lazy",
"macro",
"mixin",
"module",
"new",
"nothrow",
"null",
"out",
"override",
"pure",
"ref",
"return",
"struct",
"super",
"switch",
"template",
"this",
"throw",
"true",
"try",
"typedef",
"typeid",
"typeof",
"union",
"unittest",
"version",
"volatile",
"while",
"with",
"__DATE__",
"__EOF__",
"__TIME__",
"__TIMESTAMP__",
"__VENDOR__",
"__VERSION__",
"__FILE__",
"__LINE__",
null, // Comment
null, // Identifier
null, // ScriptLine
"__traits",
"__parameters",
"__vector",
null, // Whitespace
null, // SpecialTokenSequence
null, // DoubleLiteral
null, // FloatLiteral
null, // IDoubleLiteral
null, // IFloatLiteral
null, // IntLiteral
null, // LongLiteral
null, // RealLiteral
null, // IRealLiteral
null, // UnsignedIntLiteral
null, // UnsignedLongLiteral
null, // DStringLiteral
null, // StringLiteral
null, // WStringLiteral
];
immutable string opKwdValues =
"#/=*=+=++-=--^^=~=<<=%==>>>=||=&&=,;:!<=!<>=!=!>=?...()[]{}@$"
~ "boolcdoublecentcfloatcrealdchardstringfunctionidoubleifloatirealubyte"
~ "ucentuintulongushortvoidwcharwstringaligndeprecatedexternpragmaexport"
~ "packageprivateprotectedpublicabstractautoconstfinal__gsharedimmutable"
~ "inoutscopesharedstaticsynchronizedaliasasmassertbodybreakcasecastcatch"
~ "classcontinuedebugdefaultdelegatedeleteelseenumfalsefinally"
~ "foreach_reversegotoimportinterfaceinvariantlazymacromixinmodule"
~ "newnothrownulloverridepurerefreturnstructsuperswitchtemplatethistruetry"
~ "typedeftypeidtypeofunionunittestversionvolatilewhilewith__traits"
~ "__vector__parameters__DATE__EOF__TIME__TIMESTAMP__VENDOR__VERSION__"
~ "FILE__LINE__";
void main(string[] args)
{
writeln("immutable(string[]) tokenValues = [");
foreach (s; opkwds)
{
if (s is null)
{
writeln("\tnull,");
continue;
}
auto n = opKwdValues.countUntil(s);
writeln("\topKwdValues[", n, " .. ", n + s.length, "], // ", s);
}
writeln("];");
}
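
The deleted helper above generated the lexer's tokenValues table by slicing each operator and keyword string out of the packed opKwdValues constant. Its output took roughly the shape sketched below; the offsets shown are illustrative rather than taken from an actual run:

immutable(string[]) tokenValues = [
	opKwdValues[2 .. 3], // =
	opKwdValues[58 .. 59], // @
	// ... one entry per token type ...
	null, // Comment
	null, // Identifier
];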

main.d

@@ -97,12 +97,13 @@ int main(string[] args)
{
string[] importDirs;
bool sloc;
/+bool dotComplete;+/
/+bool json;+/
/+bool parenComplete;+/
bool dotComplete;
bool parenComplete;
bool highlight;
bool ctags;
bool recursiveCtags;
bool json;
bool declaration;
bool recursive;
bool format;
bool help;
bool tokenCount;
@@ -112,8 +113,9 @@ int main(string[] args)
{
getopt(args, "I", &importDirs,/+ "dotComplete|d", &dotComplete,+/ "sloc|l", &sloc,
/+"json|j", &json,+/ /+"parenComplete|p", &parenComplete,+/ "highlight", &highlight,
"ctags|c", &ctags, "recursive|r|R", &recursiveCtags, "help|h", &help,
"tokenCount", &tokenCount, "frequencyCount", &frequencyCount);
"ctags|c", &ctags, "recursive|r|R", &recursive, "help|h", &help,
"tokenCount", &tokenCount, "frequencyCount", &frequencyCount,
"declaration|e", &declaration);
}
catch (Exception e)
{
@@ -148,7 +150,10 @@ int main(string[] args)
config.fileName = arg;
uint count;
auto f = File(arg);
ubyte[] buffer = uninitializedArray!(ubyte[])(f.size);
import core.stdc.stdlib;
ubyte[] buffer = (cast(ubyte*)malloc(f.size))[0..f.size];
scope(exit) free(buffer.ptr);
//uninitializedArray!(ubyte[])(f.size);
foreach (t; byToken(f.rawRead(buffer), config))
{
if (tokenCount)
@@ -210,9 +215,16 @@ options:
and methods available in the current scope that begin with the text
before the cursor position.
--declaration | -e [sourceFile] cursorPosition
Prints the absolute path to the file in which the symbol at the cursor
position was declared, as well as its line number.
--highlight [sourceFile] - Syntax-highlight the given source file. The
resulting HTML will be written to standard output.
--imports | -i [sourceFiles]
Prints modules imported by the given source file.
-I includePath
Include _includePath_ in the list of paths used to search for imports.
By default dscanner will search in the current working directory as

File diff suppressed because it is too large.


@@ -975,10 +975,10 @@ private:
void lexWhitespace(bool keep)()
{
current.type = TokenType.whitespace;
while (!isEoF() && isWhite())
do
{
nextChar();
}
} while (!isEoF() && isWhite());
static if (keep) setTokenValue();
}
@@ -2010,15 +2013,18 @@ private:
return idx;
}
}
auto chunk = buffer[0..idx];
auto entity = cast(string)chunk in characterEntities;
if (entity is null)
//TODO: avoid looking up as UTF string, use raw bytes
string chunk = cast(string)buffer[0..idx];
auto names = assumeSorted(map!"a.name"(characterEntities));
auto place = names.lowerBound(chunk).length;
if (place == names.length || names[place] != chunk)
{
errorMessage("Invalid character entity \"&%s;\""
.format(cast(string) chunk));
return 1;
}
dest.put(cast(ubyte[]) (*entity)[0..$]);
auto entity = characterEntities[place].value;
dest.put(cast(ubyte[]) entity);
return entity.length;
default:
errorMessage("Invalid escape sequence");
@@ -2099,7 +2102,7 @@ private:
if (ch >= '[' && ch <= '^') return true;
if (ch >= '{' && ch <= '~') return true;
if (ch == '`') return true;
if (isWhite()) return true; //TODO: test only long 'whites'
if ((ch & 0x80) && isLongWhite()) return true;
return false;
}
@@ -2108,24 +2111,30 @@ private:
auto c = src.front;
if (c & 0x80) // multi-byte utf-8
{
//TODO: here and elsewhere we'd better have
// some kind of lookahead in LexSource instead of .save
auto r = src.save();
if (r.front != 0xe2)
return false;
else
r.popFront();
if (r.empty || r.front != 0x80)
return false;
else
r.popFront();
if (r.empty || (r.front != 0xa8 && r.front != 0xa9))
return false;
return true;
return isLongWhite();
}
else
return c == 0x20 || (c >= 0x09 && c <= 0x0d);
}
bool isLongWhite()
{
assert(src.front & 0x80); // only non-ascii
//TODO: here and elsewhere we'd better have
// some kind of lookahead in LexSource instead of .save
auto r = src.save();
if (r.front != 0xe2)
return false;
else
r.popFront();
if (r.empty || r.front != 0x80)
return false;
else
r.popFront();
if (r.empty || (r.front != 0xa8 && r.front != 0xa9))
return false;
return true;
}
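The isLongWhite helper above recognizes the three-byte UTF-8 encodings of the Unicode line separator (U+2028) and paragraph separator (U+2029). The same byte pattern as a small self-contained check, for reference:

bool isLineOrParagraphSeparator(const(ubyte)[] b)
{
    // U+2028 LINE SEPARATOR      encodes as E2 80 A8
    // U+2029 PARAGRAPH SEPARATOR encodes as E2 80 A9
    return b.length >= 3
        && b[0] == 0xE2 && b[1] == 0x80
        && (b[2] == 0xA8 || b[2] == 0xA9);
}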
void errorMessage(string s)
{
@@ -3050,40 +3059,33 @@ struct StringCache
if(isRandomAccessRange!R
&& is(Unqual!(ElementType!R) : const(ubyte)))
{
size_t bucket;
hash_t h;
string* val = find(range, bucket, h);
if (val !is null)
uint h = hash(range);
uint bucket = h % mapSize;
Slot *s = &index[bucket];
//1st slot not yet initialized?
if(s.value.ptr == null)
{
*s = Slot(putIntoCache(range), null, h);
return s.value;
}
Slot* insSlot = s;
for(;;)
{
return *val;
}
else
{
auto s = putIntoCache(range);
index[bucket] ~= s;
return s;
}
if(s.hash == h && s.value.equal(range))
return s.value;
insSlot = s;
s = s.next;
if(s == null) break;
}
string str = putIntoCache(range);
insertIntoSlot(insSlot, str, h);
return str;
}
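The rewritten lookup above turns each bucket of the string cache into an in-place Slot whose collisions are chained through a next pointer: a hit costs one hash computation plus a short pointer walk, and a miss appends a new slot to the end of the chain. A simplified, self-contained sketch of the same find-or-insert scheme, using plain GC allocation via new where the real code uses the cache's own chunk allocator (the Slot layout and mapSize mirror the hunk; table and intern are illustrative names):

import std.algorithm : equal;

struct Slot { string value; Slot* next; uint hash; }

enum mapSize = 2048;
Slot[mapSize] table;

string intern(const(ubyte)[] bytes, uint h)
{
    Slot* s = &table[h % mapSize];
    if (s.value.ptr is null) // first slot in this bucket is still unused
    {
        *s = Slot(cast(string) bytes.idup, null, h);
        return s.value;
    }
    for (;;)
    {
        if (s.hash == h && equal(s.value, cast(const(char)[]) bytes))
            return s.value; // already interned
        if (s.next is null)
            break; // end of chain; append below
        s = s.next;
    }
    s.next = new Slot(cast(string) bytes.idup, null, h);
    return s.next.value;
}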
private:
import core.stdc.string;
string* find(R)(R data, out size_t bucket, out hash_t h)
{
h = hash(data);
bucket = h % mapSize;
foreach (i; 0 .. index[bucket].length)
{
if (equal(index[bucket][i], data))
{
return &index[bucket][i];
}
}
return null;
}
static hash_t hash(R)(R data)
static uint hash(R)(R data)
{
uint hash = 0;
foreach (b; data)
@@ -3095,28 +3097,54 @@ private:
}
enum mapSize = 2048;
string[][mapSize] index;
struct Slot
{
string value;
Slot* next;
uint hash;
}
void insertIntoSlot(Slot* tgt, string val, uint hash)
{
auto slice = allocateInCache(Slot.sizeof);
auto newSlot = cast(Slot*)slice.ptr;
*newSlot = Slot(val, null, hash);
tgt.next = newSlot;
}
Slot[mapSize] index;
// leave some slack for allocators/GC meta-data
enum chunkSize = 16*1024 - size_t.sizeof*8;
ubyte*[] chunkS;
size_t next = chunkSize;
string putIntoCache(R)(R data)
{
import core.memory;
if(next + data.length > chunkSize)
ubyte[] allocateInCache(size_t size)
{
import core.memory;
if(next + size > chunkSize)
{
// avoid huge strings
if(data.length > chunkSize/4)
return (cast(char[])data).idup;
// avoid huge allocations
if(size > chunkSize/4)
{
ubyte* p = cast(ubyte*)GC.malloc(size,
GC.BlkAttr.NO_SCAN | GC.BlkAttr.NO_INTERIOR);
return p[0..size];
}
chunkS ~= cast(ubyte*)GC.malloc(chunkSize,
GC.BlkAttr.NO_SCAN | GC.BlkAttr.NO_INTERIOR);
next = 0;
}
auto slice = chunkS[$-1][next..next+data.length];
auto slice = chunkS[$-1][next..next+size];
next += size;
return slice;
}
string putIntoCache(R)(R data)
{
auto slice = allocateInCache(data.length);
slice[] = data[];
next += data.length;
return cast(string)slice;
}
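
For reference, the figures implied by the allocator above on a 64-bit build (the slack is the allocator/GC meta-data the comment mentions; size_t.sizeof == 8 is an assumption of this arithmetic):

enum chunkSize = 16 * 1024 - size_t.sizeof * 8; // 16384 - 64 = 16320 bytes per chunk
enum hugeThreshold = chunkSize / 4;             // 4080 bytes: larger requests bypass the
                                                // chunks and get their own NO_SCAN,
                                                // NO_INTERIOR block straight from the GC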