checking in work on lexer and caching

This commit is contained in:
Hackerpilot 2013-01-14 15:08:24 -08:00
parent 9d6b96135e
commit c429199c1c
12 changed files with 707 additions and 138 deletions

View File

@ -75,7 +75,9 @@ list. See the documentation on the --dotComplete option for details
success k
# JSON output
Generates a JSON summary of the input file.
Generates a JSON summary of the input file. The output conforms to the JSON
schema included with the project in the "schemas" directory. (Note that the
schema is not yet complete.)
### Example
The given D code:

View File

@ -1,2 +1,2 @@
#dmd *.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner #-inline
dmd *.d -g -m64 -w -wi -property -ofdscanner
dmd *.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline
#dmd *.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest

217
cache.d Normal file
View File

@ -0,0 +1,217 @@
// Copyright Brian Schott (Sir Alaran) 2012.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
module cache;
import etc.c.sqlite3;
import std.c.stdlib;
import std.datetime;
import std.file;
import std.uuid;
import std.array;
import std.string;
import std.conv;
import location;
import parser;
import types;
import tokenizer;
/// Handle of the cache database. Null until getDatabase() opens it, and null
/// again after closeDatabase().
private sqlite3* database;

version (Posix)
{
    /// Location of the cache database as a NUL-terminated C string.
    private immutable char* DB_PATH = "~/.dscanner/cache.db";
}
else version (Windows)
{
    pragma(msg, "Caching not supported on Windows yet");
    // Same type and visibility as the Posix constant, so that code handing
    // DB_PATH to sqlite3_open() compiles on both platforms.
    private immutable char* DB_PATH = "";
}
/**
 * SQL text of every statement the cache issues. Each '?' is a positional
 * parameter that is bound before the statement is stepped.
 *
 * Column names follow createTable.sql: files(path, mtime, id),
 * publicImports(importerId, importedId), containers(name, protection, fileId,
 * id) and symbols(name, type, kind, containerId, id).
 */
private enum Queries : string
{
    // The files table names its column "path", not "filepath".
    getUpdateTime = "select mtime from files where path = ?",
    // One placeholder per column of containers: name, protection, fileId, id.
    insertContainer = "insert into containers values (?, ?, ?, ?)",
    deleteContainer = "delete from containers where fileId = ?",
    deleteSymbol = "delete from symbols where containerId = ?",
    deleteFile = "delete from files where path = ?",
    getPublicImports = "select importedId from publicImports where importerId = ?",
    getModuleId = "select id from files where path = ?",
    getContainersByModule = "select id from containers where fileId = ?"
}
/**
 * Opens the cache database on first use and returns the shared handle on
 * every later call.
 *
 * Returns: the open database handle
 * Throws: Exception if SQLite cannot open the database file
 */
private sqlite3* getDatabase()
{
    import std.path : expandTilde;

    if (database !is null)
        return database;

    // sqlite3_open() performs no shell-style expansion, so a leading "~" has
    // to be resolved before the path is handed over.
    string dbPath = expandTilde(to!string(DB_PATH));
    immutable int status = sqlite3_open(dbPath.toStringz(), &database);
    if (status != SQLITE_OK)
    {
        // Copy the message first: it belongs to the handle released below.
        // Formatting the raw char* with %s would print the pointer value.
        string reason = database is null
            ? "out of memory" : to!string(sqlite3_errmsg(database));
        // SQLite hands back a handle even when opening fails; release it so
        // that the next call starts from a clean state.
        sqlite3_close(database);
        database = null;
        throw new Exception("Could not open %s: %s".format(dbPath, reason));
    }
    return database;
}
/**
 * Closes the cache database if it is open and forgets the handle. Does
 * nothing when no database is open.
 */
void closeDatabase()
{
    if (database is null)
        return;
    sqlite3_close(database);
    database = null;
}
/**
 * Looks up the modification time recorded in the cache for a file.
 *
 * Params:
 *     db = database handle, used for error reporting
 *     statement = prepared Queries.getUpdateTime statement; the caller is
 *         responsible for resetting it afterwards
 *     filePath = path of the file to look up
 * Returns: the stored mtime, or -1 if the file has no entry in the cache
 * Throws: Exception if stepping the statement fails
 */
private long getCachedModTime(sqlite3* db, sqlite3_stmt* statement, string filePath)
{
    bindText(statement, 1, filePath);
    immutable int status = sqlite3_step(statement);
    // No row simply means the file has not been cached yet.
    if (status == SQLITE_DONE)
        return -1;
    if (status != SQLITE_ROW)
        throw new Exception(to!string(sqlite3_errmsg(db)));
    // Result columns are numbered from 0; mtime is the only column selected.
    return sqlite3_column_int64(statement, 0);
}
/**
 * Updates the sqlite database with current autocomplete information for the
 * given modules.
 *
 * Modules that cannot be located are skipped. A module is re-parsed only when
 * the modification time of its file differs from the one stored in the cache.
 *
 * Params:
 *     dirs = directories searched when resolving module names
 *     moduleNames = module names (or file names) to refresh
 * Throws: Exception if the lookup statement cannot be prepared
 */
void updateCache(string[] dirs, string[] moduleNames)
{
    string[] filePaths;
    foreach (moduleName; moduleNames)
    {
        string path = findAbsPath(dirs, moduleName);
        if (path !is null)
            filePaths ~= path;
    }

    sqlite3* db = getDatabase();
    sqlite3_stmt* statement;
    scope(exit) { if (statement !is null) sqlite3_finalize(statement); }

    // The tail pointer SQLite can report points into the SQL text itself and
    // must never be passed to free(); it is not needed here, so pass null.
    if (sqlite3_prepare_v2(db, Queries.getUpdateTime.toStringz(), -1,
        &statement, null) != SQLITE_OK)
    {
        throw new Exception(to!string(sqlite3_errmsg(db)));
    }

    foreach (string filePath; filePaths)
    {
        // Reset on every way out of the iteration, including `continue`, so
        // the statement can be bound again for the next file.
        scope(exit) sqlite3_reset(statement);

        immutable long cachedTime = getCachedModTime(db, statement, filePath);
        immutable long currentTime = timeLastModified(filePath).stdTime;
        // if the times match, we don't need to update the cache.
        if (currentTime == cachedTime)
            continue;

        // re-parse the module
        Module m = parseModule(tokenize(readText(filePath)));
        updateCache(m);
    }
}
/**
 * Stores the information of one parsed module in the cache.
 *
 * NOTE: this is currently a stub. The body is empty, so nothing is written.
 *
 * Params:
 *     m = the parsed module; must not be null
 */
private void updateCache(const Module m)
in
{
assert(m !is null);
}
body
{
// TODO: not implemented yet
}
/**
 * Collects the ids of all modules that the given module publicly imports,
 * directly or through further public imports.
 *
 * Params:
 *     modulePath = path of the importing module
 *     statement = optional prepared Queries.getPublicImports statement to
 *         reuse; when null, one is prepared and finalized by this call
 * Returns: the ids of the imported modules, each listed once and never
 *     including the starting module; null if the module is not in the cache
 * Throws: Exception if the statement cannot be prepared
 */
private string[] getImportedModules(string modulePath, sqlite3_stmt* statement = null)
{
    sqlite3* db = getDatabase();
    immutable bool ownsStatement = statement is null;
    scope(exit) { if (ownsStatement && statement !is null) sqlite3_finalize(statement); }
    if (ownsStatement)
    {
        // No tail pointer is requested: it would point into the SQL text and
        // must not be freed.
        if (sqlite3_prepare_v2(db, Queries.getPublicImports.toStringz(), -1,
            &statement, null) != SQLITE_OK)
        {
            throw new Exception(to!string(sqlite3_errmsg(db)));
        }
    }

    string rootId = getModuleIdFromPath(modulePath);
    if (rootId is null)
        return null;

    // publicImports stores module ids, so the traversal works on ids
    // throughout. The visited set makes import cycles terminate.
    bool[string] visited;
    visited[rootId] = true;
    string[] pending = [rootId];
    string[] importedIds;
    while (pending.length > 0)
    {
        string currentId = pending[0];
        pending = pending[1 .. $];

        bindText(statement, 1, currentId);
        while (sqlite3_step(statement) == SQLITE_ROW)
        {
            // Result columns are numbered from 0.
            string importedId = to!string(sqlite3_column_text(statement, 0));
            if (importedId in visited)
                continue;
            visited[importedId] = true;
            importedIds ~= importedId;
            pending ~= importedId;
        }
        sqlite3_reset(statement);
    }
    return importedIds;
}
/**
 * Looks up the id stored in the files table for a path.
 *
 * Params:
 *     filePath = path of the module's source file
 * Returns: the id as text, or null if the path has no entry in the cache
 * Throws: Exception if the statement cannot be prepared
 */
private string getModuleIdFromPath(string filePath)
{
    sqlite3* db = getDatabase();
    sqlite3_stmt* statement;
    // Finalize on every exit path; the statement was previously leaked.
    scope(exit) { if (statement !is null) sqlite3_finalize(statement); }

    // No tail pointer is requested: it would point into the SQL text and
    // must not be freed.
    if (sqlite3_prepare_v2(db, Queries.getModuleId.toStringz(), -1,
        &statement, null) != SQLITE_OK)
    {
        throw new Exception(to!string(sqlite3_errmsg(db)));
    }

    bindText(statement, 1, filePath);
    if (sqlite3_step(statement) != SQLITE_ROW)
        return null;
    // Result columns are numbered from 0. to!string copies the text, so the
    // result stays valid after the statement is finalized.
    return to!string(sqlite3_column_text(statement, 0));
}
/**
 * Params:
 *     modulePath = path of the importing module
 * Returns: the container IDs of the containers that have
 * been imported
 */
public string[] getContainersImported(string modulePath)
{
    // The unused statement, tail pointer and module-id lookup of the earlier
    // version are gone; getImportedModules resolves the module itself.
    string[] containerIds;
    foreach (string id; getImportedModules(modulePath))
        containerIds ~= getContainersByModule(id);
    return containerIds;
}
/**
 * Lists the containers that belong to one module.
 *
 * Params:
 *     moduleId = id of the module (the fileId column of containers)
 * Returns: the ids of the module's containers, possibly empty
 * Throws: Exception if the statement cannot be prepared
 */
private string[] getContainersByModule(string moduleId)
{
    sqlite3* db = getDatabase();
    sqlite3_stmt* statement;
    scope(exit) if (statement !is null) sqlite3_finalize(statement);

    // Prepared in place so that the statement pointer is really filled in
    // (a helper receiving the pointer by value cannot hand it back).
    if (sqlite3_prepare_v2(db, Queries.getContainersByModule.toStringz(), -1,
        &statement, null) != SQLITE_OK)
    {
        throw new Exception(to!string(sqlite3_errmsg(db)));
    }

    bindText(statement, 1, moduleId);
    string[] rVal;
    while (sqlite3_step(statement) == SQLITE_ROW)
    {
        // Result columns are numbered from 0.
        rVal ~= to!string(sqlite3_column_text(statement, 0));
    }
    return rVal;
}
/**
 * Compiles a query into a prepared statement.
 *
 * Params:
 *     db = database the statement is prepared against
 *     statement = receives the prepared statement; taken by reference so the
 *         caller actually sees it and can finalize it
 *     query = SQL text to compile
 * Throws: Exception if SQLite rejects the query
 */
private void prepareStatement(sqlite3* db, ref sqlite3_stmt* statement, string query)
{
    // No tail pointer is requested: it would point into the SQL text and
    // must not be freed.
    if (sqlite3_prepare_v2(db, query.toStringz(), -1, &statement, null) != SQLITE_OK)
        throw new Exception(to!string(sqlite3_errmsg(db)));
}
/**
 * Binds a D string to a positional parameter of a prepared statement.
 *
 * Params:
 *     statement = the prepared statement
 *     argPos = 1-based index of the '?' placeholder
 *     text = value to bind; a null string binds SQL NULL
 */
private void bindText(sqlite3_stmt* statement, int argPos, string text)
{
    // Pass the exact byte length. Counting the terminating NUL as well would
    // make it part of the bound value, so comparisons against stored text
    // would never match. SQLITE_TRANSIENT makes SQLite copy the bytes.
    sqlite3_bind_text(statement, argPos, text.ptr, cast(int) text.length,
        SQLITE_TRANSIENT);
}

145
circularbuffer.d Normal file
View File

@ -0,0 +1,145 @@
// Copyright Brian Schott (Sir Alaran) 2012.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
module circularbuffer;
import std.math;
import std.array;
import std.range;
/**
 * Input range adapter that keeps a sliding window over another input range,
 * so that elements up to `margin` positions before and after the current one
 * can be inspected.
 *
 * The window holds (2 * margin) + 1 elements. The element with source
 * position i is stored in slot i % data.length.
 *
 * Params:
 *     T = element type
 *     R = type of the wrapped input range; its element type must be T
 */
struct CircularBuffer(T, R) if (isInputRange!(R) && is (ElementType!(R) == T))
{
public:

    /**
     * Params:
     *     size = number of elements kept on each side of the current one
     *     range = the range to read from
     */
    this (size_t size, R range)
    {
        this.range = range;
        this.margin = size;
        data = new T[(margin * 2) + 1];
        if (this.range.empty)
        {
            _empty = true;
            return;
        }
        // Pre-load the current element and up to `margin` elements after it.
        for (size_t i = 0; i <= margin && !this.range.empty; ++i)
        {
            data[i] = this.range.front;
            this.range.popFront();
            // Wrap here as well: with a margin of 0 the buffer has a single
            // slot and `end` must go back to 0 instead of pointing past it.
            end = (end + 1) % data.length;
        }
    }

    /**
     * Returns: the element at the given position of the source range. The
     * position must lie within `margin` of the current position.
     */
    T opIndex(size_t index) const
    in
    {
        assert (index <= sourceIndex + margin);
        // Written as an addition: `sourceIndex - margin` underflows while
        // fewer than `margin` elements have been consumed.
        assert (index + margin >= sourceIndex);
    }
    body
    {
        return data[index % data.length];
    }

    /// Returns: the current element
    T front() const @property
    {
        return data[index];
    }

    /**
     * Returns: the element `offset` positions away from the current one.
     * Negative offsets look back, positive offsets look ahead. The offset
     * must stay within `margin` and must not reach before the first element.
     */
    T peek(int offset)
    in
    {
        immutable size_t distance = offset < 0
            ? cast(size_t) (-cast(long) offset) : cast(size_t) offset;
        assert (distance <= margin);
        // An unsigned `sourceIndex + offset >= 0` is always true; compare
        // the look-back distance with the consumed count instead.
        assert (offset >= 0 || distance <= sourceIndex);
    }
    body
    {
        immutable size_t len = data.length;
        if (offset >= 0)
            return data[(index + cast(size_t) offset) % len];
        // Step back by adding the complement, so the sum never wraps around
        // size_t (which would select the wrong slot once `index` has
        // wrapped around the buffer).
        immutable size_t back = cast(size_t) (-cast(long) offset) % len;
        return data[(index + len - back) % len];
    }

    /**
     * Advances to the next element and refills the window from the wrapped
     * range.
     * Returns: the element that was current before the call
     */
    T popFront()
    in
    {
        assert (!_empty);
    }
    body
    {
        T v = data[index];
        index = (index + 1) % data.length;
        ++sourceIndex;
        if (range.empty)
        {
            // Nothing left to read: the buffer is exhausted once the read
            // position catches up with the write position.
            if (index == end)
                _empty = true;
        }
        else
        {
            data[end] = range.front;
            end = (end + 1) % data.length;
            range.popFront();
        }
        return v;
    }

    /// Returns: true once every element has been consumed
    bool empty() const @property
    {
        return _empty;
    }

private:
    R range;                  // wrapped source range
    immutable size_t margin;  // look-behind / look-ahead distance
    T[] data;                 // ring storage, (2 * margin) + 1 slots
    size_t sourceIndex;       // position of the current element in the source
    size_t end;               // slot that receives the next element read
    size_t index;             // slot of the current element
    bool _empty;
}
// A source shorter than the window is passed through unchanged.
unittest
{
    int[] source = [1, 2];
    auto window = CircularBuffer!(int, int[])(5, source);
    assert(array(window) == source);
}

// The window has (2 * margin) + 1 slots and iteration yields every element.
unittest
{
    int[] source = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
    auto window = CircularBuffer!(int, int[])(2, source);
    assert (window.data.length == 5);
    auto collected = array(window);
    assert (collected == source);
}

// Indexing by source position works within the margin on both sides.
unittest
{
    int[] source = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
    auto window = CircularBuffer!(int, int[])(2, source);
    foreach (step; 0 .. 4)
        window.popFront();
    assert (window.front == 4);
    assert (window[2] == 2);
    assert (window[6] == 6);
}

// peek() looks ahead and behind relative to the current element.
unittest
{
    int[] source = [0, 1, 2, 3];
    auto window = CircularBuffer!(int, int[])(2, source);
    foreach (offset; 0 .. 3)
        assert (window.peek(offset) == offset);
    window.popFront();
    window.popFront();
    foreach (offset; -2 .. 2)
        assert (window.peek(offset) == offset + 2);
}

View File

@ -1,4 +1,3 @@
// Copyright Brian Schott (Sir Alaran) 2012.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
@ -109,29 +108,4 @@ string generateCaseTrie(string[] args ...)
t.add(args[i], args[i+1]);
}
return printCaseStatements(t, "");
}
/**
* Returns: true if index points to end of inputString, false otherwise
*/
pure nothrow bool isEoF(S)(S inputString, size_t index)
{
// note: EoF is determined according to D specification
return index >= inputString.length
|| inputString[index] == Character.NUL
|| inputString[index] == Character.SUB;
}
private:
// Unicode character literals
enum Character
{
// End of file (EoF)
NUL = '\u0000', // NUL character
SUB = '\u001A', // Substitute character
// Line feed (EoL)
CR = '\u000D', // CR character
LF = '\u000A', // LF character
}

4
createTable.sql Normal file
View File

@ -0,0 +1,4 @@
-- Source files known to the cache.
CREATE TABLE files (
    path,
    mtime,
    id
);

-- Public import relationships between files.
CREATE TABLE publicImports (
    importerId,
    importedId
);

-- Containers declared in a file.
CREATE TABLE containers (
    name,
    protection,
    fileId,
    id
);

-- Symbols declared in a container.
CREATE TABLE symbols (
    name,
    type,
    kind,
    containerId,
    id
);

45
location.d Normal file
View File

@ -0,0 +1,45 @@
// Copyright Brian Schott (Sir Alaran) 2012.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
module location;
import std.string;
import std.path;
import std.file;
import std.stdio;
import std.array;
/**
 * Resolves a module name or a source file name to a path.
 *
 * Names ending in ".d" or ".di" are treated as file names: absolute ones are
 * returned as they are, relative ones are joined onto the current working
 * directory (without checking that the file exists). Any other name is
 * treated as a module name such as "std.stdio" and searched for in dirs,
 * trying the ".d" file before the ".di" file in each directory.
 *
 * Params:
 *     dirs = directories to search for module names
 *     moduleName = module name or source file name
 * Returns: the path of the given module, or null if it could not be found
 * (a message is then written to stderr).
 */
string findAbsPath(string[] dirs, string moduleName)
{
    // File names are resolved directly
    immutable bool isFileName = moduleName.endsWith(".d")
        || moduleName.endsWith(".di");
    if (isFileName)
        return isAbsolute(moduleName) ? moduleName : buildPath(getcwd(), moduleName);

    // Module names: turn "a.b.c" into "a/b/c" and probe every directory
    string relativeStem = replace(moduleName, ".", dirSeparator);
    foreach (dir; dirs)
    {
        string stem = buildPath(dir, relativeStem);
        foreach (extension; [".d", ".di"])
        {
            string candidate = stem ~ extension;
            if (exists(candidate) && isFile(candidate))
                return candidate;
        }
    }

    stderr.writeln("Could not locate import ", moduleName, " in ", dirs);
    return null;
}

55
main.d
View File

@ -1,4 +1,3 @@
// Copyright Brian Schott (Sir Alaran) 2012.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
@ -7,21 +6,22 @@
module main;
import std.file;
import std.stdio;
import std.algorithm;
import std.conv;
import std.array;
import std.path;
import std.regex;
import std.conv;
import std.file;
import std.getopt;
import std.parallelism;
import types;
import tokenizer;
import parser;
import langutils;
import std.path;
import std.regex;
import std.stdio;
import autocomplete;
import highlighter;
import langutils;
import location;
import parser;
import tokenizer;
import types;
pure bool isLineOfCode(TokenType t)
{
@ -74,38 +74,6 @@ else
}
}
/**
* Returns: the absolute path of the given module, or null if it could not be
* found.
*/
string findAbsPath(string[] dirs, string moduleName)
{
// For file names
if (endsWith(moduleName, ".d") || endsWith(moduleName, ".di"))
{
if (isAbsolute(moduleName))
return moduleName;
else
return buildPath(getcwd(), moduleName);
}
// Try to find the file name from a module name like "std.stdio"
foreach(dir; dirs)
{
string fileLocation = buildPath(dir, replace(moduleName, ".", dirSeparator));
string dfile = fileLocation ~ ".d";
if (exists(dfile) && isFile(dfile))
{
return dfile;
}
if (exists(fileLocation ~ ".di") && isFile(fileLocation ~ ".di"))
{
return fileLocation ~ ".di";
}
}
stderr.writeln("Could not locate import ", moduleName, " in ", dirs);
return null;
}
string[] loadConfig()
{
@ -152,7 +120,8 @@ int main(string[] args)
stderr.writeln(e.msg);
}
if (help)
if (help || (!sloc && !dotComplete && !json && !parenComplete && !highlight
&& !ctags && !format))
{
printHelp();
return 0;

View File

@ -265,7 +265,11 @@ Module parseModule(const Token[] tokens, string protection = "public", string[]
{
switch(tokens[index].type)
{
case TokenType.Else:
case TokenType.Pragma:
++index;
if (tokens[index] == TokenType.LParen)
skipParens(tokens, index);
break;
case TokenType.Mixin:
case TokenType.Assert:
++index;

View File

@ -1 +1,93 @@
{
"description" : "JSON schema for Dscanner's output",
"properties" : {
"name" : {
"required" : true,
"type" : "string"
},
"imports" : {
"type" : "array",
"items" : {
"type" : "string"
},
"required" : true
},
"interfaces" : {
"type": "array",
"required": true,
"items": {
}
},
"functions" : {
"type": "array",
"required" : true,
"items" : {
"properties" : {
"name": {
"type" : "string",
"required" : true
},
"line" : {
"type" : "integer",
"required" : true,
"minimum" : 1
},
"protection" : {
"type" : "string",
"enum" : [
"private",
"public",
"protected",
"package",
"export",
""
],
"required" : true
},
"attributes" : {
"type" : "array",
"items" : { "type" : "string" },
"required" : true
},
"constraint" : {
"type" : "string",
"required" : true
},
"templateParameters" : {
"type" : "array",
"items" : { "type" : "string" },
"required" : true
},
"parameters" : {
"type" : "array",
"required" : true,
"items" : {
"properties" : {
"name": {
"type" : "string",
"required" : true
},
"line" : {
"type" : "integer",
"required" : true,
"minimum" : 1
},
"attributes" : {
"type" : "array",
"items" : { "type" : "string" },
"required" : true
}
}
}
},
"returnType" : {
"type" : "string",
"required" : true
}
}
}
}
}
}

View File

@ -17,54 +17,70 @@ import std.stdio;
import langutils;
import codegen;
/**
* Increments endIndex until it indexes a non-whitespace character in
* inputString.
* Params:
* inputString = the source code to examine
* endIndex = an index into inputString
* lineNumber = the line number that corresponds to endIndex
* style = the code iteration style
* Returns: The whitespace, or null if style was CODE_ONLY
*/
pure nothrow string lexWhitespace(S)(S inputString, ref size_t endIndex,
ref uint lineNumber)
if (isSomeString!S)
pure bool isNewline(R)(R range)
{
immutable startIndex = endIndex;
while (!isEoF(inputString, endIndex) && isWhite(inputString[endIndex]))
return range.front == '\n' || range.front == '\r';
}
/**
 * Returns: true if the range is exhausted or its next element is one of the
 * end-of-file markers NUL (0) or SUB (0x1a), false otherwise
 */
pure bool isEoF(R)(R range)
{
    if (range.empty)
        return true;
    auto next = range.front;
    return next == 0 || next == 0x1a;
}
char[] popNewline(R)(ref R range)
{
if (inputString[endIndex] == '\n')
lineNumber++;
++endIndex;
char[] chars;
if (range.front == '\r')
{
chars ~= range.front;
range.popFront();
}
return inputString[startIndex .. endIndex];
if (range.front == '\n')
{
chars ~= range.front;
range.popFront();
}
return chars;
}
// popNewline consumes a complete "\r\n" pair and leaves the rest untouched.
unittest
{
    auto remaining = "\r\ntest";
    auto consumed = popNewline(remaining);
    assert (consumed == "\r\n");
    assert (remaining == "test");
}
/**
* If inputString starts with #!, increments endIndex until it indexes the next line.
* Params:
* inputString = the source code to examine
* endIndex = an index into inputString
* lineNumber = the line number that corresponds to endIndex
* Returns: The script line, or null if this inputString doesn't start from script line
* Returns:
*/
pure nothrow string lexScriptLine(S)(ref S inputString, ref size_t endIndex,
ref uint lineNumber) if (isSomeString!S)
string lexWhitespace(R)(ref R range, ref uint lineNumber)
{
auto startIndex = endIndex; // in current implementation endIndex is 0, but that could change (e.g., if BOM is not stripped from inputString)
string result = null;
if(inputString.length > 1 && inputString[0..2] == "#!") // safety check
auto app = appender!(char[])();
while (!isEoF(range) && isWhite(range.front))
{
endIndex = 2; // skip #!
while (!isEoF(inputString, endIndex) && inputString[endIndex] != '\n')
++endIndex;
result = inputString[startIndex..endIndex];
if (isNewline(range))
{
++lineNumber;
app.put(popNewline(range));
}
return result;
else
{
app.put(range.front);
range.popFront();
}
}
return to!string(app.data);
}
// lexWhitespace consumes all leading whitespace and counts each line break
// once, whether it is "\n" or "\r\n".
unittest
{
    import std.stdio;
    uint line = 1;
    auto source = " \n \r\n \tabcde";
    auto whitespace = lexWhitespace(source, line);
    assert (whitespace == " \n \r\n \t");
    assert (source == "abcde");
    assert (line == 3);
}
/**
@ -76,52 +92,122 @@ pure nothrow string lexScriptLine(S)(ref S inputString, ref size_t endIndex,
* lineNumber = the line number that corresponds to endIndex
* Returns: The comment
*/
pure nothrow string lexComment(S)(ref S inputString, ref size_t endIndex,
ref uint lineNumber) if (isSomeString!S)
string lexComment(R)(ref R input, ref uint lineNumber)
in
{
if (isEoF(inputString, endIndex))
return "";
auto startIndex = endIndex - 1;
switch(inputString[endIndex])
assert (input.front == '/');
}
body
{
auto app = appender!(char[])();
app.put(input.front);
input.popFront();
switch(input.front)
{
case '/':
while (!isEoF(inputString, endIndex) && inputString[endIndex] != '\n')
while (!isEoF(input) && !isNewline(input))
{
if (inputString[endIndex] == '\n')
++lineNumber;
++endIndex;
app.put(input.front);
input.popFront();
}
break;
case '*':
while (!isEoF(inputString, endIndex)
&& !inputString[endIndex..$].startsWith("*/"))
while (!isEoF(input))
{
if (inputString[endIndex] == '\n')
if (isNewline(input))
{
app.put(popNewline(input));
++lineNumber;
++endIndex;
}
endIndex += 2;
else if (input.front == '*')
{
app.put(input.front);
input.popFront();
if (input.front == '/')
{
app.put(input.front);
input.popFront();
break;
}
}
else
{
app.put(input.front);
input.popFront();
}
}
break;
case '+':
++endIndex;
int depth = 1;
while (depth > 0 && !isEoF(inputString, endIndex))
while (depth > 0 && !isEoF(input))
{
if (inputString[endIndex] == '\n')
if (isNewline(input))
{
app.put(popNewline(input));
lineNumber++;
else if (inputString[endIndex..$].startsWith("+/"))
depth--;
else if (inputString[endIndex..$].startsWith("/+"))
depth++;
++endIndex;
}
if (!isEoF(inputString, endIndex))
++endIndex;
else if (input.front == '+')
{
app.put(input.front);
input.popFront();
if (input.front == '/')
{
app.put(input.front);
input.popFront();
--depth;
}
}
else if (input.front == '/')
{
app.put(input.front);
input.popFront();
if (input.front == '+')
{
app.put(input.front);
input.popFront();
++depth;
}
}
else
{
app.put(input.front);
input.popFront();
}
}
break;
default:
break;
}
return inputString[startIndex..endIndex];
return to!string(app.data);
}
// A line comment ends before the line break.
unittest
{
    uint line = 1;
    auto source = "//this is a comment\r\nthis is not";
    auto lexed = lexComment(source, line);
    assert (source == "\r\nthis is not");
    assert (lexed == "//this is a comment");
}

// A block comment spans lines and ends after the closing "*/".
unittest
{
    uint line = 1;
    auto source = "/* this is a\n\tcomment\r\n */this is not";
    auto lexed = lexComment(source, line);
    assert (source == "this is not");
    assert (lexed == "/* this is a\n\tcomment\r\n */");
    assert (line == 3);
}

// Nesting comments end only when the outermost "/+" has been closed.
unittest
{
    uint line = 1;
    auto source = "/+this is a /+c/+omm+/ent+/ \r\nthis+/ is not";
    auto lexed = lexComment(source, line);
    assert (source == " is not");
    assert (lexed == "/+this is a /+c/+omm+/ent+/ \r\nthis+/");
    assert (line == 2);
}
@ -233,6 +319,9 @@ string lexDelimitedString(S)(ref S inputString, ref size_t endIndex,
}
/**
* TODO: Fix this
*/
string lexTokenString(S)(ref S inputString, ref size_t endIndex, ref uint lineNumber)
{
/+auto r = byDToken(range, IterationStyle.EVERYTHING);
@ -562,6 +651,23 @@ nothrow void lexHex(S)(ref S inputString, ref size_t startIndex,
token.value = inputString[startIndex .. endIndex];
}
// lexHex stops at the first character that cannot belong to a hex literal.
unittest
{
    Token t;
    size_t start, end;

    // The inputs are named variables because lexHex takes its input string
    // by reference, and a string literal cannot be bound to a ref parameter.
    string trailingLetter = "0x193abfq";
    start = 0;
    end = 2;
    lexHex!string(trailingLetter, start, end, t);
    assert(t.value == "0x193abf", t.value);
    assert(t.type == TokenType.IntLiteral);

    string secondPrefix = "0x2130xabc";
    start = 0;
    end = 2;
    lexHex!string(secondPrefix, start, end, t);
    assert(t.value == "0x2130");
    assert(t.type == TokenType.IntLiteral);
}
/**
* Returns: true if ch marks the ending of one token and the beginning of
@ -595,6 +701,19 @@ enum IterationStyle
EVERYTHING
}
/**
 * Range over the tokens lexed from an input range of characters.
 *
 * Only the `empty` property exists so far.
 *
 * Params:
 *     R = type of the wrapped input range
 */
// isInputRange is a template and has to be instantiated with `!`; written as
// a call, the constraint could never be satisfied.
struct TokenRange(R) if (isInputRange!(R))
{
    /// Returns: true once the range has been marked as exhausted
    bool empty() const @property
    {
        return _empty;
    }

private:
    R range;
    bool _empty;
}
Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyle.CODE_ONLY)
if (isSomeString!S)
{
@ -638,7 +757,6 @@ Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyl
outerSwitch: switch(inputString[endIndex])
{
// TODO: Re-enable code generator when DMD bug 7900 is fixed
mixin(generateCaseTrie(
"=", "TokenType.Assign",
"&", "TokenType.BitAnd",

View File

@ -1,4 +1,3 @@
// Copyright Brian Schott (Sir Alaran) 2012.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at