Updated to include new special tokens

This commit is contained in:
Hackerpilot 2013-05-05 12:45:25 -07:00
commit 548996d1e5
2 changed files with 35 additions and 326 deletions

303
README.md
View File

@ -1,303 +0,0 @@
# Overview
Dscanner is a tool used to analyze D source code.
### Options
* **--dotComplete** [_sourceFile_] _cursorPosition_ - Provide autocompletion for the
insertion of the dot operator. The cursor position is the character position in
the **file**, not the position in the line. If no file is specified, the file is read from stdin.
* **--sloc** [_sourceFiles_] - count the number of logical lines of code in the given
source files. If no files are specified, a file is read from stdin.
* **--json** [_sourceFile_] - Generate a JSON summary of the given source file. If no file is specified, the file is read from stdin.
* **--parenComplete** [_sourceFile_] _cursorPosition_ - Provides a listing of function
parameters or pre-defined version identifiers at the cursor position. The cursor
position is the character position in the **file**, not the line. If no file is specified, the contents are read from stdin.
* **--highlight** [_sourceFile_] - Syntax-highlight the given source file. The
resulting HTML will be written to standard output.
* **-I** _includePath_ - Include _includePath_ in the list of paths used to search
for imports. By default dscanner will search in the current working directory as
well as any paths specified in /etc/dmd.conf. This is only used for the
--parenComplete and --dotComplete options. If no file is specified, the file is read from stdin.
* **--ctags** _sourceFile_ - Generates ctags information from the given source
code file. Note that ctags information requires a filename, so stdin cannot be used in place of a filename.
* **--recursive** **-R** **-r** _directory_ - When used with --ctags, dscanner
will produce ctags output for all .d and .di files contained within _directory_
and its sub-directories.
# Dot Completion
This is currently under development.
### Output format
The output of the --dotComplete option is a list of valid completions at the
given cursor position. The completions are printed one per line. Lines are ended
by a single line feed character (0x0a). Each line consists of the symbol name
followed by a single space character (0x20), followed by one character indicating
what the symbol is. Symbol definitions are taken from the list of recommended
"kind" values from the CTAGS standard unless there was no relevant recommendation
present.
##### Example output:
foo v
bar f
##### Supported kinds
* c -- class names
* i -- interface names
* s -- structure names
* v -- variable
* m -- member variable
* k -- keyword, built-in version, scope statement
* f -- function or method
* g -- enum name
* P -- package
* M -- module
# Paren Completion
Provides either a call tip for a function call or a list of pre-defined version
identifiers for a version() statement, or a list of scope identifiers for a
scope() statement. Anyone integrating dscanner into a text editor needs to look
at the first line of the output to determine whether to display an autocomplete
list or a call tip. (In the case of Scintilla, these are different)
### Call tips
Outputs the word "calltips" followed by a newline, followed by the call tips for
the function before the cursor. One overload of the function is printed per
line. The call tip may have newlines and tabs escaped in the common "\n" and
"\t" format. These should be un-escaped for display.
##### Example output
calltips
Token[] tokenize(S inputString,\n\tIterationStyle iterationStyle)
### Completions
Outputs the word "completions" followed by a newline, followed by a completion
list. See the documentation on the --dotComplete option for details.
##### Example output
completions
exit k
failure k
success k
# JSON output
Generates a JSON summary of the input file. The JSON output produced complies
with a JSON schema included with the project under the "schemas" directory. (Note
that the schema is not yet complete)
### Example
The given D code:
module example;
import std.stdio;
interface Iface {
double interfaceMethod();
}
class SomeClass(T) if (isSomeString!T) : IFace {
public:
this() {}
void doStuff(T);
override double interfaceMethod() {}
private:
T theTee;
}
int freeFunction(int x) { return x + x; }
void main(string[] args) {
}
is transformed into the following JSON markup:
{
"name" : "example",
"imports" : [
"std.stdio"
],
"interfaces" : [
{
"name" : "Iface",
"line" : 5,
"protection" : "public",
"attributes" : [
],
"constraint" : "",
"templateParameters" : [
],
"functions" : [
{
"name" : "interfaceMethod",
"line" : 6,
"protection" : "",
"attributes" : [
],
"constraint" : "",
"templateParameters" : [
],
"parameters" : [
],
"returnType" : "double"
}
],
"variables" : [
],
"baseClasses" : [
]
}
],
"classes" : [
{
"name" : "SomeClass",
"line" : 9,
"protection" : "public",
"attributes" : [
],
"constraint" : "if (isSomeString!T)",
"templateParameters" : [
"T"
],
"functions" : [
{
"name" : "this",
"line" : 11,
"protection" : "",
"attributes" : [
],
"constraint" : "",
"templateParameters" : [
],
"parameters" : [
],
"returnType" : ""
},
{
"name" : "doStuff",
"line" : 12,
"protection" : "",
"attributes" : [
],
"constraint" : "",
"templateParameters" : [
],
"parameters" : [
{
"name" : "",
"line" : 0,
"protection" : "",
"attributes" : [
],
"type" : "T"
}
],
"returnType" : "void"
},
{
"name" : "interfaceMethod",
"line" : 13,
"protection" : "",
"attributes" : [
"override"
],
"constraint" : "",
"templateParameters" : [
],
"parameters" : [
],
"returnType" : "double"
}
],
"variables" : [
{
"name" : "theTee",
"line" : 15,
"protection" : "private",
"attributes" : [
],
"type" : "T"
}
],
"baseClasses" : [
"IFace"
]
}
],
"structs" : [
],
"structs" : [
],
"functions" : [
{
"name" : "freeFunction",
"line" : 18,
"protection" : "",
"attributes" : [
],
"constraint" : "",
"templateParameters" : [
],
"parameters" : [
{
"name" : "x",
"line" : 18,
"protection" : "",
"attributes" : [
],
"type" : "int"
}
],
"returnType" : "int"
},
{
"name" : "main",
"line" : 20,
"protection" : "",
"attributes" : [
],
"constraint" : "",
"templateParameters" : [
],
"parameters" : [
{
"name" : "args",
"line" : 20,
"protection" : "",
"attributes" : [
],
"type" : "string[]"
}
],
"returnType" : "void"
}
],
"variables" : [
],
"enums" : [
]
}
# Ctags output
Dscanner can create a tags file from the specified file. Output is formatted as
specified at http://ctags.sourceforge.net/FORMAT. The result of generating ctags
on the same file used in the JSON example will produce this output:
!_TAG_FILE_FORMAT 2
!_TAG_FILE_SORTED 1
!_TAG_PROGRAM_URL https://github.com/Hackerpilot/Dscanner/
Iface tmp.d 3;" c inherits:
SomeClass tmp.d 7;" c inherits:IFace
doStuff tmp.d 10;" f arity:1 struct:SomeClass
freeFunction tmp.d 16;" f arity:1
interfaceMethod tmp.d 11;" f arity:0 struct:SomeClass
interfaceMethod tmp.d 4;" f arity:0 struct:Iface
main tmp.d 18;" f arity:1
theTee tmp.d 13;" m struct:SomeClass
this tmp.d 9;" f arity:0 struct:SomeClass
# Line of Code count
This option counts the logical lines of code in the given source files, not
simply the physical lines. More specifically, it counts the number of
semicolons, **if**, **while**, **case**, **foreach**, and **for** tokens in the
given files.
# Highlighting
Syntax highlights the given file in HTML format. Output is written to _stdout_.
The CSS styling information is currently hard-coded.

View File

@ -12,7 +12,7 @@
* ---
* LexerConfig config;
* config.iterStyle = IterationStyle.everything;
* config.tokenStyle = IterationStyle.source;
* config.tokenStyle = TokenStyle.source;
* config.versionNumber = 2061;
* config.vendorString = "Lexer Example";
* ---
@ -592,7 +592,7 @@ L_advance:
current.value = getTokenValue(current.type);
if (current.value is null)
setTokenValue();
if (!(config.iterStyle & IterationStyle.ignoreEOF) && current.type == TokenType.eof)
if (!(config.iterStyle & IterationStyle.ignoreEOF) && current.type == TokenType.specialEof)
{
_empty = true;
return;
@ -1775,17 +1775,17 @@ L_advance:
{
switch (current.type)
{
case TokenType.date:
case TokenType.specialDate:
current.type = TokenType.stringLiteral;
auto time = Clock.currTime();
current.value = format("%s %02d %04d", time.month, time.day, time.year);
return;
case TokenType.time:
case TokenType.specialTime:
auto time = Clock.currTime();
current.type = TokenType.stringLiteral;
current.value = (cast(TimeOfDay)(time)).toISOExtString();
return;
case TokenType.timestamp:
case TokenType.specialTimestamp:
auto time = Clock.currTime();
auto dt = cast(DateTime) time;
current.type = TokenType.stringLiteral;
@ -1793,19 +1793,19 @@ L_advance:
dt.dayOfWeek, dt.month, dt.day, dt.hour, dt.minute,
dt.second, dt.year);
return;
case TokenType.vendor:
case TokenType.specialVendor:
current.type = TokenType.stringLiteral;
current.value = config.vendorString;
return;
case TokenType.compilerVersion:
case TokenType.specialVersion:
current.type = TokenType.stringLiteral;
current.value = format("%d", config.versionNumber);
return;
case TokenType.line:
case TokenType.specialLine:
current.type = TokenType.intLiteral;
current.value = format("%d", current.line);
return;
case TokenType.file:
case TokenType.specialFile:
current.type = TokenType.stringLiteral;
current.value = config.fileName;
return;
@ -1889,7 +1889,7 @@ pure nothrow bool isBasicType(const TokenType t)
*/
pure nothrow bool isBasicType(ref const Token t)
{
return isType(t.type);
return isBasicType(t.type);
}
/**
@ -1929,7 +1929,7 @@ pure nothrow bool isProtection(ref const Token t)
*/
pure nothrow bool isConstant(const TokenType t)
{
return t >= TokenType.date && t <= TokenType.traits;
return t >= TokenType.specialDate && t <= TokenType.traits;
}
/**
@ -2183,7 +2183,7 @@ enum TokenType: ushort
specialDate, /// ___DATE__
specialEof, /// ___EOF__
specialTime, /// ___TIME__
specialimestamp, /// ___TIMESTAMP__
specialTimestamp, /// ___TIMESTAMP__
specialVendor, /// ___VENDOR__
specialVersion, /// ___VERSION__
specialFile, /// $(D_KEYWORD ___FILE__)
@ -2681,6 +2681,9 @@ immutable(string[TokenType.max + 1]) tokenValues = [
"__VERSION__",
"__FILE__",
"__LINE__",
"__MODULE__",
"__FUNCTION__",
"__PRETTY_FUNCTION__",
null,
null,
null,
@ -2845,7 +2848,7 @@ pure TokenType lookupTokenType(R)(R input)
case 7:
switch (input[0])
{
case '_': if (input[1..$].equal("_EOF__")) return TokenType.eof; else break;
case '_': if (input[1..$].equal("_EOF__")) return TokenType.specialEof; else break;
case 'c': if (input[1..$].equal("double")) return TokenType.cdouble_; else break;
case 'd': if (input[1..$].equal("efault")) return TokenType.default_; else break;
case 'f': if (input[1..$].equal("inally")) return TokenType.finally_;
@ -2862,10 +2865,10 @@ pure TokenType lookupTokenType(R)(R input)
case 8:
switch (input[0])
{
case '_': if (input[1..$].equal("_DATE__")) return TokenType.date;
else if (input[1..$].equal("_FILE__")) return TokenType.file;
else if (input[1..$].equal("_LINE__")) return TokenType.line;
else if (input[1..$].equal("_TIME__")) return TokenType.time;
case '_': if (input[1..$].equal("_DATE__")) return TokenType.specialDate;
else if (input[1..$].equal("_FILE__")) return TokenType.specialFile;
else if (input[1..$].equal("_LINE__")) return TokenType.specialLine;
else if (input[1..$].equal("_TIME__")) return TokenType.specialTime;
else if (input[1..$].equal("_traits")) return TokenType.traits; else break;
case 'a': if (input[1..$].equal("bstract")) return TokenType.abstract_; else break;
case 'c': if (input[1..$].equal("ontinue")) return TokenType.continue_; else break;
@ -2893,26 +2896,35 @@ pure TokenType lookupTokenType(R)(R input)
switch (input[0])
{
case 'd': if (input[1..$].equal("eprecated")) return TokenType.deprecated_; else break;
case '_': if (input[1..$].equal("_VENDOR__")) return TokenType.vendor; else break;
case '_':
if (input[1..$].equal("_VENDOR__")) return TokenType.specialVendor;
else if (input[1..$].equal("_MODULE__")) return TokenType.specialModule; else break;
default: break;
}
break;
case 11:
if (input[1..$].equal("_VERSION__"))
return TokenType.compilerVersion;
return TokenType.specialVersion;
break;
case 12:
if (input[1..$].equal("ynchronized"))
return TokenType.synchronized_;
break;
switch (input[0])
{
case 's': if (input[1..$].equal("ynchronized")) return TokenType.synchronized_; else break;
case '_': if (input[1..$].equal("_FUNCTION__")) return TokenType.specialFunction; else break;
default: break;
}
break;
case 13:
if (input[1..$].equal("_TIMESTAMP__"))
return TokenType.timestamp;
return TokenType.specialTimestamp;
break;
case 15:
if (input[1..$].equal("oreach_reverse"))
return TokenType.foreach_reverse_;
break;
case 19:
if (input[1..$].equal("_PRETTY_FUNCTION__"))
return TokenType.specialPrettyFunction;
break;
default: break;
}
return TokenType.identifier;