From 119c6b917d405385a97040dda3c8294a64c249f3 Mon Sep 17 00:00:00 2001 From: Hackerpilot Date: Wed, 20 Feb 2013 22:36:55 -0800 Subject: [PATCH] code cleanup --- createTable.sql | 4 +- main.d | 209 +-- std/d/lexer.d | 3971 +++++++++++++++++++++++------------------------ 3 files changed, 2046 insertions(+), 2138 deletions(-) diff --git a/createTable.sql b/createTable.sql index ea56287..7b65c55 100644 --- a/createTable.sql +++ b/createTable.sql @@ -1,4 +1,4 @@ -create table files (path, mtime, id); +create table modules (path, mtime, id); create table publicImports (importerId, importedId); -create table containers (name, protection, fileId, id); +create table containers (name, protection, moduleId, id); create table symbols (name, type, kind, containerId, id); diff --git a/main.d b/main.d index 7df99cf..bc4776a 100644 --- a/main.d +++ b/main.d @@ -110,9 +110,9 @@ int main(string[] args) try { - getopt(args, "I", &importDirs,/+ "dotComplete", &dotComplete,+/ "sloc", &sloc, - /+"json", &json,+/ /+"parenComplete", &parenComplete,+/ "highlight", &highlight, - "ctags", &ctags, "recursive|r|R", &recursiveCtags, "help|h", &help, + getopt(args, "I", &importDirs,/+ "dotComplete|d", &dotComplete,+/ "sloc|l", &sloc, + /+"json|j", &json,+/ /+"parenComplete|p", &parenComplete,+/ "highlight", &highlight, + "ctags|c", &ctags, "recursive|r|R", &recursiveCtags, "help|h", &help, "tokenCount", &tokenCount, "frequencyCount", &frequencyCount); } catch (Exception e) @@ -120,188 +120,96 @@ int main(string[] args) stderr.writeln(e.msg); } - if (help || (!sloc && /+!dotComplete &&+/ /+!json &&+/ /+!parenComplete &&+/ !highlight - && !ctags && !format && !tokenCount && !frequencyCount)) - { - printHelp(); - return 0; - } - - importDirs ~= loadConfig(); - - if (tokenCount) - { - import core.memory; - /+if (args.length == 1) - { - writeln((cast (ubyte[]) stdin.byLine(KeepTerminator.yes).join()).byToken().walkLength()); - } - else - {+/ - LexerConfig config; - config.tokenStyle = TokenStyle.doNotReplaceSpecial; - foreach (arg; args[1..$]) - { - config.fileName = arg; - uint count; - auto f = File(arg); - ubyte[] buffer = uninitializedArray!(ubyte[])(f.size); - foreach(t; byToken(f.rawRead(buffer), config)) - { - ++count; - } - writefln("%s: %d", arg, count); - } - /+}+/ - } - /+else if (frequencyCount) + if (help) { - uint[TokenType] frequency; - foreach (t; byToken(cast(ubyte[]) File(args[1]).byLine(KeepTerminator.yes).join())) - { - frequency[t.type]++; - } - foreach (k, v; frequency) - { - writeln(v, ":", cast(TokenType) k); - } - }+/ + printHelp(args[0]); + return 0; + } - /+if (sloc) + auto optionCount = count!"a"([sloc, highlight, ctags, json, tokenCount]); + if (optionCount > 1) + { + stderr.writeln("Too many options specified"); + return 1; + } + else if (optionCount < 1) + { + printHelp(args[0]); + return 1; + } + + if (tokenCount || sloc) { - if (args.length == 1) + LexerConfig config; + config.tokenStyle = TokenStyle.doNotReplaceSpecial; + ulong[] counts = new ulong[args.length - 1]; + foreach (i, arg; parallel(args[1..$])) { - writeln(stdin.byLine(KeepTerminator.yes).join().byToken().count!(a => isLineOfCode(a.type))()); - } - else - { - writeln(args[1..$].map!(a => File(a).byLine(KeepTerminator.yes).join().byToken(a))() - .joiner().count!(a => isLineOfCode(a.type))()); - } - return 0; - }+/ + config.fileName = arg; + uint count; + auto f = File(arg); + ubyte[] buffer = uninitializedArray!(ubyte[])(f.size); + foreach (t; byToken(f.rawRead(buffer), config)) + { + if (tokenCount) + ++counts[i]; + else if (isLineOfCode(t.type)) + ++counts[i]; + } - if (highlight) + } + foreach(i; 0 .. counts.length) + writefln("%s: %d", args[i + 1], counts[i]); + } + else if (highlight) { LexerConfig config; config.iterStyle = IterationStyle.everything; config.tokenStyle = TokenStyle.source; File f = args.length == 1 ? stdin : File(args[1]); - highlighter.highlight((cast(ubyte[]) f.byLine(KeepTerminator.yes).join()).byToken(config), + ubyte[] buffer = uninitializedArray!(ubyte[])(f.size); + highlighter.highlight(byToken(f.rawRead(buffer), config), args.length == 1 ? "stdin" : args[1]); return 0; } - /+if (dotComplete || parenComplete) - { - if (isAbsolute(args[1])) - importDirs ~= dirName(args[1]); - else - importDirs ~= getcwd(); - Token[] tokens; - try - { - to!size_t(args[1]); - auto f = appender!string(); - char[] buf; - while (stdin.readln(buf)) - f.put(buf); - tokens = f.data.byToken().array(); - } - catch(ConvException e) - { - tokens = args[1].readText().byToken().array(); - args.popFront(); - } - auto mod = parseModule(tokens); - CompletionContext context = new CompletionContext(mod); - context.importDirectories = importDirs; - foreach (im; parallel(mod.imports)) - { - auto p = findAbsPath(importDirs, im); - if (p is null || !p.exists()) - continue; - context.addModule(p.readText().byToken().array().parseModule()); - } - auto complete = AutoComplete(tokens, context); - if (parenComplete) - writeln(complete.parenComplete(to!size_t(args[1]))); - else if (dotComplete) - writeln(complete.dotComplete(to!size_t(args[1]))); - return 0; - }+/ - - /+if (json) - { - CircularBuffer!(Token) tokens; - File f = args.length == 1 ? stdin : File(args[1]); - tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE, - f.byLine(KeepTerminator.yes).join().byToken!(char[])()); - auto mod = parseModule(tokens); - mod.writeJSONTo(stdout); - return 0; - }+/ - -// if (ctags) -// { -// if (!recursiveCtags) -// { -// auto tokens = byToken(readText(args[1])); -// auto mod = parseModule(tokens.array()); -// mod.writeCtagsTo(stdout, args[1]); -// } -// else -// { -// Module m; -// foreach (dirEntry; dirEntries(args[1], SpanMode.breadth)) -// { -// if (!dirEntry.name.endsWith(".d", ".di")) -// continue; -// stderr.writeln("Generating tags for ", dirEntry.name); -// auto tokens = byToken(readText(dirEntry.name)); -// if (m is null) -// m = parseModule(tokens.array()); -// else -// { -// auto mod = parseModule(tokens.array()); -// m.merge(mod); -// } -// } -// m.writeCtagsTo(stdout, ""); -// } -// } return 0; } -void printHelp() +void printHelp(string programName) { - writeln( -q{ - Usage: dscanner options + writefln( +` + Usage: %s options options: --help | -h Prints this help message - --sloc [sourceFiles] + --sloc | -l [sourceFiles] count the number of logical lines of code in the given source files. If no files are specified, a file is read from stdin. - --json [sourceFile] + --json | -j [sourceFile] Generate a JSON summary of the given source file. If no file is specifed, the file is read from stdin. - --dotComplete [sourceFile] cursorPosition + --dotComplete | -d [sourceFile] cursorPosition Provide autocompletion for the insertion of the dot operator. The cursor position is the character position in the *file*, not the position in the line. If no file is specified, the file is read from stdin. - --parenComplete [sourceFile] cursorPosition - Provides a listing of function parameters or pre-defined version + --parenComplete | -p [sourceFile] cursorPosition + Provide a listing of function parameters or pre-defined version identifiers at the cursor position. The cursor position is the character position in the *file*, not the line. If no file is specified, the contents are read from stdin. + --symbolComplete | -s [sourceFile] cursorPosition + Provide a listing of classes, structs, interfaces, variables, functions, + and methods available in the current scope that begin with the text + before the cursor position. + --highlight [sourceFile] - Syntax-highlight the given source file. The resulting HTML will be written to standard output. @@ -311,12 +219,13 @@ options: well as any paths specified in /etc/dmd.conf. This is only used for the --parenComplete and --dotComplete options. - --ctags sourceFile + --ctags | -c sourceFile Generates ctags information from the given source code file. Note that ctags information requires a filename, so stdin cannot be used in place of a filename. --recursive | -R | -r directory When used with --ctags, dscanner will produce ctags output for all .d - and .di files contained within directory and its sub-directories.}); + and .di files contained within directory and its sub-directories.`, + programName); } diff --git a/std/d/lexer.d b/std/d/lexer.d index d2b722f..8aeb9aa 100644 --- a/std/d/lexer.d +++ b/std/d/lexer.d @@ -129,62 +129,62 @@ public: */ struct Token { - /** - * The token type. - */ - TokenType type; + /** + * The token type. + */ + TokenType type; - /** - * The representation of the token in the original source code. - */ - string value; + /** + * The representation of the token in the original source code. + */ + string value; - /** - * The number of the line the token is on. - */ - uint line; + /** + * The number of the line the token is on. + */ + uint line; - /** - * The column number of the start of the token in the original source. - * $(LPAREN)measured in ASCII characters or UTF-8 code units$(RPAREN) - */ - uint column; + /** + * The column number of the start of the token in the original source. + * $(LPAREN)measured in ASCII characters or UTF-8 code units$(RPAREN) + */ + uint column; - /** - * The index of the start of the token in the original source. - * $(LPAREN)measured in ASCII characters or UTF-8 code units$(RPAREN) - */ - size_t startIndex; + /** + * The index of the start of the token in the original source. + * $(LPAREN)measured in ASCII characters or UTF-8 code units$(RPAREN) + */ + size_t startIndex; - /** - * Check to see if the token is of the same type and has the same string - * representation as the given token. - */ - bool opEquals(ref const(Token) other) const - { - return other.type == type && other.value == value; - } + /** + * Check to see if the token is of the same type and has the same string + * representation as the given token. + */ + bool opEquals(ref const(Token) other) const + { + return other.type == type && other.value == value; + } - /** - * Checks to see if the token's string representation is equal to the given - * string. - */ - bool opEquals(string value) const { return this.value == value; } + /** + * Checks to see if the token's string representation is equal to the given + * string. + */ + bool opEquals(string value) const { return this.value == value; } - /** - * Checks to see if the token is of the given type. - */ - bool opEquals(TokenType type) const { return type == type; } + /** + * Checks to see if the token is of the given type. + */ + bool opEquals(TokenType type) const { return type == type; } - /** - * Comparison operator orders tokens by start index. - */ - int opCmp(ref const(Token) other) const - { - if (startIndex < other.startIndex) return -1; - if (startIndex > other.startIndex) return 1; - return 0; - } + /** + * Comparison operator orders tokens by start index. + */ + int opCmp(ref const(Token) other) const + { + if (startIndex < other.startIndex) return -1; + if (startIndex > other.startIndex) return 1; + return 0; + } } /** @@ -193,18 +193,18 @@ struct Token */ enum IterationStyle { - /// Only include code, not whitespace or comments - codeOnly = 0, - /// Includes comments - includeComments = 0b0001, - /// Includes whitespace - includeWhitespace = 0b0010, - /// Include $(LINK2 http://dlang.org/lex.html#specialtokens, special tokens) - includeSpecialTokens = 0b0100, - /// Do not stop iteration on reaching the ___EOF__ token - ignoreEOF = 0b1000, - /// Include everything - everything = includeComments | includeWhitespace | ignoreEOF + /// Only include code, not whitespace or comments + codeOnly = 0, + /// Includes comments + includeComments = 0b0001, + /// Includes whitespace + includeWhitespace = 0b0010, + /// Include $(LINK2 http://dlang.org/lex.html#specialtokens, special tokens) + includeSpecialTokens = 0b0100, + /// Do not stop iteration on reaching the ___EOF__ token + ignoreEOF = 0b1000, + /// Include everything + everything = includeComments | includeWhitespace | ignoreEOF } /** @@ -213,41 +213,41 @@ enum IterationStyle */ enum TokenStyle : uint { - /** - * Escape sequences will be replaced with their equivalent characters, - * enclosing quote characters will not be included. Special tokens such as - * __VENDOR__ will be replaced with their equivalent strings. Useful for - * creating a compiler or interpreter. - */ - default_ = 0b0000, + /** + * Escape sequences will be replaced with their equivalent characters, + * enclosing quote characters will not be included. Special tokens such as + * __VENDOR__ will be replaced with their equivalent strings. Useful for + * creating a compiler or interpreter. + */ + default_ = 0b0000, - /** - * Escape sequences will not be processed. An escaped quote character will - * not terminate string lexing, but it will not be replaced with the quote - * character in the token. - */ - notEscaped = 0b0001, + /** + * Escape sequences will not be processed. An escaped quote character will + * not terminate string lexing, but it will not be replaced with the quote + * character in the token. + */ + notEscaped = 0b0001, - /** - * Strings will include their opening and closing quote characters as well - * as any prefixes or suffixes $(LPAREN)e.g.: $(D_STRING "abcde"w) will - * include the $(D_STRING 'w') character as well as the opening and closing - * quotes$(RPAREN) - */ - includeQuotes = 0b0010, + /** + * Strings will include their opening and closing quote characters as well + * as any prefixes or suffixes $(LPAREN)e.g.: $(D_STRING "abcde"w) will + * include the $(D_STRING 'w') character as well as the opening and closing + * quotes$(RPAREN) + */ + includeQuotes = 0b0010, - /** - * Do not replace the value field of the special tokens such as ___DATE__ - * with their string equivalents. - */ - doNotReplaceSpecial = 0b0100, + /** + * Do not replace the value field of the special tokens such as ___DATE__ + * with their string equivalents. + */ + doNotReplaceSpecial = 0b0100, - /** - * Strings will be read exactly as they appeared in the source, including - * their opening and closing quote characters. Useful for syntax - * highlighting. - */ - source = notEscaped | includeQuotes | doNotReplaceSpecial + /** + * Strings will be read exactly as they appeared in the source, including + * their opening and closing quote characters. Useful for syntax + * highlighting. + */ + source = notEscaped | includeQuotes | doNotReplaceSpecial } /** @@ -255,46 +255,46 @@ enum TokenStyle : uint */ struct LexerConfig { - /** - * Iteration style - */ - IterationStyle iterStyle = IterationStyle.codeOnly; + /** + * Iteration style + */ + IterationStyle iterStyle = IterationStyle.codeOnly; - /** - * Token style - */ - TokenStyle tokenStyle = tokenStyle.default_; + /** + * Token style + */ + TokenStyle tokenStyle = tokenStyle.default_; - /** - * Replacement for the ___VERSION__ token. Defaults to 1. - */ - uint versionNumber = 100; + /** + * Replacement for the ___VERSION__ token. Defaults to 1. + */ + uint versionNumber = 100; - /** - * Replacement for the ___VENDOR__ token. Defaults to $(D_STRING "std.d.lexer") - */ - string vendorString = "std.d.lexer"; + /** + * Replacement for the ___VENDOR__ token. Defaults to $(D_STRING "std.d.lexer") + */ + string vendorString = "std.d.lexer"; - /** - * Name used when creating error messages that are sent to errorFunc. This - * is needed because the lexer operates on any forwarad range of ASCII - * characters or UTF-8 code units and does not know what to call its input - * source. Defaults to the empty string. - */ - string fileName = ""; + /** + * Name used when creating error messages that are sent to errorFunc. This + * is needed because the lexer operates on any forwarad range of ASCII + * characters or UTF-8 code units and does not know what to call its input + * source. Defaults to the empty string. + */ + string fileName = ""; - /** - * This function is called when an error is encountered during lexing. - * Parameters are file name, code uint index, line number, column, - * and error messsage. - */ - void delegate(string, size_t, uint, uint, string) errorFunc; + /** + * This function is called when an error is encountered during lexing. + * Parameters are file name, code uint index, line number, column, + * and error messsage. + */ + void delegate(string, size_t, uint, uint, string) errorFunc; - /** - * Initial size of the lexer's internal token buffer in bytes. The lexer - * will grow this buffer if necessary. - */ - size_t bufferSize = 1024 * 4; + /** + * Initial size of the lexer's internal token buffer in bytes. The lexer + * will grow this buffer if necessary. + */ + size_t bufferSize = 1024 * 4; } /** @@ -313,10 +313,10 @@ auto byToken(R)(R range, LexerConfig config, size_t bufferSize = 4*1024) // 4K of circular buffer by default auto r = TokenRange!(typeof(lexerSource(range))) (lexerSource(range, bufferSize), config); - r.config = config; - r.lineNumber = 1; - r.popFront(); - return r; + r.config = config; + r.lineNumber = 1; + r.popFront(); + return r; } ///ditto @@ -337,13 +337,13 @@ auto byToken(R)(R range, LexerConfig config) // ATM it is byte-oriented private struct LexSource(R) if(isForwardRange!R && !isRandomAccessRange!R) - { + { bool empty() const { return _empty; } auto ref front() const { return accum[accumIdx]; - } + } auto ref peek() const in @@ -618,1092 +618,1090 @@ unittest struct TokenRange(LexSrc) //if ( is(LexSrc : LexSource!(U...), U...)) //check for LexSource { - /** - * Returns: true if the range is empty - */ - bool empty() const @property - { - return _empty; - } + /** + * Returns: true if the range is empty + */ + bool empty() const @property + { + return _empty; + } - /** - * Returns: the current token - */ - ref const(Token) front() const @property - { + /** + * Returns: the current token + */ + ref const(Token) front() const @property + { assert(!empty, "trying to get front of an empty token range"); - return current; - } + return current; + } - /** - * Returns the current token and then removes it from the range - */ - Token moveFront() - { + /** + * Returns the current token and then removes it from the range + */ + Token moveFront() + { auto r = move(current); - popFront(); - return r; - } + popFront(); + return r; + } - /** + /** * Foreach operation - */ - int opApply(int delegate(Token) dg) - { - int result = 0; - while (!empty) - { - result = dg(front); - if (result) - break; - popFront(); - } - return result; - } + */ + int opApply(int delegate(Token) dg) + { + int result = 0; + while (!empty) + { + result = dg(front); + if (result) + break; + popFront(); + } + return result; + } - /** + /** * Foreach operation - */ - int opApply(int delegate(size_t, Token) dg) - { - int result = 0; - int i = 0; - while (!empty) - { - result = dg(i, front); - if (result) - break; - popFront(); - } - return result; - } + */ + int opApply(int delegate(size_t, Token) dg) + { + int result = 0; + int i = 0; + while (!empty) + { + result = dg(i, front); + if (result) + break; + popFront(); + } + return result; + } - /** - * Removes the current token from the range - */ - void popFront() - { - // Filter out tokens we don't care about + /** + * Removes the current token from the range + */ + void popFront() + { + // Filter out tokens we don't care about loop: while (true) - { - advance(); + { + advance(); if(empty) break loop; - switch (current.type) - { - case TokenType.whitespace: - if (config.iterStyle & IterationStyle.includeWhitespace) - break loop; - break; - case TokenType.comment: - if (config.iterStyle & IterationStyle.includeComments) - break loop; - break; - case TokenType.specialTokenSequence: - if (config.iterStyle & IterationStyle.includeSpecialTokens) - break loop; - break; - default: - break loop; - } - } - } + switch (current.type) + { + case TokenType.whitespace: + if (config.iterStyle & IterationStyle.includeWhitespace) + break loop; + break; + case TokenType.comment: + if (config.iterStyle & IterationStyle.includeComments) + break loop; + break; + case TokenType.specialTokenSequence: + if (config.iterStyle & IterationStyle.includeSpecialTokens) + break loop; + break; + default: + break loop; + } + } + } private: - /* - * Advances the range to the next token - */ - void advance() - { - if (isEoF()) - { - _empty = true; - return; - } + /* + * Advances the range to the next token + */ + void advance() + { + if (isEoF()) + { + _empty = true; + return; + } src.mark(); // mark a start of a lexing "frame" - current.line = lineNumber; + current.line = lineNumber; current.startIndex = src.index; - current.column = column; - current.value = null; + current.column = column; + current.value = null; - if (isWhite()) - { - if (config.iterStyle & IterationStyle.includeWhitespace) - lexWhitespace!true(); - else - lexWhitespace!false(); - return; - } + if (isWhite()) + { + if (config.iterStyle & IterationStyle.includeWhitespace) + lexWhitespace!true(); + else + lexWhitespace!false(); + return; + } switch (src.front) - { + { // pragma(msg, generateCaseTrie( - mixin(generateCaseTrie( - "=", "TokenType.assign", - "@", "TokenType.at", - "&", "TokenType.bitAnd", - "&=", "TokenType.bitAndEquals", - "|", "TokenType.bitOr", - "|=", "TokenType.bitOrEquals", - "~=", "TokenType.catEquals", - ":", "TokenType.colon", - ",", "TokenType.comma", - "--", "TokenType.decrement", - "$", "TokenType.dollar", - "==", "TokenType.equals", - "=>", "TokenType.goesTo", - ">", "TokenType.greater", - ">=", "TokenType.greaterEqual", - "++", "TokenType.increment", - "{", "TokenType.lBrace", - "[", "TokenType.lBracket", - "<", "TokenType.less", - "<=", "TokenType.lessEqual", - "<>=", "TokenType.lessEqualGreater", - "<>", "TokenType.lessOrGreater", - "&&", "TokenType.logicAnd", - "||", "TokenType.logicOr", - "(", "TokenType.lParen", - "-", "TokenType.minus", - "-=", "TokenType.minusEquals", - "%", "TokenType.mod", - "%=", "TokenType.modEquals", - "*=", "TokenType.mulEquals", - "!", "TokenType.not", - "!=", "TokenType.notEquals", - "!>", "TokenType.notGreater", - "!>=", "TokenType.notGreaterEqual", - "!<", "TokenType.notLess", - "!<=", "TokenType.notLessEqual", - "!<>", "TokenType.notLessEqualGreater", - "+", "TokenType.plus", - "+=", "TokenType.plusEquals", - "^^", "TokenType.pow", - "^^=", "TokenType.powEquals", - "}", "TokenType.rBrace", - "]", "TokenType.rBracket", - ")", "TokenType.rParen", - ";", "TokenType.semicolon", - "<<", "TokenType.shiftLeft", - "<<=", "TokenType.shiftLeftEqual", - ">>", "TokenType.shiftRight", - ">>=", "TokenType.shiftRightEqual", - "*", "TokenType.star", - "?", "TokenType.ternary", - "~", "TokenType.tilde", - "!<>=", "TokenType.unordered", - ">>>", "TokenType.unsignedShiftRight", - ">>>=", "TokenType.unsignedShiftRightEqual", - "^", "TokenType.xor", - "^=", "TokenType.xorEquals", - )); - case '/': + mixin(generateCaseTrie( + "=", "TokenType.assign", + "@", "TokenType.at", + "&", "TokenType.bitAnd", + "&=", "TokenType.bitAndEquals", + "|", "TokenType.bitOr", + "|=", "TokenType.bitOrEquals", + "~=", "TokenType.catEquals", + ":", "TokenType.colon", + ",", "TokenType.comma", + "--", "TokenType.decrement", + "$", "TokenType.dollar", + "==", "TokenType.equals", + "=>", "TokenType.goesTo", + ">", "TokenType.greater", + ">=", "TokenType.greaterEqual", + "++", "TokenType.increment", + "{", "TokenType.lBrace", + "[", "TokenType.lBracket", + "<", "TokenType.less", + "<=", "TokenType.lessEqual", + "<>=", "TokenType.lessEqualGreater", + "<>", "TokenType.lessOrGreater", + "&&", "TokenType.logicAnd", + "||", "TokenType.logicOr", + "(", "TokenType.lParen", + "-", "TokenType.minus", + "-=", "TokenType.minusEquals", + "%", "TokenType.mod", + "%=", "TokenType.modEquals", + "*=", "TokenType.mulEquals", + "!", "TokenType.not", + "!=", "TokenType.notEquals", + "!>", "TokenType.notGreater", + "!>=", "TokenType.notGreaterEqual", + "!<", "TokenType.notLess", + "!<=", "TokenType.notLessEqual", + "!<>", "TokenType.notLessEqualGreater", + "+", "TokenType.plus", + "+=", "TokenType.plusEquals", + "^^", "TokenType.pow", + "^^=", "TokenType.powEquals", + "}", "TokenType.rBrace", + "]", "TokenType.rBracket", + ")", "TokenType.rParen", + ";", "TokenType.semicolon", + "<<", "TokenType.shiftLeft", + "<<=", "TokenType.shiftLeftEqual", + ">>", "TokenType.shiftRight", + ">>=", "TokenType.shiftRightEqual", + "*", "TokenType.star", + "?", "TokenType.ternary", + "~", "TokenType.tilde", + "!<>=", "TokenType.unordered", + ">>>", "TokenType.unsignedShiftRight", + ">>>=", "TokenType.unsignedShiftRightEqual", + "^", "TokenType.xor", + "^=", "TokenType.xorEquals", + )); + case '/': nextCharNonLF(); - if (isEoF()) - { - current.type = TokenType.div; - current.value = "/"; - return; - } + if (isEoF()) + { + current.type = TokenType.div; + current.value = "/"; + return; + } switch (src.front) - { - case '/': - case '*': - case '+': - if (config.iterStyle & IterationStyle.includeComments) - lexComment!true(); - else - lexComment!false(); - return; - case '=': - current.type = TokenType.divEquals; - current.value = "/="; + { + case '/': + case '*': + case '+': + if (config.iterStyle & IterationStyle.includeComments) + lexComment!true(); + else + lexComment!false(); + return; + case '=': + current.type = TokenType.divEquals; + current.value = "/="; src.popFront(); - return; - default: - current.type = TokenType.div; - current.value = "/"; - return; - } - case '.': + return; + default: + current.type = TokenType.div; + current.value = "/"; + return; + } + case '.': if (!src.canPeek()) - { - current.type = TokenType.dot; - current.value = getTokenValue(TokenType.dot); - return; - } + { + current.type = TokenType.dot; + current.value = getTokenValue(TokenType.dot); + return; + } switch (src.peek()) - { - case '0': .. case '9': - lexNumber(); - return; - case '.': + { + case '0': .. case '9': + lexNumber(); + return; + case '.': nextCharNonLF(); nextCharNonLF(); - current.type = TokenType.slice; + current.type = TokenType.slice; if (src.front == '.') - { - current.type = TokenType.vararg; + { + current.type = TokenType.vararg; nextCharNonLF(); - } - current.value = getTokenValue(current.type); - return; - default: + } + current.value = getTokenValue(current.type); + return; + default: nextCharNonLF(); - current.type = TokenType.dot; - current.value = getTokenValue(TokenType.dot); - return; - } - case '0': .. case '9': - lexNumber(); - return; - case '\'': - lexCharacterLiteral(); - return; - case '"': - case '`': - lexString(); - return; - case 'q': + current.type = TokenType.dot; + current.value = getTokenValue(TokenType.dot); + return; + } + case '0': .. case '9': + lexNumber(); + return; + case '\'': + lexCharacterLiteral(); + return; + case '"': + case '`': + lexString(); + return; + case 'q': nextCharNonLF(); - if (isEoF()) - goto default; + if (isEoF()) + goto default; switch (src.front) - { - case '{': - lexTokenString(); - return; - case '"': - lexDelimitedString(); - return; - default: - break; - } - goto default; - case 'r': + { + case '{': + lexTokenString(); + return; + case '"': + lexDelimitedString(); + return; + default: + break; + } + goto default; + case 'r': nextCharNonLF(); - if (isEoF()) - goto default; + if (isEoF()) + goto default; else if (src.front == '"') - { - lexString(); - return; - } - else - goto default; - case 'x': + { + lexString(); + return; + } + else + goto default; + case 'x': nextCharNonLF(); - if (isEoF()) - goto default; + if (isEoF()) + goto default; else if (src.front == '"') - { - lexHexString(); - return; - } - else - goto default; - case '#': - lexSpecialTokenSequence(); - return; - default: - while(!isEoF() && !isSeparating()) - { + { + lexHexString(); + return; + } + else + goto default; + case '#': + lexSpecialTokenSequence(); + return; + default: + while(!isEoF() && !isSeparating()) + { nextCharNonLF(); - } + } current.type = lookupTokenType(src.slice); - current.value = getTokenValue(current.type); - if (current.value is null) - setTokenValue(); + current.value = getTokenValue(current.type); + if (current.value is null) + setTokenValue(); - if (!(config.iterStyle & IterationStyle.ignoreEOF) && current.type == TokenType.eof) - { - _empty = true; - return; - } + if (!(config.iterStyle & IterationStyle.ignoreEOF) && current.type == TokenType.eof) + { + _empty = true; + return; + } - if (!(config.iterStyle & TokenStyle.doNotReplaceSpecial)) - return; + if (!(config.iterStyle & TokenStyle.doNotReplaceSpecial)) + return; - switch (current.type) - { - case TokenType.date: - current.type = TokenType.stringLiteral; - auto time = Clock.currTime(); - current.value = format("%s %02d %04d", time.month, time.day, time.year); - return; - case TokenType.time: - auto time = Clock.currTime(); - current.type = TokenType.stringLiteral; - current.value = (cast(TimeOfDay)(time)).toISOExtString(); - return; - case TokenType.timestamp: - auto time = Clock.currTime(); - auto dt = cast(DateTime) time; - current.type = TokenType.stringLiteral; - current.value = format("%s %s %02d %02d:%02d:%02d %04d", - dt.dayOfWeek, dt.month, dt.day, dt.hour, dt.minute, - dt.second, dt.year); - return; - case TokenType.vendor: - current.type = TokenType.stringLiteral; - current.value = config.vendorString; - return; - case TokenType.compilerVersion: - current.type = TokenType.stringLiteral; - current.value = format("%d", config.versionNumber); - return; - case TokenType.line: - current.type = TokenType.intLiteral; - current.value = format("%d", current.line); - return; - case TokenType.file: - current.type = TokenType.stringLiteral; - current.value = config.fileName; - return; - default: - return; - } - } - } + switch (current.type) + { + case TokenType.date: + current.type = TokenType.stringLiteral; + auto time = Clock.currTime(); + current.value = format("%s %02d %04d", time.month, time.day, time.year); + return; + case TokenType.time: + auto time = Clock.currTime(); + current.type = TokenType.stringLiteral; + current.value = (cast(TimeOfDay)(time)).toISOExtString(); + return; + case TokenType.timestamp: + auto time = Clock.currTime(); + auto dt = cast(DateTime) time; + current.type = TokenType.stringLiteral; + current.value = format("%s %s %02d %02d:%02d:%02d %04d", + dt.dayOfWeek, dt.month, dt.day, dt.hour, dt.minute, + dt.second, dt.year); + return; + case TokenType.vendor: + current.type = TokenType.stringLiteral; + current.value = config.vendorString; + return; + case TokenType.compilerVersion: + current.type = TokenType.stringLiteral; + current.value = format("%d", config.versionNumber); + return; + case TokenType.line: + current.type = TokenType.intLiteral; + current.value = format("%d", current.line); + return; + case TokenType.file: + current.type = TokenType.stringLiteral; + current.value = config.fileName; + return; + default: + return; + } + } + } // TODO: LexSource could be improved for forward ranges // to avoid buffering at all (by disabling it for a moment) // so keep the 'keep' parameter here and elsewhere - void lexWhitespace(bool keep)() - { - current.type = TokenType.whitespace; - while (!isEoF() && isWhite()) - { + void lexWhitespace(bool keep)() + { + current.type = TokenType.whitespace; + while (!isEoF() && isWhite()) + { nextChar(); - } - static if (keep) setTokenValue(); - } + } + static if (keep) setTokenValue(); + } - void lexComment(bool keep)() - in - { + void lexComment(bool keep)() + in + { assert (src.front == '/' || src.front == '*' || src.front == '+'); - } - body - { - current.type = TokenType.comment; + } + body + { + current.type = TokenType.comment; switch(src.front) - { - case '/': + { + case '/': while (!isEoF() && !isNewline(src.front)) - { + { nextCharNonLF(); - } - break; - case '*': - while (!isEoF()) - { + } + break; + case '*': + while (!isEoF()) + { if (src.front == '*') - { + { static if (keep) nextCharNonLF(); else src.popFront(); if (src.front == '/') - { + { nextCharNonLF(); - break; - } - } - else + break; + } + } + else nextChar(); - } - break; - case '+': - int depth = 1; - while (depth > 0 && !isEoF()) - { + } + break; + case '+': + int depth = 1; + while (depth > 0 && !isEoF()) + { if (src.front == '+') - { + { nextCharNonLF(); if (src.front == '/') - { + { nextCharNonLF(); - --depth; - } - } + --depth; + } + } else if (src.front == '/') - { + { nextCharNonLF(); if (src.front == '+') - { + { nextCharNonLF(); - ++depth; - } - } - else + ++depth; + } + } + else nextChar(); - } - break; - default: - assert(false); - } + } + break; + default: + assert(false); + } static if (keep) - setTokenValue(); - } + setTokenValue(); + } - void lexHexString() - in - { + void lexHexString() + in + { assert (src.front == '"'); - } - body - { - current.type = TokenType.stringLiteral; + } + body + { + current.type = TokenType.stringLiteral; nextChar(); - while (true) - { - if (isEoF()) - { - errorMessage("Unterminated hex string literal"); - return; - } + while (true) + { + if (isEoF()) + { + errorMessage("Unterminated hex string literal"); + return; + } else if (isHexDigit(src.front)) - { + { nextCharNonLF(); - } - else if (isWhite() && (config.tokenStyle & TokenStyle.notEscaped)) - { + } + else if (isWhite() && (config.tokenStyle & TokenStyle.notEscaped)) + { nextChar(); - } + } else if (src.front == '"') - { + { nextCharNonLF(); - break; - } - else - { - errorMessage(format("Invalid character '%s' in hex string literal", + break; + } + else + { + errorMessage(format("Invalid character '%s' in hex string literal", cast(char) src.front)); - return; - } - } + return; + } + } bool hasSuffix = lexStringSuffix(); - if (config.tokenStyle & TokenStyle.notEscaped) - { - if (config.tokenStyle & TokenStyle.includeQuotes) - setTokenValue(); - else + if (config.tokenStyle & TokenStyle.notEscaped) + { + if (config.tokenStyle & TokenStyle.includeQuotes) + setTokenValue(); + else setTokenValue(2, hasSuffix ? -2 : -1); - } - else - { + } + else + { // TODO: appender is an allocation happy fat pig // remove it later auto a = appender!(char[])(); foreach (b; std.range.chunks(src.slice[2 .. $ - 1], 2)) - { + { auto s = cast(char[])b; - dchar ch = cast(dchar)parse!uint(s, 16); - char[4] utf8Buf; - size_t utf8Size = encode(utf8Buf, ch); - a.put(utf8Buf[0..utf8Size]); - } + ubyte ch = cast(ubyte)parse!uint(s, 16); + a.put(ch); + } // can safely assume ownership of data current.value = cast(string)a.data; - } - } + } + } - void lexNumber() - in - { + void lexNumber() + in + { assert(isDigit(src.front) || src.front == '.'); - } - body - { + } + body + { if (src.front != '0') { - lexDecimal(); + lexDecimal(); return; } - else - { + else + { switch (src.peek()) - { - case 'x': - case 'X': + { + case 'x': + case 'X': nextCharNonLF(); nextCharNonLF(); - lexHex(); - break; - case 'b': - case 'B': - nextCharNonLF(); - nextCharNonLF(); - lexBinary(); - break; - default: - lexDecimal(); + lexHex(); break; - } - } - } + case 'b': + case 'B': + nextCharNonLF(); + nextCharNonLF(); + lexBinary(); + break; + default: + lexDecimal(); + break; + } + } + } - void lexFloatSuffix() - { + void lexFloatSuffix() + { switch (src.front) - { - case 'L': + { + case 'L': nextCharNonLF(); - current.type = TokenType.doubleLiteral; - break; - case 'f': - case 'F': + current.type = TokenType.doubleLiteral; + break; + case 'f': + case 'F': nextCharNonLF(); - current.type = TokenType.floatLiteral; - break; - default: - break; - } + current.type = TokenType.floatLiteral; + break; + default: + break; + } if (!isEoF() && src.front == 'i') - { + { nextCharNonLF(); - if (current.type == TokenType.floatLiteral) - current.type = TokenType.ifloatLiteral; - else - current.type = TokenType.idoubleLiteral; - } - } + if (current.type == TokenType.floatLiteral) + current.type = TokenType.ifloatLiteral; + else + current.type = TokenType.idoubleLiteral; + } + } - void lexIntSuffix() - { - bool foundU; - bool foundL; - while (!isEoF()) - { + void lexIntSuffix() + { + bool foundU; + bool foundL; + while (!isEoF()) + { switch (src.front) - { - case 'u': - case 'U': - if (foundU) - return; - switch (current.type) - { - case TokenType.intLiteral: - current.type = TokenType.uintLiteral; + { + case 'u': + case 'U': + if (foundU) + return; + switch (current.type) + { + case TokenType.intLiteral: + current.type = TokenType.uintLiteral; nextCharNonLF(); - break; - case TokenType.longLiteral: - current.type = TokenType.ulongLiteral; + break; + case TokenType.longLiteral: + current.type = TokenType.ulongLiteral; nextCharNonLF(); - break; - default: + break; + default: assert (false); - } - foundU = true; - break; - case 'L': - if (foundL) - return; - switch (current.type) - { - case TokenType.intLiteral: - current.type = TokenType.longLiteral; + } + foundU = true; + break; + case 'L': + if (foundL) + return; + switch (current.type) + { + case TokenType.intLiteral: + current.type = TokenType.longLiteral; nextCharNonLF(); - break; - case TokenType.uintLiteral: - current.type = TokenType.ulongLiteral; + break; + case TokenType.uintLiteral: + current.type = TokenType.ulongLiteral; nextCharNonLF(); - break; - default: + break; + default: assert (false); - } - foundL = true; - break; - default: - return; - } - } - } + } + foundL = true; + break; + default: + return; + } + } + } - void lexExponent() - in - { + void lexExponent() + in + { assert (src.front == 'e' || src.front == 'E' || src.front == 'p' || src.front == 'P'); - } - body - { + } + body + { nextCharNonLF(); - bool foundSign = false; - bool foundDigit = false; - while (!isEoF()) - { + bool foundSign = false; + bool foundDigit = false; + while (!isEoF()) + { switch (src.front) - { - case '-': - case '+': + { + case '-': + case '+': if (foundSign) - { + { if (!foundDigit) - errorMessage("Expected an exponent"); - return; - } - foundSign = true; + errorMessage("Expected an exponent"); + return; + } + foundSign = true; nextCharNonLF(); - break; - case '0': .. case '9': - case '_': - foundDigit = true; + break; + case '0': .. case '9': + case '_': + foundDigit = true; nextCharNonLF(); - break; - case 'L': - case 'f': - case 'F': - case 'i': - lexFloatSuffix(); - return; - default: - if (!foundDigit) - errorMessage("Expected an exponent"); - return; - } - } - } + break; + case 'L': + case 'f': + case 'F': + case 'i': + lexFloatSuffix(); + return; + default: + if (!foundDigit) + errorMessage("Expected an exponent"); + return; + } + } + } - void lexDecimal() - in - { + void lexDecimal() + in + { assert (isDigit(src.front) || src.front == '.'); - } - body - { + } + body + { bool foundDot = src.front == '.'; if (foundDot) nextCharNonLF(); - current.type = TokenType.intLiteral; - decimalLoop: while (!isEoF()) - { + current.type = TokenType.intLiteral; + decimalLoop: while (!isEoF()) + { switch (src.front) - { - case '0': .. case '9': - case '_': + { + case '0': .. case '9': + case '_': nextCharNonLF(); - break; - case 'u': - case 'U': - if (!foundDot) - lexIntSuffix(); - break decimalLoop; - case 'i': - lexFloatSuffix(); - break decimalLoop; - case 'L': - if (foundDot) - lexFloatSuffix(); - else - lexIntSuffix(); - break decimalLoop; - case 'f': - case 'F': - lexFloatSuffix(); - break decimalLoop; - case 'e': - case 'E': - lexExponent(); - break decimalLoop; - case '.': - if (foundDot) - break decimalLoop; - if (src.canPeek() && src.peek() == '.') - break decimalLoop; - nextCharNonLF(); - foundDot = true; - current.type = TokenType.doubleLiteral; - break; - default: - break decimalLoop; - } - } - setTokenValue(); - } - - void lexBinary() - { - current.type = TokenType.intLiteral; - binaryLoop: while (!isEoF()) - { - switch (src.front) - { - case '0': - case '1': - case '_': - nextCharNonLF(); - break; - case 'u': - case 'U': - case 'L': - lexIntSuffix(); - break binaryLoop; - default: - break binaryLoop; - } - } - setTokenValue(); - } - - void lexHex() - { - current.type = TokenType.intLiteral; - bool foundDot; - hexLoop: while (!isEoF()) - { - switch (src.front) - { - case 'a': .. case 'f': - case 'A': .. case 'F': - case '0': .. case '9': - case '_': - nextCharNonLF(); - break; - case 'u': - case 'U': - lexIntSuffix(); - break hexLoop; - case 'i': - if (foundDot) - lexFloatSuffix(); - break hexLoop; - case 'L': - if (foundDot) - { - lexFloatSuffix(); - break hexLoop; - } - else - { - lexIntSuffix(); - break hexLoop; - } - case 'p': - case 'P': - lexExponent(); - break hexLoop; - case '.': - if (foundDot) - break hexLoop; - if (src.canPeek() && src.peek() == '.') - break hexLoop; - nextCharNonLF(); - foundDot = true; - current.type = TokenType.doubleLiteral; - break; - default: - break hexLoop; - } - } - setTokenValue(); - } - - bool lexStringSuffix() - { - current.type = TokenType.stringLiteral; - bool foundSuffix = false; - if (!isEoF()) - { - switch (src.front) - { - case 'w': - current.type = TokenType.wstringLiteral; - goto case 'c'; - case 'd': - current.type = TokenType.dstringLiteral; - goto case 'c'; - case 'c': - foundSuffix = true; - nextCharNonLF(); - break; - default: - break; - } - } - return foundSuffix; - } - - void lexCharacterLiteral() - in - { - assert (src.front == '\''); - } - body - { - current.type = TokenType.characterLiteral; - nextChar(); - if (isEoF()) - { - errorMessage("Unterminated character literal"); - return; - } - switch (src.front) - { - case '\'': break; - case '\\': - if (config.tokenStyle & TokenStyle.notEscaped) - skipEscapeSequence(); + case 'u': + case 'U': + if (!foundDot) + lexIntSuffix(); + break decimalLoop; + case 'i': + lexFloatSuffix(); + break decimalLoop; + case 'L': + if (foundDot) + lexFloatSuffix(); + else + lexIntSuffix(); + break decimalLoop; + case 'f': + case 'F': + lexFloatSuffix(); + break decimalLoop; + case 'e': + case 'E': + lexExponent(); + break decimalLoop; + case '.': + if (foundDot) + break decimalLoop; + if (src.canPeek() && src.peek() == '.') + break decimalLoop; + nextCharNonLF(); + foundDot = true; + current.type = TokenType.doubleLiteral; + break; + default: + break decimalLoop; + } + } + setTokenValue(); + } + + void lexBinary() + { + current.type = TokenType.intLiteral; + binaryLoop: while (!isEoF()) + { + switch (src.front) + { + case '0': + case '1': + case '_': + nextCharNonLF(); + break; + case 'u': + case 'U': + case 'L': + lexIntSuffix(); + break binaryLoop; + default: + break binaryLoop; + } + } + setTokenValue(); + } + + void lexHex() + { + current.type = TokenType.intLiteral; + bool foundDot; + hexLoop: while (!isEoF()) + { + switch (src.front) + { + case 'a': .. case 'f': + case 'A': .. case 'F': + case '0': .. case '9': + case '_': + nextCharNonLF(); + break; + case 'u': + case 'U': + lexIntSuffix(); + break hexLoop; + case 'i': + if (foundDot) + lexFloatSuffix(); + break hexLoop; + case 'L': + if (foundDot) + { + lexFloatSuffix(); + break hexLoop; + } else { - // the only special path - // 40 bytes is enough for 2 quotes - // and the longest character entity - ubyte[40] utf8; - size_t len; - if (config.tokenStyle & TokenStyle.includeQuotes) - { - utf8[0] = '\''; - len = decodeEscapeSequence(utf8[1..$]); - utf8[len++] = '\''; - } - else - len = decodeEscapeSequence(utf8[]); - if (src.front != '\'') - { - errorMessage("Expected \"'\" to end character literal"); - } - // skip over last "'" - nextChar(); - setTokenValue(utf8[0..len]); - return; + lexIntSuffix(); + break hexLoop; } - break; - default: - if (src.front & 0x80) - { - while (src.front & 0x80) - nextChar(); - break; - } - else - { + case 'p': + case 'P': + lexExponent(); + break hexLoop; + case '.': + if (foundDot) + break hexLoop; + if (src.canPeek() && src.peek() == '.') + break hexLoop; + nextCharNonLF(); + foundDot = true; + current.type = TokenType.doubleLiteral; + break; + default: + break hexLoop; + } + } + setTokenValue(); + } + + bool lexStringSuffix() + { + current.type = TokenType.stringLiteral; + bool foundSuffix = false; + if (!isEoF()) + { + switch (src.front) + { + case 'w': + current.type = TokenType.wstringLiteral; + goto case 'c'; + case 'd': + current.type = TokenType.dstringLiteral; + goto case 'c'; + case 'c': + foundSuffix = true; + nextCharNonLF(); + break; + default: + break; + } + } + return foundSuffix; + } + + void lexCharacterLiteral() + in + { + assert (src.front == '\''); + } + body + { + current.type = TokenType.characterLiteral; + nextChar(); + if (isEoF()) + { + errorMessage("Unterminated character literal"); + return; + } + switch (src.front) + { + case '\'': + break; + case '\\': + if (config.tokenStyle & TokenStyle.notEscaped) + skipEscapeSequence(); + else + { + // the only special path + // 40 bytes is enough for 2 quotes + // and the longest character entity + ubyte[40] utf8; + size_t len; + if (config.tokenStyle & TokenStyle.includeQuotes) + { + utf8[0] = '\''; + len = decodeEscapeSequence(utf8[1..$]); + utf8[len++] = '\''; + } + else + len = decodeEscapeSequence(utf8[]); + if (src.front != '\'') + { + errorMessage("Expected \"'\" to end character literal"); + } + // skip over last "'" + nextChar(); + setTokenValue(utf8[0..len]); + return; + } + break; + default: + if (src.front & 0x80) + { + while (src.front & 0x80) nextChar(); - break; - } - } + break; + } + else + { + nextChar(); + break; + } + } if (src.front != '\'') - errorMessage("Expected \"'\" to end character literal"); + errorMessage("Expected \"'\" to end character literal"); nextChar(); if (config.tokenStyle & TokenStyle.includeQuotes) setTokenValue(); else setTokenValue(1, -1); - } + } - void lexString() - in - { + void lexString() + in + { assert (src.front == '"'); - } - body - { - current.type = TokenType.stringLiteral; + } + body + { + current.type = TokenType.stringLiteral; bool longWysiwg = src.slice.length > 0 && src.slice[0] == 'r'; // 2 chars : r" bool isWysiwyg = src.front == '`'; // in case we need to unescape string Appender!(ubyte[]) unescaped; auto quote = src.front; nextChar(); - while (true) - { - if (isEoF()) - { - errorMessage("Unterminated string literal"); - return; - } + while (true) + { + if (isEoF()) + { + errorMessage("Unterminated string literal"); + return; + } else if (src.front == '\\') - { + { if (isWysiwyg || longWysiwg) nextChar(); else if(config.tokenStyle & TokenStyle.notEscaped) - { - skipEscapeSequence(); - } - else - { + { + skipEscapeSequence(); + } + else + { if(unescaped == Appender!(ubyte[]).init) unescaped = appender!(ubyte[])(); unescaped.put(src.slice()); decodeEscapeSequence(unescaped); src.mark(); //start next slice after escape sequence - } - } + } + } else if (src.front == quote) - { + { nextCharNonLF(); - break; - } - else + break; + } + else nextChar(); - } + } lexStringSuffix(); // helper to handle quotes void setData(R)(R range) - { + { if (config.tokenStyle & TokenStyle.includeQuotes) setTokenValue(range); else if (longWysiwg) setTokenValue(range[2..$-1]); - else + else setTokenValue(range[1..$-1]); - } + } import std.stdio; if(unescaped != Appender!(ubyte[]).init) - { + { //stuff in the last slice and used buffered data unescaped.put(src.slice); setData(unescaped.data); - } - else - { + } + else + { setData(src.slice); //slice directly - } - } + } + } - void lexDelimitedString() - in - { + void lexDelimitedString() + in + { assert(src.front == '"'); - } - body - { - current.type = TokenType.stringLiteral; + } + body + { + current.type = TokenType.stringLiteral; nextChar(); - bool heredoc; - ubyte open; - ubyte close; + bool heredoc; + ubyte open; + ubyte close; switch (src.front) - { - case '[': open = '['; close = ']'; break; - case '{': open = '{'; close = '}'; break; - case '(': open = '('; close = ')'; break; - case '<': open = '<'; close = '>'; break; - default: heredoc = true; break; - } - if (heredoc) - lexHeredocString(); - else - lexNormalDelimitedString(open, close); - } + { + case '[': open = '['; close = ']'; break; + case '{': open = '{'; close = '}'; break; + case '(': open = '('; close = ')'; break; + case '<': open = '<'; close = '>'; break; + default: heredoc = true; break; + } + if (heredoc) + lexHeredocString(); + else + lexNormalDelimitedString(open, close); + } - void lexNormalDelimitedString(ubyte open, ubyte close) - in - { + void lexNormalDelimitedString(ubyte open, ubyte close) + in + { assert(src.slice[0 .. 2] == `q"`); - } - body - { - current.type = TokenType.stringLiteral; - int depth = 1; + } + body + { + current.type = TokenType.stringLiteral; + int depth = 1; nextChar(); - while (true) - { - if (isEoF()) + while (true) + { + if (isEoF()) { - errorMessage("Unterminated string literal"); + errorMessage("Unterminated string literal"); break; } if (src.front == open) - { + { nextChar(); - ++depth; - } + ++depth; + } else if (src.front == close) - { + { nextChar(); - --depth; - if (depth <= 0) - { + --depth; + if (depth <= 0) + { auto r = src.save(); //TODO: allocates for Fwd range - if (r.front == '"') - { + if (r.front == '"') + { nextChar(); break; - } - else - { - errorMessage("Expected \" after balanced " - ~ cast(char) close ~ " but found " - ~ cast(char) r.front ~ " instead."); + } + else + { + errorMessage("Expected \" after balanced " + ~ cast(char) close ~ " but found " + ~ cast(char) r.front ~ " instead."); break; - } - } - } - else + } + } + } + else nextChar(); - } + } if (config.tokenStyle & TokenStyle.includeQuotes) setTokenValue(); else setTokenValue(3, -2); - } + } - void lexHeredocString() - in - { + void lexHeredocString() + in + { assert (src.slice.equal("q\"")); - } - body - { + } + body + { typeof(src.slice) ident; uint newlineBytes; - while (true) - { - if (isEoF()) - { - errorMessage("Unterminated string literal"); - return; - } + while (true) + { + if (isEoF()) + { + errorMessage("Unterminated string literal"); + return; + } else if (isNewline(src.front)) - { + { ident = src.slice[2..$]; nextChar(); newlineBytes = cast(uint) (src.slice.length - 2 - ident.length); - break; - } - else if (isSeparating()) - { + break; + } + else if (isSeparating()) + { nextChar(); ident = src.slice[2..$]; nextChar(); newlineBytes = 0; break; - } - else - { + } + else + { nextChar(); - } - } - while (true) - { - if (isEoF()) - { - errorMessage("Unterminated string literal"); + } + } + while (true) + { + if (isEoF()) + { + errorMessage("Unterminated string literal"); break; - } + } else if (src.slice.length > ident.length && src.slice[$-ident.length .. $].equal(ident)) - { + { if (src.front == '"') - { + { nextChar(); - lexStringSuffix(); + lexStringSuffix(); break; - } - else - { + } + else + { errorMessage("Unterminated string literal: " ~ cast(string) src.slice); break; - } - } - else + } + } + else nextChar(); } @@ -1715,17 +1713,17 @@ private: { setTokenValue(cast(int) (2 + newlineBytes + ident.length), cast(int) (-(ident.length + (hasSuffix ? 2 : 1)))); - } - } + } + } - void lexTokenString() - in - { + void lexTokenString() + in + { assert (src.front == '{'); - } - body - { - current.type = TokenType.stringLiteral; + } + body + { + current.type = TokenType.stringLiteral; nextChar(); auto app = appender!(ubyte[])(); if (config.tokenStyle & TokenStyle.includeQuotes) @@ -1733,25 +1731,25 @@ private: app.put('q'); app.put('{'); } - LexerConfig c = config; + LexerConfig c = config; scope (exit) config = c; - config.iterStyle = IterationStyle.everything; - config.tokenStyle = TokenStyle.source; - int depth = 1; + config.iterStyle = IterationStyle.everything; + config.tokenStyle = TokenStyle.source; + int depth = 1; - while (!isEoF()) - { - advance(); - if (current.type == TokenType.lBrace) - ++depth; - else if (current.type == TokenType.rBrace) - { - --depth; - if (depth <= 0) - break; - } + while (!isEoF()) + { + advance(); + if (current.type == TokenType.lBrace) + ++depth; + else if (current.type == TokenType.rBrace) + { + --depth; + if (depth <= 0) + break; + } app.put(representation(current.value)); - } + } config = c; if (config.tokenStyle & TokenStyle.includeQuotes) { @@ -1786,61 +1784,61 @@ private: } } current.value = cast(string) app.data; - } + } - void lexSpecialTokenSequence() - in - { + void lexSpecialTokenSequence() + in + { assert (src.front == '#'); - } - body - { + } + body + { nextChar(); auto r = src.save(); - auto app = appender!(ubyte[])(); - app.put('#'); - while (true) - { - if (r.isRangeEoF()) - { - errorMessage("Found EOF when interpreting special token sequence"); - return; - } - else if (isNewline(r.front)) - break; - else - { - app.put(r.front); - r.popFront(); - } - } - auto m = match((cast(char[]) app.data), - `#line\s+(?P\d+)\s*(?P".+")*?`); - if (m) - { - current.type = TokenType.specialTokenSequence; - current.value = (cast(char[]) app.data).idup; - column += app.data.length; - foreach (i; 0 .. app.data.length) + auto app = appender!(ubyte[])(); + app.put('#'); + while (true) + { + if (r.isRangeEoF()) + { + errorMessage("Found EOF when interpreting special token sequence"); + return; + } + else if (isNewline(r.front)) + break; + else + { + app.put(r.front); + r.popFront(); + } + } + auto m = match((cast(char[]) app.data), + `#line\s+(?P\d+)\s*(?P".+")*?`); + if (m) + { + current.type = TokenType.specialTokenSequence; + current.value = (cast(char[]) app.data).idup; + column += app.data.length; + foreach (i; 0 .. app.data.length) src.popFront(); - auto c = m.captures; - if (c["filespec"]) - config.fileName = c["filespec"].idup; - auto l = c["line"]; - lineNumber = parse!uint(l); - } - else - { - current.type = TokenType.hash; - current.value = getTokenValue(TokenType.hash); - } - } + auto c = m.captures; + if (c["filespec"]) + config.fileName = c["filespec"].idup; + auto l = c["line"]; + lineNumber = parse!uint(l); + } + else + { + current.type = TokenType.hash; + current.value = getTokenValue(TokenType.hash); + } + } //===================================================================== // Helpers for lexXYZ functions //===================================================================== void skipEscapeSequence() - { + { // no decoding, just minor sanity checks nextChar(); switch (src.front) @@ -1906,27 +1904,28 @@ private: errorMessage("Invalid escape sequence"); return; } - } + } size_t decodeEscapeSequence(OutputRange)(OutputRange dest) in - { + { assert (src.front == '\\'); } body { - static size_t reencodeNumeric(ubyte[] src, int radix, OutputRange dest) + size_t reencodeNumeric(ubyte[] src, int radix, OutputRange dest) { - /*scope(failure) //TODO: get rid of std.stdio in lexer - { - import std.stdio; - ("Failed on line ", lineNumber, " of file ", - config.fileName); - }*/ char[] chunk = cast(char[])src; char[4] utfBuf; uint codepoint = parse!uint(chunk, radix); - size_t len = encode(utfBuf, codepoint); + size_t len; + try + len = encode(utfBuf, codepoint); + catch (UTFException ex) + { + errorMessage(ex.msg); + return 0; + } dest.put(cast(ubyte[]) utfBuf[0..len]); return len; } @@ -1963,12 +1962,12 @@ private: case 'x': src.popFront(); foreach(i; 0 .. 2) - { + { if (!isHexDigit(src.front)) { errorMessage("Expected hex digit"); return 1; - } + } buffer[i] = src.front; src.popFront(); } @@ -1978,21 +1977,21 @@ private: uint digitCount = src.front == 'u' ? 4 : 8; src.popFront(); foreach (i; 0 .. digitCount) - { + { if (!isHexDigit(src.front)) { errorMessage("Expected hex digit"); return 1; - } + } buffer[i] = src.front; src.popFront(); - } + } return reencodeNumeric(buffer[0..digitCount], 16, dest); case '&': src.popFront(); size_t idx = 0; while (!isEoF()) - { + { if (isAlpha(src.front)) { buffer[idx++] = src.front; @@ -2004,17 +2003,17 @@ private: { src.popFront(); break; - } - else - { - errorMessage("Invalid character entity"); - return idx; - } - } + } + else + { + errorMessage("Invalid character entity"); + return idx; + } + } auto chunk = buffer[0..idx]; auto entity = cast(string)chunk in characterEntities; if (entity is null) - { + { errorMessage("Invalid character entity \"&%s;\"" .format(cast(string) chunk)); return 1; @@ -2029,47 +2028,47 @@ private: // advances underlying mark-slice range and counts lines, cols void nextChar() - { + { bool foundNewline; if (src.front == '\r') { src.popFront(); foundNewline = true; - } + } if (src.front == '\n') { src.popFront(); foundNewline = true; - } - else - { + } + else + { src.popFront(); - } - if (foundNewline) - { - ++lineNumber; - column = 0; - } + } + if (foundNewline) + { + ++lineNumber; + column = 0; + } else ++column; - } + } //same but don't bother for LF sequences void nextCharNonLF() - { + { src.popFront(); ++column; - } + } void setTokenValue()() - { + { current.value = cache.get(src.slice); - } + } void setTokenValue()(int startOffset, int endOffset) in - { + { assert(startOffset >= 0); assert(endOffset <= 0); } @@ -2085,48 +2084,48 @@ private: if(isRandomAccessRange!R && is(ElementType!R : const(ubyte))) { current.value = cache.get(range); - } + } - bool isEoF() const - { + bool isEoF() const + { return src.empty || src.front == 0 || src.front == 0x1a; - } + } bool isSeparating() - { + { auto ch = src.front; - if (ch <= 0x2f) return true; - if (ch >= ':' && ch <= '@') return true; - if (ch >= '[' && ch <= '^') return true; - if (ch >= '{' && ch <= '~') return true; - if (ch == '`') return true; + if (ch <= 0x2f) return true; + if (ch >= ':' && ch <= '@') return true; + if (ch >= '[' && ch <= '^') return true; + if (ch >= '{' && ch <= '~') return true; + if (ch == '`') return true; if (isWhite()) return true; //TODO: test only long 'whites' - return false; - } + return false; + } bool isWhite() - { + { auto c = src.front; if (c & 0x80) // multi-byte utf-8 { //TODO: here and elsewhere we'd better have // some kind of lookahead in LexSource instead of .save auto r = src.save(); - if (r.front != 0xe2) - return false; - else - r.popFront(); - if (r.empty || r.front != 0x80) - return false; - else - r.popFront(); + if (r.front != 0xe2) + return false; + else + r.popFront(); + if (r.empty || r.front != 0x80) + return false; + else + r.popFront(); if (r.empty || (r.front != 0xa8 && r.front != 0xa9)) - return false; - return true; - } - else - return c == 0x20 || (c >= 0x09 && c <= 0x0d); - } + return false; + return true; + } + else + return c == 0x20 || (c >= 0x09 && c <= 0x0d); + } void errorMessage(string s) { @@ -2148,381 +2147,381 @@ private: config = move(cfg); } - Token current; - uint lineNumber; - uint column; + Token current; + uint lineNumber; + uint column; LexSrc src; - bool _empty; - LexerConfig config; - StringCache cache; + bool _empty; + LexerConfig config; + StringCache cache; } /** - * Returns: true if the token is an operator - */ +* Returns: true if the token is an operator +*/ pure nothrow bool isOperator(const TokenType t) { - return t >= TokenType.assign && t <= TokenType.xorEquals; + return t >= TokenType.assign && t <= TokenType.xorEquals; } /** - * ditto - */ +* ditto +*/ pure nothrow bool isOperator(ref const Token t) { - return isOperator(t.type); + return isOperator(t.type); } /** - * Returns: true if the token is a keyword - */ +* Returns: true if the token is a keyword +*/ pure nothrow bool isKeyword(const TokenType t) { - return t >= TokenType.bool_ && t <= TokenType.with_; + return t >= TokenType.bool_ && t <= TokenType.with_; } /** - * ditto - */ +* ditto +*/ pure nothrow bool isKeyword(ref const Token t) { - return isKeyword(t.type); + return isKeyword(t.type); } /** - * Returns: true if the token is a built-in type - */ +* Returns: true if the token is a built-in type +*/ pure nothrow bool isType(const TokenType t) { - return t >= TokenType.bool_ && t <= TokenType.wchar_; + return t >= TokenType.bool_ && t <= TokenType.wchar_; } /** - * ditto - */ +* ditto +*/ pure nothrow bool isType(ref const Token t) { - return isType(t.type); + return isType(t.type); } /** - * Returns: true if the token is an attribute - */ +* Returns: true if the token is an attribute +*/ pure nothrow bool isAttribute(const TokenType t) { - return t >= TokenType.align_ && t <= TokenType.static_; + return t >= TokenType.align_ && t <= TokenType.static_; } /** - * ditto - */ +* ditto +*/ pure nothrow bool isAttribute(ref const Token t) { - return isAttribute(t.type); + return isAttribute(t.type); } /** - * Returns: true if the token is a protection attribute - */ +* Returns: true if the token is a protection attribute +*/ pure nothrow bool isProtection(const TokenType t) { - return t >= TokenType.export_ && t <= TokenType.public_; + return t >= TokenType.export_ && t <= TokenType.public_; } /** - * ditto - */ +* ditto +*/ pure nothrow bool isProtection(ref const Token t) { - return isProtection(t.type); + return isProtection(t.type); } /** - * Returns: true if the token is a compile-time constant such as ___DATE__ - */ +* Returns: true if the token is a compile-time constant such as ___DATE__ +*/ pure nothrow bool isConstant(const TokenType t) { - return t >= TokenType.date && t <= TokenType.traits; + return t >= TokenType.date && t <= TokenType.traits; } /** - * ditto - */ +* ditto +*/ pure nothrow bool isConstant(ref const Token t) { - return isConstant(t.type); + return isConstant(t.type); } /** - * Returns: true if the token is a string or number literal - */ +* Returns: true if the token is a string or number literal +*/ pure nothrow bool isLiteral(const TokenType t) { - return t >= TokenType.doubleLiteral && t <= TokenType.wstringLiteral; + return t >= TokenType.doubleLiteral && t <= TokenType.wstringLiteral; } /** - * ditto - */ +* ditto +*/ pure nothrow bool isLiteral(ref const Token t) { - return isLiteral(t.type); + return isLiteral(t.type); } /** - * Returns: true if the token is a number literal - */ +* Returns: true if the token is a number literal +*/ pure nothrow bool isNumberLiteral(const TokenType t) { - return t >= TokenType.doubleLiteral && t <= TokenType.ulongLiteral; + return t >= TokenType.doubleLiteral && t <= TokenType.ulongLiteral; } /** - * ditto - */ +* ditto +*/ pure nothrow bool isNumberLiteral(ref const Token t) { - return isNumberLiteral(t.type); + return isNumberLiteral(t.type); } /** - * Returns: true if the token is a string literal - */ +* Returns: true if the token is a string literal +*/ pure nothrow bool isStringLiteral(const TokenType t) { - return t >= TokenType.dstringLiteral && t <= TokenType.wstringLiteral; + return t >= TokenType.dstringLiteral && t <= TokenType.wstringLiteral; } /** - * ditto - */ +* ditto +*/ pure nothrow bool isStringLiteral(ref const Token t) { - return isStringLiteral(t.type); + return isStringLiteral(t.type); } /** - * Returns: true if the token is whitespace, a commemnt, a special token - * sequence, or an identifier - */ +* Returns: true if the token is whitespace, a commemnt, a special token +* sequence, or an identifier +*/ pure nothrow bool isMisc(const TokenType t) { - return t >= TokenType.comment && t <= TokenType.specialTokenSequence; + return t >= TokenType.comment && t <= TokenType.specialTokenSequence; } /** - * ditto - */ +* ditto +*/ pure nothrow bool isMisc(ref const Token t) { - return isMisc(t.type); + return isMisc(t.type); } /** - * Listing of all the tokens in the D language. - */ +* Listing of all the tokens in the D language. +*/ enum TokenType: ushort { - assign, /// = - at, /// @ - bitAnd, /// & - bitAndEquals, /// &= - bitOr, /// | - bitOrEquals, /// |= - catEquals, /// ~= - colon, /// : - comma, /// , - decrement, /// -- - div, /// / - divEquals, /// /= - dollar, /// $ - dot, /// . - equals, /// == - goesTo, /// => - greater, /// > - greaterEqual, /// >= - hash, /// # - increment, /// ++ - lBrace, /// { - lBracket, /// [ - less, /// < - lessEqual, /// <= - lessEqualGreater, /// <>= - lessOrGreater, /// <> - logicAnd, /// && - logicOr, /// || - lParen, /// $(LPAREN) - minus, /// - - minusEquals, /// -= - mod, /// % - modEquals, /// %= - mulEquals, /// *= - not, /// ! - notEquals, /// != - notGreater, /// !> - notGreaterEqual, /// !>= - notLess, /// !< - notLessEqual, /// !<= - notLessEqualGreater, /// !<> - plus, /// + - plusEquals, /// += - pow, /// ^^ - powEquals, /// ^^= - rBrace, /// } - rBracket, /// ] - rParen, /// $(RPAREN) - semicolon, /// ; - shiftLeft, /// << - shiftLeftEqual, /// <<= - shiftRight, /// >> - shiftRightEqual, /// >>= - slice, /// .. - star, /// * - ternary, /// ? - tilde, /// ~ - unordered, /// !<>= - unsignedShiftRight, /// >>> - unsignedShiftRightEqual, /// >>>= - vararg, /// ... - xor, /// ^ - xorEquals, /// ^= + assign, /// = + at, /// @ + bitAnd, /// & + bitAndEquals, /// &= + bitOr, /// | + bitOrEquals, /// |= + catEquals, /// ~= + colon, /// : + comma, /// , + decrement, /// -- + div, /// / + divEquals, /// /= + dollar, /// $ + dot, /// . + equals, /// == + goesTo, /// => + greater, /// > + greaterEqual, /// >= + hash, /// # + increment, /// ++ + lBrace, /// { + lBracket, /// [ + less, /// < + lessEqual, /// <= + lessEqualGreater, /// <>= + lessOrGreater, /// <> + logicAnd, /// && + logicOr, /// || + lParen, /// $(LPAREN) + minus, /// - + minusEquals, /// -= + mod, /// % + modEquals, /// %= + mulEquals, /// *= + not, /// ! + notEquals, /// != + notGreater, /// !> + notGreaterEqual, /// !>= + notLess, /// !< + notLessEqual, /// !<= + notLessEqualGreater, /// !<> + plus, /// + + plusEquals, /// += + pow, /// ^^ + powEquals, /// ^^= + rBrace, /// } + rBracket, /// ] + rParen, /// $(RPAREN) + semicolon, /// ; + shiftLeft, /// << + shiftLeftEqual, /// <<= + shiftRight, /// >> + shiftRightEqual, /// >>= + slice, /// .. + star, /// * + ternary, /// ? + tilde, /// ~ + unordered, /// !<>= + unsignedShiftRight, /// >>> + unsignedShiftRightEqual, /// >>>= + vararg, /// ... + xor, /// ^ + xorEquals, /// ^= - bool_, /// $(D_KEYWORD bool) - byte_, /// $(D_KEYWORD byte) - cdouble_, /// $(D_KEYWORD cdouble) - cent_, /// $(D_KEYWORD cent) - cfloat_, /// $(D_KEYWORD cfloat) - char_, /// $(D_KEYWORD char) - creal_, /// $(D_KEYWORD creal) - dchar_, /// $(D_KEYWORD dchar) - double_, /// $(D_KEYWORD double) - float_, /// $(D_KEYWORD float) - function_, /// $(D_KEYWORD function) - idouble_, /// $(D_KEYWORD idouble) - ifloat_, /// $(D_KEYWORD ifloat) - int_, /// $(D_KEYWORD int) - ireal_, /// $(D_KEYWORD ireal) - long_, /// $(D_KEYWORD long) - real_, /// $(D_KEYWORD real) - short_, /// $(D_KEYWORD short) - ubyte_, /// $(D_KEYWORD ubyte) - ucent_, /// $(D_KEYWORD ucent) - uint_, /// $(D_KEYWORD uint) - ulong_, /// $(D_KEYWORD ulong) - ushort_, /// $(D_KEYWORD ushort) - void_, /// $(D_KEYWORD void) - wchar_, /// $(D_KEYWORD wchar) + bool_, /// $(D_KEYWORD bool) + byte_, /// $(D_KEYWORD byte) + cdouble_, /// $(D_KEYWORD cdouble) + cent_, /// $(D_KEYWORD cent) + cfloat_, /// $(D_KEYWORD cfloat) + char_, /// $(D_KEYWORD char) + creal_, /// $(D_KEYWORD creal) + dchar_, /// $(D_KEYWORD dchar) + double_, /// $(D_KEYWORD double) + float_, /// $(D_KEYWORD float) + function_, /// $(D_KEYWORD function) + idouble_, /// $(D_KEYWORD idouble) + ifloat_, /// $(D_KEYWORD ifloat) + int_, /// $(D_KEYWORD int) + ireal_, /// $(D_KEYWORD ireal) + long_, /// $(D_KEYWORD long) + real_, /// $(D_KEYWORD real) + short_, /// $(D_KEYWORD short) + ubyte_, /// $(D_KEYWORD ubyte) + ucent_, /// $(D_KEYWORD ucent) + uint_, /// $(D_KEYWORD uint) + ulong_, /// $(D_KEYWORD ulong) + ushort_, /// $(D_KEYWORD ushort) + void_, /// $(D_KEYWORD void) + wchar_, /// $(D_KEYWORD wchar) - align_, /// $(D_KEYWORD align) - deprecated_, /// $(D_KEYWORD deprecated) - extern_, /// $(D_KEYWORD extern) - pragma_, /// $(D_KEYWORD pragma) - export_, /// $(D_KEYWORD export) - package_, /// $(D_KEYWORD package) - private_, /// $(D_KEYWORD private) - protected_, /// $(D_KEYWORD protected) - public_, /// $(D_KEYWORD public) - abstract_, /// $(D_KEYWORD abstract) - auto_, /// $(D_KEYWORD auto) - const_, /// $(D_KEYWORD const) - final_, /// $(D_KEYWORD final) - gshared, /// $(D_KEYWORD __gshared) - immutable_, // immutable - inout_, // inout - scope_, /// $(D_KEYWORD scope) - shared_, // shared - static_, /// $(D_KEYWORD static) + align_, /// $(D_KEYWORD align) + deprecated_, /// $(D_KEYWORD deprecated) + extern_, /// $(D_KEYWORD extern) + pragma_, /// $(D_KEYWORD pragma) + export_, /// $(D_KEYWORD export) + package_, /// $(D_KEYWORD package) + private_, /// $(D_KEYWORD private) + protected_, /// $(D_KEYWORD protected) + public_, /// $(D_KEYWORD public) + abstract_, /// $(D_KEYWORD abstract) + auto_, /// $(D_KEYWORD auto) + const_, /// $(D_KEYWORD const) + final_, /// $(D_KEYWORD final) + gshared, /// $(D_KEYWORD __gshared) + immutable_, // immutable + inout_, // inout + scope_, /// $(D_KEYWORD scope) + shared_, // shared + static_, /// $(D_KEYWORD static) - synchronized_, /// $(D_KEYWORD synchronized) - alias_, /// $(D_KEYWORD alias) - asm_, /// $(D_KEYWORD asm) - assert_, /// $(D_KEYWORD assert) - body_, /// $(D_KEYWORD body) - break_, /// $(D_KEYWORD break) - case_, /// $(D_KEYWORD case) - cast_, /// $(D_KEYWORD cast) - catch_, /// $(D_KEYWORD catch) - class_, /// $(D_KEYWORD class) - continue_, /// $(D_KEYWORD continue) - debug_, /// $(D_KEYWORD debug) - default_, /// $(D_KEYWORD default) - delegate_, /// $(D_KEYWORD delegate) - delete_, /// $(D_KEYWORD delete) - do_, /// $(D_KEYWORD do) - else_, /// $(D_KEYWORD else) - enum_, /// $(D_KEYWORD enum) - false_, /// $(D_KEYWORD false) - finally_, /// $(D_KEYWORD finally) - foreach_, /// $(D_KEYWORD foreach) - foreach_reverse_, /// $(D_KEYWORD foreach_reverse) - for_, /// $(D_KEYWORD for) - goto_, /// $(D_KEYWORD goto) - if_, /// $(D_KEYWORD if) - import_, /// $(D_KEYWORD import) - in_, /// $(D_KEYWORD in) - interface_, /// $(D_KEYWORD interface) - invariant_, /// $(D_KEYWORD invariant) - is_, /// $(D_KEYWORD is) - lazy_, /// $(D_KEYWORD lazy) - macro_, /// $(D_KEYWORD macro) - mixin_, /// $(D_KEYWORD mixin) - module_, /// $(D_KEYWORD module) - new_, /// $(D_KEYWORD new) - nothrow_, /// $(D_KEYWORD nothrow) - null_, /// $(D_KEYWORD null) - out_, /// $(D_KEYWORD out) - override_, /// $(D_KEYWORD override) - pure_, /// $(D_KEYWORD pure) - ref_, /// $(D_KEYWORD ref) - return_, /// $(D_KEYWORD return) - struct_, /// $(D_KEYWORD struct) - super_, /// $(D_KEYWORD super) - switch_, /// $(D_KEYWORD switch) - template_, /// $(D_KEYWORD template) - this_, /// $(D_KEYWORD this) - throw_, /// $(D_KEYWORD throw) - true_, /// $(D_KEYWORD true) - try_, /// $(D_KEYWORD try) - typedef_, /// $(D_KEYWORD typedef) - typeid_, /// $(D_KEYWORD typeid) - typeof_, /// $(D_KEYWORD typeof) - union_, /// $(D_KEYWORD union) - unittest_, /// $(D_KEYWORD unittest) - version_, /// $(D_KEYWORD version) - volatile_, /// $(D_KEYWORD volatile) - while_, /// $(D_KEYWORD while) - with_, /// $(D_KEYWORD with) + synchronized_, /// $(D_KEYWORD synchronized) + alias_, /// $(D_KEYWORD alias) + asm_, /// $(D_KEYWORD asm) + assert_, /// $(D_KEYWORD assert) + body_, /// $(D_KEYWORD body) + break_, /// $(D_KEYWORD break) + case_, /// $(D_KEYWORD case) + cast_, /// $(D_KEYWORD cast) + catch_, /// $(D_KEYWORD catch) + class_, /// $(D_KEYWORD class) + continue_, /// $(D_KEYWORD continue) + debug_, /// $(D_KEYWORD debug) + default_, /// $(D_KEYWORD default) + delegate_, /// $(D_KEYWORD delegate) + delete_, /// $(D_KEYWORD delete) + do_, /// $(D_KEYWORD do) + else_, /// $(D_KEYWORD else) + enum_, /// $(D_KEYWORD enum) + false_, /// $(D_KEYWORD false) + finally_, /// $(D_KEYWORD finally) + foreach_, /// $(D_KEYWORD foreach) + foreach_reverse_, /// $(D_KEYWORD foreach_reverse) + for_, /// $(D_KEYWORD for) + goto_, /// $(D_KEYWORD goto) + if_, /// $(D_KEYWORD if) + import_, /// $(D_KEYWORD import) + in_, /// $(D_KEYWORD in) + interface_, /// $(D_KEYWORD interface) + invariant_, /// $(D_KEYWORD invariant) + is_, /// $(D_KEYWORD is) + lazy_, /// $(D_KEYWORD lazy) + macro_, /// $(D_KEYWORD macro) + mixin_, /// $(D_KEYWORD mixin) + module_, /// $(D_KEYWORD module) + new_, /// $(D_KEYWORD new) + nothrow_, /// $(D_KEYWORD nothrow) + null_, /// $(D_KEYWORD null) + out_, /// $(D_KEYWORD out) + override_, /// $(D_KEYWORD override) + pure_, /// $(D_KEYWORD pure) + ref_, /// $(D_KEYWORD ref) + return_, /// $(D_KEYWORD return) + struct_, /// $(D_KEYWORD struct) + super_, /// $(D_KEYWORD super) + switch_, /// $(D_KEYWORD switch) + template_, /// $(D_KEYWORD template) + this_, /// $(D_KEYWORD this) + throw_, /// $(D_KEYWORD throw) + true_, /// $(D_KEYWORD true) + try_, /// $(D_KEYWORD try) + typedef_, /// $(D_KEYWORD typedef) + typeid_, /// $(D_KEYWORD typeid) + typeof_, /// $(D_KEYWORD typeof) + union_, /// $(D_KEYWORD union) + unittest_, /// $(D_KEYWORD unittest) + version_, /// $(D_KEYWORD version) + volatile_, /// $(D_KEYWORD volatile) + while_, /// $(D_KEYWORD while) + with_, /// $(D_KEYWORD with) - date, /// ___DATE__ - eof, /// ___EOF__ - time, /// ___TIME__ - timestamp, /// ___TIMESTAMP__ - vendor, /// ___VENDOR__ - compilerVersion, /// ___VERSION__ - file, /// $(D_KEYWORD ___FILE__) - line, /// $(D_KEYWORD ___LINE__) - comment, /// $(D_COMMENT /** comment */) or $(D_COMMENT // comment) or $(D_COMMENT ///comment) - identifier, /// anything else - scriptLine, // Line at the beginning of source file that starts from #! - traits, /// $(D_KEYWORD ___traits) - parameters, /// $(D_KEYWORD ___parameters) - vector, /// $(D_KEYWORD ___vector) - whitespace, /// whitespace - specialTokenSequence, /// #line 10 "file.d" - doubleLiteral, /// 123.456 - floatLiteral, /// 123.456f or 0x123_45p-3 - idoubleLiteral, /// 123.456i - ifloatLiteral, /// 123.456fi - intLiteral, /// 123 or 0b1101010101 - longLiteral, /// 123L - realLiteral, /// 123.456L - irealLiteral, /// 123.456Li - uintLiteral, /// 123u - ulongLiteral, /// 123uL - characterLiteral, /// 'a' - dstringLiteral, /// $(D_STRING "32-bit character string"d) - stringLiteral, /// $(D_STRING "an 8-bit string") - wstringLiteral, /// $(D_STRING "16-bit character string"w) + date, /// ___DATE__ + eof, /// ___EOF__ + time, /// ___TIME__ + timestamp, /// ___TIMESTAMP__ + vendor, /// ___VENDOR__ + compilerVersion, /// ___VERSION__ + file, /// $(D_KEYWORD ___FILE__) + line, /// $(D_KEYWORD ___LINE__) + comment, /// $(D_COMMENT /** comment */) or $(D_COMMENT // comment) or $(D_COMMENT ///comment) + identifier, /// anything else + scriptLine, // Line at the beginning of source file that starts from #! + traits, /// $(D_KEYWORD ___traits) + parameters, /// $(D_KEYWORD ___parameters) + vector, /// $(D_KEYWORD ___vector) + whitespace, /// whitespace + specialTokenSequence, /// #line 10 "file.d" + doubleLiteral, /// 123.456 + floatLiteral, /// 123.456f or 0x123_45p-3 + idoubleLiteral, /// 123.456i + ifloatLiteral, /// 123.456fi + intLiteral, /// 123 or 0b1101010101 + longLiteral, /// 123L + realLiteral, /// 123.456L + irealLiteral, /// 123.456Li + uintLiteral, /// 123u + ulongLiteral, /// 123uL + characterLiteral, /// 'a' + dstringLiteral, /// $(D_STRING "32-bit character string"d) + stringLiteral, /// $(D_STRING "an 8-bit string") + wstringLiteral, /// $(D_STRING "16-bit character string"w) } // Implementation details follow @@ -2531,7 +2530,7 @@ private: // uses auto-detection for pure, safe nothrow bool isRangeEoF(R)(ref R range) { - return range.empty || range.front == 0 || range.front == 0x1a; + return range.empty || range.front == 0 || range.front == 0x1a; } /* @@ -2539,233 +2538,233 @@ bool isRangeEoF(R)(ref R range) * generated. */ immutable(string[TokenType.max + 1]) tokenValues = [ - "=", - "@", - "&", - "&=", - "|", - "|=", - "~=", - ":", - ",", - "--", - "/", - "/=", - "$", - ".", - "==", - "=>", - ">", - ">=", - "#", - "++", - "{", - "[", - "<", - "<=", - "<>=", - "<>", - "&&", - "||", - "(", - "-", - "-=", - "%", - "%=", - "*=", - "!", - "!=", - "!>", - "!>=", - "!<", - "!<=", - "!<>", - "+", - "+=", - "^^", - "^^=", - "}", - "]", - ")", - ";", - "<<", - "<<=", - ">>", - ">>=", - "..", - "*", - "?", - "~", - "!<>=", - ">>>", - ">>>=", - "...", - "^", - "^=", - "bool", - "byte", - "cdouble", - "cent", - "cfloat", - "char", - "creal", - "dchar", - "double", - "float", - "function", - "idouble", - "ifloat", - "int", - "ireal", - "long", - "real", - "short", - "ubyte", - "ucent", - "uint", - "ulong", - "ushort", - "void", - "wchar", - "align", - "deprecated", - "extern", - "pragma", - "export", - "package", - "private", - "protected", - "public", - "abstract", - "auto", - "const", - "final", - "__gshared", - "immutable", - "inout", - "scope", - "shared", - "static", - "synchronized", - "alias", - "asm", - "assert", - "body", - "break", - "case", - "cast", - "catch", - "class", - "continue", - "debug", - "default", - "delegate", - "delete", - "do", - "else", - "enum", - "false", - "finally", - "foreach", - "foreach_reverse", - "for", - "goto", - "if", - "import", - "in", - "interface", - "invariant", - "is", - "lazy", - "macro", - "mixin", - "module", - "new", - "nothrow", - "null", - "out", - "override", - "pure", - "ref", - "return", - "struct", - "super", - "switch", - "template", - "this", - "throw", - "true", - "try", - "typedef", - "typeid", - "typeof", - "union", - "unittest", - "version", - "volatile", - "while", - "with", - "__DATE__", - "__EOF__", - "__TIME__", - "__TIMESTAMP__", - "__VENDOR__", - "__VERSION__", - "__FILE__", - "__LINE__", - null, - null, - null, - "__traits", - "__parameters", - "__vector", - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, + "=", + "@", + "&", + "&=", + "|", + "|=", + "~=", + ":", + ",", + "--", + "/", + "/=", + "$", + ".", + "==", + "=>", + ">", + ">=", + "#", + "++", + "{", + "[", + "<", + "<=", + "<>=", + "<>", + "&&", + "||", + "(", + "-", + "-=", + "%", + "%=", + "*=", + "!", + "!=", + "!>", + "!>=", + "!<", + "!<=", + "!<>", + "+", + "+=", + "^^", + "^^=", + "}", + "]", + ")", + ";", + "<<", + "<<=", + ">>", + ">>=", + "..", + "*", + "?", + "~", + "!<>=", + ">>>", + ">>>=", + "...", + "^", + "^=", + "bool", + "byte", + "cdouble", + "cent", + "cfloat", + "char", + "creal", + "dchar", + "double", + "float", + "function", + "idouble", + "ifloat", + "int", + "ireal", + "long", + "real", + "short", + "ubyte", + "ucent", + "uint", + "ulong", + "ushort", + "void", + "wchar", + "align", + "deprecated", + "extern", + "pragma", + "export", + "package", + "private", + "protected", + "public", + "abstract", + "auto", + "const", + "final", + "__gshared", + "immutable", + "inout", + "scope", + "shared", + "static", + "synchronized", + "alias", + "asm", + "assert", + "body", + "break", + "case", + "cast", + "catch", + "class", + "continue", + "debug", + "default", + "delegate", + "delete", + "do", + "else", + "enum", + "false", + "finally", + "foreach", + "foreach_reverse", + "for", + "goto", + "if", + "import", + "in", + "interface", + "invariant", + "is", + "lazy", + "macro", + "mixin", + "module", + "new", + "nothrow", + "null", + "out", + "override", + "pure", + "ref", + "return", + "struct", + "super", + "switch", + "template", + "this", + "throw", + "true", + "try", + "typedef", + "typeid", + "typeof", + "union", + "unittest", + "version", + "volatile", + "while", + "with", + "__DATE__", + "__EOF__", + "__TIME__", + "__TIMESTAMP__", + "__VENDOR__", + "__VERSION__", + "__FILE__", + "__LINE__", + null, + null, + null, + "__traits", + "__parameters", + "__vector", + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, ]; pure string getTokenValue(const TokenType type) { - return tokenValues[type]; + return tokenValues[type]; } private pure bool isNewline(ubyte ch) { - return ch == '\n' || ch == '\r'; + return ch == '\n' || ch == '\r'; } pure TokenType lookupTokenType(R)(R input) { - switch(input.length) - { - case 2: - switch (input[0]) - { + switch(input.length) + { + case 2: + switch (input[0]) + { case 'd': if (input[1] == 'o') return TokenType.do_; else break; - case 'i': + case 'i': if (input[1] == 'f') return TokenType.if_; else if (input[1] == 'n') return TokenType.in_; else if (input[1] == 's') return TokenType.is_; - else break; - default: break; - } - break; - case 3: - switch (input[0]) - { + else break; + default: break; + } + break; + case 3: + switch (input[0]) + { case 'a': if (input[1..$].equal("sm")) return TokenType.asm_; else break; case 'f': if (input[1..$].equal("or")) return TokenType.for_; else break; case 'i': if (input[1..$].equal("nt")) return TokenType.int_; else break; @@ -2773,44 +2772,44 @@ pure TokenType lookupTokenType(R)(R input) case 'o': if (input[1..$].equal("ut")) return TokenType.out_; else break; case 'r': if (input[1..$].equal("ef")) return TokenType.ref_; else break; case 't': if (input[1..$].equal("ry")) return TokenType.try_; else break; - default: break; - } - break; - case 4: - switch (input[0]) - { + default: break; + } + break; + case 4: + switch (input[0]) + { case 'a': if (input[1..$].equal("uto")) return TokenType.auto_; else break; case 'b': if (input[1..$].equal("ody")) return TokenType.body_; else if (input[1..$].equal("ool")) return TokenType.bool_; else if (input[1..$].equal("yte")) return TokenType.byte_; - else break; + else break; case 'c': if (input[1..$].equal("ase")) return TokenType.case_; else if (input[1..$].equal("ast")) return TokenType.cast_; else if (input[1..$].equal("ent")) return TokenType.cent_; else if (input[1..$].equal("har")) return TokenType.char_; - else break; + else break; case 'e': if (input[1..$].equal("lse")) return TokenType.else_; else if (input[1..$].equal("num")) return TokenType.enum_; - else break; + else break; case 'g': if (input[1..$].equal("oto")) return TokenType.goto_; else break; case 'l': if (input[1..$].equal("azy")) return TokenType.lazy_; else if (input[1..$].equal("ong")) return TokenType.long_; - else break; + else break; case 'n': if (input[1..$].equal("ull")) return TokenType.null_; else break; case 'p': if (input[1..$].equal("ure")) return TokenType.pure_; else break; case 'r': if (input[1..$].equal("eal")) return TokenType.real_; else break; case 't': if (input[1..$].equal("his")) return TokenType.this_; else if (input[1..$].equal("rue")) return TokenType.true_; - else break; + else break; case 'u': if (input[1..$].equal("int")) return TokenType.uint_; else break; case 'v': if (input[1..$].equal("oid")) return TokenType.void_; else break; case 'w': if (input[1..$].equal("ith")) return TokenType.with_; else break; - default: break; - } - break; - case 5: - switch (input[0]) - { + default: break; + } + break; + case 5: + switch (input[0]) + { case 'a': if (input[1..$].equal("lias")) return TokenType.alias_; else if (input[1..$].equal("lign")) return TokenType.align_; else break; case 'b': if (input[1..$].equal("reak")) return TokenType.break_; else break; @@ -2818,13 +2817,13 @@ pure TokenType lookupTokenType(R)(R input) else if (input[1..$].equal("lass")) return TokenType.class_; else if (input[1..$].equal("onst")) return TokenType.const_; else if (input[1..$].equal("real")) return TokenType.creal_; - else break; + else break; case 'd': if (input[1..$].equal("char")) return TokenType.dchar_; else if (input[1..$].equal("ebug")) return TokenType.debug_; else break; case 'f': if (input[1..$].equal("alse")) return TokenType.false_; else if (input[1..$].equal("inal")) return TokenType.final_; else if (input[1..$].equal("loat")) return TokenType.float_; - else break; + else break; case 'i': if (input[1..$].equal("nout")) return TokenType.inout_; else if (input[1..$].equal("real")) return TokenType.ireal_; else break; case 'm': if (input[1..$].equal("acro")) return TokenType.macro_; @@ -2837,16 +2836,16 @@ pure TokenType lookupTokenType(R)(R input) else if (input[1..$].equal("cent")) return TokenType.ucent_; else if (input[1..$].equal("long")) return TokenType.ulong_; else if (input[1..$].equal("nion")) return TokenType.union_; - else break; + else break; case 'w': if (input[1..$].equal("char")) return TokenType.wchar_; else if (input[1..$].equal("hile")) return TokenType.while_; - else break; - default: break; - } - break; - case 6: - switch (input[0]) - { + else break; + default: break; + } + break; + case 6: + switch (input[0]) + { case 'a': if (input[1..$].equal("ssert")) return TokenType.assert_; else break; case 'c': if (input[1..$].equal("float")) return TokenType.cfloat_; else break; case 'd': if (input[1..$].equal("elete")) return TokenType.delete_; @@ -2866,12 +2865,12 @@ pure TokenType lookupTokenType(R)(R input) case 't': if (input[1..$].equal("ypeid")) return TokenType.typeid_; else if (input[1..$].equal("ypeof")) return TokenType.typeof_; else break; case 'u': if (input[1..$].equal("short")) return TokenType.ushort_; else break; - default: break; - } - break; - case 7: - switch (input[0]) - { + default: break; + } + break; + case 7: + switch (input[0]) + { case '_': if (input[1..$].equal("_EOF__")) return TokenType.eof; else break; case 'c': if (input[1..$].equal("double")) return TokenType.cdouble_; else break; case 'd': if (input[1..$].equal("efault")) return TokenType.default_; else break; @@ -2883,12 +2882,12 @@ pure TokenType lookupTokenType(R)(R input) else if (input[1..$].equal("rivate")) return TokenType.private_; else break; case 't': if (input[1..$].equal("ypedef")) return TokenType.typedef_; else break; case 'v': if (input[1..$].equal("ersion")) return TokenType.version_; else break; - default: break; - } - break; - case 8: - switch (input[0]) - { + default: break; + } + break; + case 8: + switch (input[0]) + { case '_': if (input[1..$].equal("_DATE__")) return TokenType.date; else if (input[1..$].equal("_FILE__")) return TokenType.file; else if (input[1..$].equal("_LINE__")) return TokenType.line; @@ -2902,145 +2901,145 @@ pure TokenType lookupTokenType(R)(R input) case 't': if (input[1..$].equal("emplate")) return TokenType.template_; else break; case 'u': if (input[1..$].equal("nittest")) return TokenType.unittest_; else break; case 'v': if (input[1..$].equal("olatile")) return TokenType.volatile_; else break; - default: break; - } - break; - case 9: - switch (input[0]) - { + default: break; + } + break; + case 9: + switch (input[0]) + { case '_': if (input[1..$].equal("_gshared")) return TokenType.gshared; else break; case 'i': if (input[1..$].equal("mmutable")) return TokenType.immutable_; else if (input[1..$].equal("nterface")) return TokenType.interface_; else if (input[1..$].equal("nvariant")) return TokenType.invariant_; else break; case 'p': if (input[1..$].equal("rotected")) return TokenType.protected_; else break; - default: break; - } - break; - case 10: - switch (input[0]) - { + default: break; + } + break; + case 10: + switch (input[0]) + { case 'd': if (input[1..$].equal("eprecated")) return TokenType.deprecated_; else break; case '_': if (input[1..$].equal("_VENDOR__")) return TokenType.vendor; else break; - default: break; - } - break; - case 11: + default: break; + } + break; + case 11: if (input[1..$].equal("_VERSION__")) - return TokenType.compilerVersion; - break; - case 12: + return TokenType.compilerVersion; + break; + case 12: if (input[1..$].equal("ynchronized")) - return TokenType.synchronized_; - break; - case 13: + return TokenType.synchronized_; + break; + case 13: if (input[1..$].equal("_TIMESTAMP__")) - return TokenType.timestamp; - break; - case 15: + return TokenType.timestamp; + break; + case 15: if (input[1..$].equal("oreach_reverse")) - return TokenType.foreach_reverse_; - break; - default: break; - } - return TokenType.identifier; + return TokenType.foreach_reverse_; + break; + default: break; + } + return TokenType.identifier; } class Trie(K, V) if (isInputRange!K): TrieNode!(K, V) { - /** - * Adds the given value to the trie with the given key - */ - void add(K key, V value) pure - { - TrieNode!(K,V) current = this; - foreach(keyPart; key) - { - if ((keyPart in current.children) is null) - { - auto node = new TrieNode!(K, V); - current.children[keyPart] = node; - current = node; - } - else - current = current.children[keyPart]; - } - current.value = value; - } + /** + * Adds the given value to the trie with the given key + */ + void add(K key, V value) pure + { + TrieNode!(K,V) current = this; + foreach(keyPart; key) + { + if ((keyPart in current.children) is null) + { + auto node = new TrieNode!(K, V); + current.children[keyPart] = node; + current = node; + } + else + current = current.children[keyPart]; + } + current.value = value; + } } class TrieNode(K, V) if (isInputRange!K) { - V value; - TrieNode!(K,V)[ElementType!K] children; + V value; + TrieNode!(K,V)[ElementType!K] children; } string printCaseStatements(K, V)(TrieNode!(K,V) node, string indentString) { - string caseStatement = ""; - foreach(dchar k, TrieNode!(K,V) v; node.children) - { - caseStatement ~= indentString; - caseStatement ~= "case '"; - caseStatement ~= k; - caseStatement ~= "':\n"; - caseStatement ~= indentString; + string caseStatement = ""; + foreach(dchar k, TrieNode!(K,V) v; node.children) + { + caseStatement ~= indentString; + caseStatement ~= "case '"; + caseStatement ~= k; + caseStatement ~= "':\n"; + caseStatement ~= indentString; caseStatement ~= "\tnextCharNonLF();\n"; - if (v.children.length > 0) - { - caseStatement ~= indentString; - caseStatement ~= "\tif (isEoF())\n"; - caseStatement ~= indentString; - caseStatement ~= "\t{\n"; - caseStatement ~= indentString; - caseStatement ~= "\t\tcurrent.value = getTokenValue(current.type);\n"; - caseStatement ~= indentString; - caseStatement ~= "\t\tcurrent.type = " ~ node.children[k].value; - caseStatement ~= ";\n"; - caseStatement ~= indentString; - caseStatement ~= "\t\treturn;\n"; - caseStatement ~= indentString; - caseStatement ~= "\t}\n"; - caseStatement ~= indentString; + if (v.children.length > 0) + { + caseStatement ~= indentString; + caseStatement ~= "\tif (isEoF())\n"; + caseStatement ~= indentString; + caseStatement ~= "\t{\n"; + caseStatement ~= indentString; + caseStatement ~= "\t\tcurrent.value = getTokenValue(current.type);\n"; + caseStatement ~= indentString; + caseStatement ~= "\t\tcurrent.type = " ~ node.children[k].value; + caseStatement ~= ";\n"; + caseStatement ~= indentString; + caseStatement ~= "\t\treturn;\n"; + caseStatement ~= indentString; + caseStatement ~= "\t}\n"; + caseStatement ~= indentString; caseStatement ~= "\tswitch (src.front)\n"; - caseStatement ~= indentString; - caseStatement ~= "\t{\n"; - caseStatement ~= printCaseStatements(v, indentString ~ "\t"); - caseStatement ~= indentString; - caseStatement ~= "\tdefault:\n"; - caseStatement ~= indentString; - caseStatement ~= "\t\tcurrent.type = "; - caseStatement ~= v.value; - caseStatement ~= ";\n"; - caseStatement ~= indentString; - caseStatement ~= "\t\tcurrent.value = getTokenValue(current.type);\n"; - caseStatement ~= indentString; - caseStatement ~= "\t\treturn;\n"; - caseStatement ~= indentString; - caseStatement ~= "\t}\n"; - } - else - { - caseStatement ~= indentString; - caseStatement ~= "\tcurrent.type = "; - caseStatement ~= v.value; - caseStatement ~= ";\n"; - caseStatement ~= indentString; - caseStatement ~= "\tcurrent.value = getTokenValue(current.type);\n"; - caseStatement ~= indentString; - caseStatement ~= "\treturn;\n"; - } - } - return caseStatement; + caseStatement ~= indentString; + caseStatement ~= "\t{\n"; + caseStatement ~= printCaseStatements(v, indentString ~ "\t"); + caseStatement ~= indentString; + caseStatement ~= "\tdefault:\n"; + caseStatement ~= indentString; + caseStatement ~= "\t\tcurrent.type = "; + caseStatement ~= v.value; + caseStatement ~= ";\n"; + caseStatement ~= indentString; + caseStatement ~= "\t\tcurrent.value = getTokenValue(current.type);\n"; + caseStatement ~= indentString; + caseStatement ~= "\t\treturn;\n"; + caseStatement ~= indentString; + caseStatement ~= "\t}\n"; + } + else + { + caseStatement ~= indentString; + caseStatement ~= "\tcurrent.type = "; + caseStatement ~= v.value; + caseStatement ~= ";\n"; + caseStatement ~= indentString; + caseStatement ~= "\tcurrent.value = getTokenValue(current.type);\n"; + caseStatement ~= indentString; + caseStatement ~= "\treturn;\n"; + } + } + return caseStatement; } string generateCaseTrie(string[] args ...) { - auto t = new Trie!(string, string); - for(int i = 0; i < args.length; i+=2) - { - t.add(args[i], args[i+1]); - } - return printCaseStatements(t, ""); + auto t = new Trie!(string, string); + for(int i = 0; i < args.length; i+=2) + { + t.add(args[i], args[i+1]); + } + return printCaseStatements(t, ""); } struct StringCache @@ -3048,144 +3047,144 @@ struct StringCache string get(R)(R range) if(isRandomAccessRange!R && is(Unqual!(ElementType!R) : const(ubyte))) - { - size_t bucket; - hash_t h; + { + size_t bucket; + hash_t h; string* val = find(range, bucket, h); - if (val !is null) - { - return *val; - } - else - { + if (val !is null) + { + return *val; + } + else + { auto s = putIntoCache(range); - index[bucket] ~= s; - return s; - } - } + index[bucket] ~= s; + return s; + } + } private: import core.stdc.string; string* find(R)(R data, out size_t bucket, out hash_t h) - { - h = hash(data); - bucket = h % mapSize; - foreach (i; 0 .. index[bucket].length) - { + { + h = hash(data); + bucket = h % mapSize; + foreach (i; 0 .. index[bucket].length) + { if (equal(index[bucket][i], data)) - { - return &index[bucket][i]; - } - } - return null; - } + { + return &index[bucket][i]; + } + } + return null; + } static hash_t hash(R)(R data) - { - uint hash = 0; + { + uint hash = 0; foreach (b; data) - { - hash ^= sbox[b]; - hash *= 3; - } - return hash; - } + { + hash ^= sbox[b]; + hash *= 3; + } + return hash; + } - enum mapSize = 2048; - string[][mapSize] index; - // leave some slack for alloctors/GC meta-data - enum chunkSize = 16*1024 - size_t.sizeof*8; - ubyte*[] chunkS; - size_t next = chunkSize; + enum mapSize = 2048; + string[][mapSize] index; + // leave some slack for alloctors/GC meta-data + enum chunkSize = 16*1024 - size_t.sizeof*8; + ubyte*[] chunkS; + size_t next = chunkSize; string putIntoCache(R)(R data) - { - import core.memory; + { + import core.memory; - if(next + data.length > chunkSize) - { - // avoid huge strings - if(data.length > chunkSize/4) - return (cast(char[])data).idup; - chunkS ~= cast(ubyte*)GC.malloc(chunkSize, - GC.BlkAttr.NO_SCAN | GC.BlkAttr.NO_INTERIOR); - next = 0; - } - auto slice = chunkS[$-1][next..next+data.length]; - slice[] = data[]; - next += data.length; - return cast(string)slice; - } + if(next + data.length > chunkSize) + { + // avoid huge strings + if(data.length > chunkSize/4) + return (cast(char[])data).idup; + chunkS ~= cast(ubyte*)GC.malloc(chunkSize, + GC.BlkAttr.NO_SCAN | GC.BlkAttr.NO_INTERIOR); + next = 0; + } + auto slice = chunkS[$-1][next..next+data.length]; + slice[] = data[]; + next += data.length; + return cast(string)slice; + } } immutable uint[] sbox = [ - 0xF53E1837, 0x5F14C86B, 0x9EE3964C, 0xFA796D53, - 0x32223FC3, 0x4D82BC98, 0xA0C7FA62, 0x63E2C982, - 0x24994A5B, 0x1ECE7BEE, 0x292B38EF, 0xD5CD4E56, - 0x514F4303, 0x7BE12B83, 0x7192F195, 0x82DC7300, - 0x084380B4, 0x480B55D3, 0x5F430471, 0x13F75991, - 0x3F9CF22C, 0x2FE0907A, 0xFD8E1E69, 0x7B1D5DE8, - 0xD575A85C, 0xAD01C50A, 0x7EE00737, 0x3CE981E8, - 0x0E447EFA, 0x23089DD6, 0xB59F149F, 0x13600EC7, - 0xE802C8E6, 0x670921E4, 0x7207EFF0, 0xE74761B0, - 0x69035234, 0xBFA40F19, 0xF63651A0, 0x29E64C26, - 0x1F98CCA7, 0xD957007E, 0xE71DDC75, 0x3E729595, - 0x7580B7CC, 0xD7FAF60B, 0x92484323, 0xA44113EB, - 0xE4CBDE08, 0x346827C9, 0x3CF32AFA, 0x0B29BCF1, - 0x6E29F7DF, 0xB01E71CB, 0x3BFBC0D1, 0x62EDC5B8, - 0xB7DE789A, 0xA4748EC9, 0xE17A4C4F, 0x67E5BD03, - 0xF3B33D1A, 0x97D8D3E9, 0x09121BC0, 0x347B2D2C, - 0x79A1913C, 0x504172DE, 0x7F1F8483, 0x13AC3CF6, - 0x7A2094DB, 0xC778FA12, 0xADF7469F, 0x21786B7B, - 0x71A445D0, 0xA8896C1B, 0x656F62FB, 0x83A059B3, - 0x972DFE6E, 0x4122000C, 0x97D9DA19, 0x17D5947B, - 0xB1AFFD0C, 0x6EF83B97, 0xAF7F780B, 0x4613138A, - 0x7C3E73A6, 0xCF15E03D, 0x41576322, 0x672DF292, - 0xB658588D, 0x33EBEFA9, 0x938CBF06, 0x06B67381, - 0x07F192C6, 0x2BDA5855, 0x348EE0E8, 0x19DBB6E3, - 0x3222184B, 0xB69D5DBA, 0x7E760B88, 0xAF4D8154, - 0x007A51AD, 0x35112500, 0xC9CD2D7D, 0x4F4FB761, - 0x694772E3, 0x694C8351, 0x4A7E3AF5, 0x67D65CE1, - 0x9287DE92, 0x2518DB3C, 0x8CB4EC06, 0xD154D38F, - 0xE19A26BB, 0x295EE439, 0xC50A1104, 0x2153C6A7, - 0x82366656, 0x0713BC2F, 0x6462215A, 0x21D9BFCE, - 0xBA8EACE6, 0xAE2DF4C1, 0x2A8D5E80, 0x3F7E52D1, - 0x29359399, 0xFEA1D19C, 0x18879313, 0x455AFA81, - 0xFADFE838, 0x62609838, 0xD1028839, 0x0736E92F, - 0x3BCA22A3, 0x1485B08A, 0x2DA7900B, 0x852C156D, - 0xE8F24803, 0x00078472, 0x13F0D332, 0x2ACFD0CF, - 0x5F747F5C, 0x87BB1E2F, 0xA7EFCB63, 0x23F432F0, - 0xE6CE7C5C, 0x1F954EF6, 0xB609C91B, 0x3B4571BF, - 0xEED17DC0, 0xE556CDA0, 0xA7846A8D, 0xFF105F94, - 0x52B7CCDE, 0x0E33E801, 0x664455EA, 0xF2C70414, - 0x73E7B486, 0x8F830661, 0x8B59E826, 0xBB8AEDCA, - 0xF3D70AB9, 0xD739F2B9, 0x4A04C34A, 0x88D0F089, - 0xE02191A2, 0xD89D9C78, 0x192C2749, 0xFC43A78F, - 0x0AAC88CB, 0x9438D42D, 0x9E280F7A, 0x36063802, - 0x38E8D018, 0x1C42A9CB, 0x92AAFF6C, 0xA24820C5, - 0x007F077F, 0xCE5BC543, 0x69668D58, 0x10D6FF74, - 0xBE00F621, 0x21300BBE, 0x2E9E8F46, 0x5ACEA629, - 0xFA1F86C7, 0x52F206B8, 0x3EDF1A75, 0x6DA8D843, - 0xCF719928, 0x73E3891F, 0xB4B95DD6, 0xB2A42D27, - 0xEDA20BBF, 0x1A58DBDF, 0xA449AD03, 0x6DDEF22B, - 0x900531E6, 0x3D3BFF35, 0x5B24ABA2, 0x472B3E4C, - 0x387F2D75, 0x4D8DBA36, 0x71CB5641, 0xE3473F3F, - 0xF6CD4B7F, 0xBF7D1428, 0x344B64D0, 0xC5CDFCB6, - 0xFE2E0182, 0x2C37A673, 0xDE4EB7A3, 0x63FDC933, - 0x01DC4063, 0x611F3571, 0xD167BFAF, 0x4496596F, - 0x3DEE0689, 0xD8704910, 0x7052A114, 0x068C9EC5, - 0x75D0E766, 0x4D54CC20, 0xB44ECDE2, 0x4ABC653E, - 0x2C550A21, 0x1A52C0DB, 0xCFED03D0, 0x119BAFE2, - 0x876A6133, 0xBC232088, 0x435BA1B2, 0xAE99BBFA, - 0xBB4F08E4, 0xA62B5F49, 0x1DA4B695, 0x336B84DE, - 0xDC813D31, 0x00C134FB, 0x397A98E6, 0x151F0E64, - 0xD9EB3E69, 0xD3C7DF60, 0xD2F2C336, 0x2DDD067B, - 0xBD122835, 0xB0B3BD3A, 0xB0D54E46, 0x8641F1E4, - 0xA0B38F96, 0x51D39199, 0x37A6AD75, 0xDF84EE41, - 0x3C034CBA, 0xACDA62FC, 0x11923B8B, 0x45EF170A, + 0xF53E1837, 0x5F14C86B, 0x9EE3964C, 0xFA796D53, + 0x32223FC3, 0x4D82BC98, 0xA0C7FA62, 0x63E2C982, + 0x24994A5B, 0x1ECE7BEE, 0x292B38EF, 0xD5CD4E56, + 0x514F4303, 0x7BE12B83, 0x7192F195, 0x82DC7300, + 0x084380B4, 0x480B55D3, 0x5F430471, 0x13F75991, + 0x3F9CF22C, 0x2FE0907A, 0xFD8E1E69, 0x7B1D5DE8, + 0xD575A85C, 0xAD01C50A, 0x7EE00737, 0x3CE981E8, + 0x0E447EFA, 0x23089DD6, 0xB59F149F, 0x13600EC7, + 0xE802C8E6, 0x670921E4, 0x7207EFF0, 0xE74761B0, + 0x69035234, 0xBFA40F19, 0xF63651A0, 0x29E64C26, + 0x1F98CCA7, 0xD957007E, 0xE71DDC75, 0x3E729595, + 0x7580B7CC, 0xD7FAF60B, 0x92484323, 0xA44113EB, + 0xE4CBDE08, 0x346827C9, 0x3CF32AFA, 0x0B29BCF1, + 0x6E29F7DF, 0xB01E71CB, 0x3BFBC0D1, 0x62EDC5B8, + 0xB7DE789A, 0xA4748EC9, 0xE17A4C4F, 0x67E5BD03, + 0xF3B33D1A, 0x97D8D3E9, 0x09121BC0, 0x347B2D2C, + 0x79A1913C, 0x504172DE, 0x7F1F8483, 0x13AC3CF6, + 0x7A2094DB, 0xC778FA12, 0xADF7469F, 0x21786B7B, + 0x71A445D0, 0xA8896C1B, 0x656F62FB, 0x83A059B3, + 0x972DFE6E, 0x4122000C, 0x97D9DA19, 0x17D5947B, + 0xB1AFFD0C, 0x6EF83B97, 0xAF7F780B, 0x4613138A, + 0x7C3E73A6, 0xCF15E03D, 0x41576322, 0x672DF292, + 0xB658588D, 0x33EBEFA9, 0x938CBF06, 0x06B67381, + 0x07F192C6, 0x2BDA5855, 0x348EE0E8, 0x19DBB6E3, + 0x3222184B, 0xB69D5DBA, 0x7E760B88, 0xAF4D8154, + 0x007A51AD, 0x35112500, 0xC9CD2D7D, 0x4F4FB761, + 0x694772E3, 0x694C8351, 0x4A7E3AF5, 0x67D65CE1, + 0x9287DE92, 0x2518DB3C, 0x8CB4EC06, 0xD154D38F, + 0xE19A26BB, 0x295EE439, 0xC50A1104, 0x2153C6A7, + 0x82366656, 0x0713BC2F, 0x6462215A, 0x21D9BFCE, + 0xBA8EACE6, 0xAE2DF4C1, 0x2A8D5E80, 0x3F7E52D1, + 0x29359399, 0xFEA1D19C, 0x18879313, 0x455AFA81, + 0xFADFE838, 0x62609838, 0xD1028839, 0x0736E92F, + 0x3BCA22A3, 0x1485B08A, 0x2DA7900B, 0x852C156D, + 0xE8F24803, 0x00078472, 0x13F0D332, 0x2ACFD0CF, + 0x5F747F5C, 0x87BB1E2F, 0xA7EFCB63, 0x23F432F0, + 0xE6CE7C5C, 0x1F954EF6, 0xB609C91B, 0x3B4571BF, + 0xEED17DC0, 0xE556CDA0, 0xA7846A8D, 0xFF105F94, + 0x52B7CCDE, 0x0E33E801, 0x664455EA, 0xF2C70414, + 0x73E7B486, 0x8F830661, 0x8B59E826, 0xBB8AEDCA, + 0xF3D70AB9, 0xD739F2B9, 0x4A04C34A, 0x88D0F089, + 0xE02191A2, 0xD89D9C78, 0x192C2749, 0xFC43A78F, + 0x0AAC88CB, 0x9438D42D, 0x9E280F7A, 0x36063802, + 0x38E8D018, 0x1C42A9CB, 0x92AAFF6C, 0xA24820C5, + 0x007F077F, 0xCE5BC543, 0x69668D58, 0x10D6FF74, + 0xBE00F621, 0x21300BBE, 0x2E9E8F46, 0x5ACEA629, + 0xFA1F86C7, 0x52F206B8, 0x3EDF1A75, 0x6DA8D843, + 0xCF719928, 0x73E3891F, 0xB4B95DD6, 0xB2A42D27, + 0xEDA20BBF, 0x1A58DBDF, 0xA449AD03, 0x6DDEF22B, + 0x900531E6, 0x3D3BFF35, 0x5B24ABA2, 0x472B3E4C, + 0x387F2D75, 0x4D8DBA36, 0x71CB5641, 0xE3473F3F, + 0xF6CD4B7F, 0xBF7D1428, 0x344B64D0, 0xC5CDFCB6, + 0xFE2E0182, 0x2C37A673, 0xDE4EB7A3, 0x63FDC933, + 0x01DC4063, 0x611F3571, 0xD167BFAF, 0x4496596F, + 0x3DEE0689, 0xD8704910, 0x7052A114, 0x068C9EC5, + 0x75D0E766, 0x4D54CC20, 0xB44ECDE2, 0x4ABC653E, + 0x2C550A21, 0x1A52C0DB, 0xCFED03D0, 0x119BAFE2, + 0x876A6133, 0xBC232088, 0x435BA1B2, 0xAE99BBFA, + 0xBB4F08E4, 0xA62B5F49, 0x1DA4B695, 0x336B84DE, + 0xDC813D31, 0x00C134FB, 0x397A98E6, 0x151F0E64, + 0xD9EB3E69, 0xD3C7DF60, 0xD2F2C336, 0x2DDD067B, + 0xBD122835, 0xB0B3BD3A, 0xB0D54E46, 0x8641F1E4, + 0xA0B38F96, 0x51D39199, 0x37A6AD75, 0xDF84EE41, + 0x3C034CBA, 0xACDA62FC, 0x11923B8B, 0x45EF170A, ]; unittest @@ -3206,86 +3205,86 @@ unittest unittest { - import std.stdio; - auto source = cast(ubyte[]) ( - " bool byte cdouble cent cfloat char creal dchar double float function" - ~ " idouble ifloat int ireal long real short ubyte ucent uint ulong" - ~ " ushort void wchar align deprecated extern pragma export package private" - ~ " protected public abstract auto const final __gshared immutable inout" - ~ " scope shared static synchronized alias asm assert body break case" - ~ " cast catch class continue debug default delegate delete do else" - ~ " enum false finally foreach foreach_reverse for goto if import in" - ~ " interface invariant is lazy macro mixin module new nothrow null" - ~ " out override pure ref return struct super switch template this" - ~ " throw true try typedef typeid typeof union unittest version volatile" - ~ " while with __traits __parameters __vector"); - auto expected = ["bool", "byte", "cdouble", - "cent", "cfloat", "char", "creal", - "dchar", "double", "float", "function", - "idouble", "ifloat", "int", "ireal", "long", - "real", "short", "ubyte", "ucent", "uint", - "ulong", "ushort", "void", "wchar", "align", - "deprecated", "extern", "pragma", "export", - "package", "private", "protected", "public", - "abstract", "auto", "const", "final", "__gshared", - "immutable", "inout", "scope", "shared", - "static", "synchronized", "alias", "asm", "assert", - "body", "break", "case", "cast", "catch", - "class", "continue", "debug", "default", "delegate", - "delete", "do", "else", "enum", "false", - "finally", "foreach", "foreach_reverse", "for", - "goto", "if", "import", "in", "interface", - "invariant", "is", "lazy","macro", "mixin", - "module", "new", "nothrow", "null", "out", - "override", "pure", "ref", "return", "struct", - "super", "switch", "template", "this", "throw", - "true", "try", "typedef", "typeid", "typeof", - "union", "unittest", "version", "volatile", - "while", "with", "__traits", "__parameters", "__vector"]; - LexerConfig config; - auto tokens = byToken(source, config); - //writeln(tokens.map!"a.value"().array()); - assert (equal(map!"a.value"(tokens), expected)); + import std.stdio; + auto source = cast(ubyte[]) ( + " bool byte cdouble cent cfloat char creal dchar double float function" + ~ " idouble ifloat int ireal long real short ubyte ucent uint ulong" + ~ " ushort void wchar align deprecated extern pragma export package private" + ~ " protected public abstract auto const final __gshared immutable inout" + ~ " scope shared static synchronized alias asm assert body break case" + ~ " cast catch class continue debug default delegate delete do else" + ~ " enum false finally foreach foreach_reverse for goto if import in" + ~ " interface invariant is lazy macro mixin module new nothrow null" + ~ " out override pure ref return struct super switch template this" + ~ " throw true try typedef typeid typeof union unittest version volatile" + ~ " while with __traits __parameters __vector"); + auto expected = ["bool", "byte", "cdouble", + "cent", "cfloat", "char", "creal", + "dchar", "double", "float", "function", + "idouble", "ifloat", "int", "ireal", "long", + "real", "short", "ubyte", "ucent", "uint", + "ulong", "ushort", "void", "wchar", "align", + "deprecated", "extern", "pragma", "export", + "package", "private", "protected", "public", + "abstract", "auto", "const", "final", "__gshared", + "immutable", "inout", "scope", "shared", + "static", "synchronized", "alias", "asm", "assert", + "body", "break", "case", "cast", "catch", + "class", "continue", "debug", "default", "delegate", + "delete", "do", "else", "enum", "false", + "finally", "foreach", "foreach_reverse", "for", + "goto", "if", "import", "in", "interface", + "invariant", "is", "lazy","macro", "mixin", + "module", "new", "nothrow", "null", "out", + "override", "pure", "ref", "return", "struct", + "super", "switch", "template", "this", "throw", + "true", "try", "typedef", "typeid", "typeof", + "union", "unittest", "version", "volatile", + "while", "with", "__traits", "__parameters", "__vector"]; + LexerConfig config; + auto tokens = byToken(source, config); + //writeln(tokens.map!"a.value"().array()); + assert (equal(map!"a.value"(tokens), expected)); } unittest { - auto source = cast(ubyte[]) ("=@& &=| |=~=:,--/ /=$.===>> >=++{[< <=<>=<>&&||(- -=%%=*=!!=!>!>=!+ +=^^^^=}]);<< <<=>> >>=..*?~!<>=>>>>>>=...^ ^="); - auto expected = ["=", "@", "&", "&=", "|", "|=", "~=", - ":", ",", "--", "/", "/=", "$", ".", "==", - "=>", ">", ">=", "++", "{", "[", "<", - "<=", "<>=", "<>", "&&", "||", "(", "-", "-=", "%", - "%=", "*=", "!", "!=", "!>", "!>=", "!<", - "!<=", "!<>", "+", "+=", "^^", "^^=", - "}", "]", ")", ";", "<<", "<<=", ">>", - ">>=", "..", "*", "?", "~", "!<>=", - ">>>", ">>>=", "...", "^", "^="]; - LexerConfig config; - auto tokens = byToken(source, config); - //writeln(tokens.map!"a.value"().array()); + auto source = cast(ubyte[]) ("=@& &=| |=~=:,--/ /=$.===>> >=++{[< <=<>=<>&&||(- -=%%=*=!!=!>!>=!+ +=^^^^=}]);<< <<=>> >>=..*?~!<>=>>>>>>=...^ ^="); + auto expected = ["=", "@", "&", "&=", "|", "|=", "~=", + ":", ",", "--", "/", "/=", "$", ".", "==", + "=>", ">", ">=", "++", "{", "[", "<", + "<=", "<>=", "<>", "&&", "||", "(", "-", "-=", "%", + "%=", "*=", "!", "!=", "!>", "!>=", "!<", + "!<=", "!<>", "+", "+=", "^^", "^^=", + "}", "]", ")", ";", "<<", "<<=", ">>", + ">>=", "..", "*", "?", "~", "!<>=", + ">>>", ">>>=", "...", "^", "^="]; + LexerConfig config; + auto tokens = byToken(source, config); + //writeln(tokens.map!"a.value"().array()); assert (equal(map!"a.value"(tokens), expected), map!"a.value"(tokens).text()); } unittest { - auto source = cast(ubyte[]) (` - 1 1.2 //comment - 1.2f 1u 1uL 0b011 0b1uu 0b1 /+abc/+def+/+/0x11001uL - 123e1L 123e+1f 123e-1i 15e++ 4ea 1.2u 4i 1337L 4.2L 1..2 4.3.5.8 + auto source = cast(ubyte[]) (` + 1 1.2 //comment + 1.2f 1u 1uL 0b011 0b1uu 0b1 /+abc/+def+/+/0x11001uL + 123e1L 123e+1f 123e-1i 15e++ 4ea 1.2u 4i 1337L 4.2L 1..2 4.3.5.8 0xabc 0xabcp4 0x1P-10 0x40u 0x29L 0x4Lu 0xdeadbeef - `); - auto expected = ["1", "1.2", "1.2f", "1u", "1uL", "0b011", "0b1u", "u", "0b1", - "0x11001uL", "123e1L", "123e+1f", "123e-1i", "15e+", "+", "4e", "a", + `); + auto expected = ["1", "1.2", "1.2f", "1u", "1uL", "0b011", "0b1u", "u", "0b1", + "0x11001uL", "123e1L", "123e+1f", "123e-1i", "15e+", "+", "4e", "a", "1.2", "u", "4i", "1337L", "4.2L", "1", "..", "2", "4.3", ".5", ".8", "0xabc", "0xabcp4", "0x1P-10", "0x40u", "0x29L", "0x4Lu", "0xdeadbeef"]; - int errCount = 0; - void errorFunction(string file, size_t index, uint line, uint col, string msg) - { - ++errCount; - } - LexerConfig config; - config.errorFunc = &errorFunction; - auto tokens = byToken(source, config); + int errCount = 0; + void errorFunction(string file, size_t index, uint line, uint col, string msg) + { + ++errCount; + } + LexerConfig config; + config.errorFunc = &errorFunction; + auto tokens = byToken(source, config); //writeln(tokens.map!"a.value"()); assert (equal(map!"a.value"(tokens), expected), map!"a.value"(tokens).text()); assert (errCount == 2); @@ -3294,14 +3293,14 @@ unittest unittest { auto source = cast(ubyte[]) ("int #line 4\n double q{abcde (a + b) == 0} '\\u0020' q\"HEREDOC\r\nabcde\r\nHEREDOC\""); - LexerConfig config; - auto tokens = byToken(source, config); - assert (tokens.front.line == 1); - assert (tokens.moveFront() == TokenType.int_); - assert (tokens.front.line == 4); - assert (isType(tokens.front)); - assert (tokens.front.value == "double"); - tokens.popFront(); + LexerConfig config; + auto tokens = byToken(source, config); + assert (tokens.front.line == 1); + assert (tokens.moveFront() == TokenType.int_); + assert (tokens.front.line == 4); + assert (isType(tokens.front)); + assert (tokens.front.value == "double"); + tokens.popFront(); assert (tokens.front.value == "abcde (a + b) == 0", tokens.front.value); assert (isStringLiteral(tokens.front), tokens.front.type.text()); tokens.popFront(); @@ -3326,47 +3325,47 @@ unittest unittest { - auto source = cast(ubyte[]) (`"string`); - int errCount = 0; - void errorFunction(string file, size_t index, uint line, uint col, string msg) - { - ++errCount; - } - LexerConfig config; - config.errorFunc = &errorFunction; - auto tokens = byToken(source, config); - assert (errCount == 1); + auto source = cast(ubyte[]) (`"string`); + int errCount = 0; + void errorFunction(string file, size_t index, uint line, uint col, string msg) + { + ++errCount; + } + LexerConfig config; + config.errorFunc = &errorFunction; + auto tokens = byToken(source, config); + assert (errCount == 1); } unittest { - auto source = cast(ubyte[]) ("import foo"); - LexerConfig config; - auto tokens = byToken(source, config); - Token a = tokens.moveFront(); - assert (a.type == TokenType.import_); - Token b = tokens.moveFront(); - assert (b.type == TokenType.identifier); - assert (a != b); - assert (a != "foo"); - assert (a < b); + auto source = cast(ubyte[]) ("import foo"); + LexerConfig config; + auto tokens = byToken(source, config); + Token a = tokens.moveFront(); + assert (a.type == TokenType.import_); + Token b = tokens.moveFront(); + assert (b.type == TokenType.identifier); + assert (a != b); + assert (a != "foo"); + assert (a < b); assert (b == "foo"); - assert (b > a); - assert (!(a > a)); - assert (tokens.empty); + assert (b > a); + assert (!(a > a)); + assert (tokens.empty); } unittest { - auto source = cast(ubyte[]) ("import std.stdio; void main(){writeln(\"hello world\");}"); - LexerConfig config; - auto tokens = byToken(source, config); - int tokenCount = 0; - foreach (t; tokens) - { - ++tokenCount; - } - assert (tokenCount == 16); + auto source = cast(ubyte[]) ("import std.stdio; void main(){writeln(\"hello world\");}"); + LexerConfig config; + auto tokens = byToken(source, config); + int tokenCount = 0; + foreach (t; tokens) + { + ++tokenCount; + } + assert (tokenCount == 16); }