This commit is contained in:
Hackerpilot 2013-01-24 13:55:31 -08:00
parent 9e670cca65
commit c7b84ca0cc
1 changed file with 234 additions and 175 deletions

View File

@ -3,6 +3,64 @@
/** /**
* This module contains a range-based lexer for the D programming language. * This module contains a range-based lexer for the D programming language.
* *
* Examples:
*
* Generate HTML markup of D code.
* ---
* import std.stdio;
* import std.array;
* import std.file;
* import std.d.lexer;
*
* void writeSpan(string cssClass, string value)
* {
* stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);
* }
*
* void highlight(R)(R tokens)
* {
* stdout.writeln(q"[<!DOCTYPE html>
* <html>
* <head>
* <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
* </head>
* <body>
* <style type="text/css">
* html { background-color: #fff; color: #222; }
* .kwrd { font-weight: bold; color: blue; }
* .com { color: green; font-style: italic;}
* .num { color: orangered; font-weight: bold; }
* .str { color: red; font-style: italic; }
* .op { color: #333; font-weight: bold; }
* .type { color: magenta; font-weight: bold; }
* </style>
* <pre>]");
*
* foreach (Token t; tokens)
* {
* if (t.type > TokenType.TYPES_BEGIN && t.type < TokenType.TYPES_END)
* writeSpan("type", t.value);
* else if (t.type > TokenType.KEYWORDS_BEGIN && t.type < TokenType.KEYWORDS_END)
* writeSpan("kwrd", t.value);
* else if (t.type == TokenType.Comment)
* writeSpan("com", t.value);
* else if (t.type > TokenType.STRINGS_BEGIN && t.type < TokenType.STRINGS_END)
* writeSpan("str", t.value);
* else if (t.type > TokenType.NUMBERS_BEGIN && t.type < TokenType.NUMBERS_END)
* writeSpan("num", t.value);
* else if (t.type > TokenType.OPERATORS_BEGIN && t.type < TokenType.OPERATORS_END)
* writeSpan("op", t.value);
* else
* stdout.write(t.value.replace("&", "&amp;").replace("<", "&lt;"));
* }
* stdout.writeln("</pre>\n</body></html>");
* }
*
* void main(string[] args)
* {
* args[1].readText().byToken(IterationStyle.Everything, StringStyle.Source).highlight();
* }
* ---
*
* Copyright: Brian Schott 2013 * Copyright: Brian Schott 2013
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0) * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
* Authors: Brian Schott * Authors: Brian Schott
@ -143,7 +201,8 @@ TokenRange!(R) byToken(R)(R range, const IterationStyle iterationStyle = Iterati
} }
/** /**
* Range of tokens * Range of tokens. Avoid creating instances of this manually. Use
* $(DDOC_PSYMBOL byToken$(LPAREN)$(RPAREN)) instead, as it does some initialization work.
*/ */
class TokenRange(R) : InputRange!(Token) class TokenRange(R) : InputRange!(Token)
{ {
@ -235,7 +294,7 @@ class TokenRange(R) : InputRange!(Token)
private: private:
/** /*
* Advances the range to the next token * Advances the range to the next token
*/ */
void advance() void advance()
@ -474,199 +533,199 @@ enum TokenType: uint
{ {
// Operators // Operators
OPERATORS_BEGIN, /// OPERATORS_BEGIN, ///
Assign, /// = Assign, /// $(D_KEYWORD =)
At, /// @ At, /// $(D_KEYWORD @)
BitAnd, /// & BitAnd, /// $(D_KEYWORD &)
BitAndEquals, /// &= BitAndEquals, /// $(D_KEYWORD &=)
BitOr, /// | BitOr, /// $(D_KEYWORD |)
BitOrEquals, /// |= BitOrEquals, /// $(D_KEYWORD |=)
CatEquals, /// ~= CatEquals, /// $(D_KEYWORD ~=)
Colon, /// : Colon, /// $(D_KEYWORD :)
Comma, /// , Comma, /// $(D_KEYWORD ,)
Decrement, /// -- Decrement, /// $(D_KEYWORD --)
Div, /// / Div, /// $(D_KEYWORD /)
DivEquals, /// /= DivEquals, /// $(D_KEYWORD /=)
Dollar, /// $ Dollar, /// $(D_KEYWORD $)
Dot, /// . Dot, /// $(D_KEYWORD .)
Equals, /// == Equals, /// $(D_KEYWORD ==)
GoesTo, // => GoesTo, // =>
Greater, /// > Greater, /// $(D_KEYWORD >)
GreaterEqual, /// >= GreaterEqual, /// $(D_KEYWORD >=)
Hash, // # Hash, // $(D_KEYWORD #)
Increment, /// ++ Increment, /// $(D_KEYWORD ++)
LBrace, /// { LBrace, /// $(D_KEYWORD {)
LBracket, /// [ LBracket, /// $(D_KEYWORD [)
Less, /// < Less, /// $(D_KEYWORD <)
LessEqual, /// <= LessEqual, /// $(D_KEYWORD <=)
LessEqualGreater, // <>= LessEqualGreater, // $(D_KEYWORD <>=)
LessOrGreater, /// <> LessOrGreater, /// $(D_KEYWORD <>)
LogicAnd, /// && LogicAnd, /// $(D_KEYWORD &&)
LogicOr, /// || LogicOr, /// $(D_KEYWORD ||)
LParen, /// $(LPAREN) LParen, /// $(D_KEYWORD $(LPAREN))
Minus, /// - Minus, /// $(D_KEYWORD -)
MinusEquals, /// -= MinusEquals, /// $(D_KEYWORD -=)
Mod, /// % Mod, /// $(D_KEYWORD %)
ModEquals, /// %= ModEquals, /// $(D_KEYWORD %=)
MulEquals, /// *= MulEquals, /// $(D_KEYWORD *=)
Not, /// ! Not, /// $(D_KEYWORD !)
NotEquals, /// != NotEquals, /// $(D_KEYWORD !=)
NotGreater, /// !> NotGreater, /// $(D_KEYWORD !>)
NotGreaterEqual, /// !>= NotGreaterEqual, /// $(D_KEYWORD !>=)
NotLess, /// !< NotLess, /// $(D_KEYWORD !<)
NotLessEqual, /// !<= NotLessEqual, /// $(D_KEYWORD !<=)
NotLessEqualGreater, /// !<> NotLessEqualGreater, /// $(D_KEYWORD !<>)
Plus, /// + Plus, /// $(D_KEYWORD +)
PlusEquals, /// += PlusEquals, /// $(D_KEYWORD +=)
Pow, /// ^^ Pow, /// $(D_KEYWORD ^^)
PowEquals, /// ^^= PowEquals, /// $(D_KEYWORD ^^=)
RBrace, /// } RBrace, /// $(D_KEYWORD })
RBracket, /// ] RBracket, /// $(D_KEYWORD ])
RParen, /// $(RPAREN) RParen, /// $(D_KEYWORD $(RPAREN))
Semicolon, /// ; Semicolon, /// $(D_KEYWORD ;)
ShiftLeft, /// << ShiftLeft, /// $(D_KEYWORD <<)
ShiftLeftEqual, /// <<= ShiftLeftEqual, /// $(D_KEYWORD <<=)
ShiftRight, /// >> ShiftRight, /// $(D_KEYWORD >>)
ShiftRightEqual, /// >>= ShiftRightEqual, /// $(D_KEYWORD >>=)
Slice, // .. Slice, // ..
Star, /// * Star, /// $(D_KEYWORD *)
Ternary, /// ? Ternary, /// $(D_KEYWORD ?)
Tilde, /// ~ Tilde, /// $(D_KEYWORD ~)
Unordered, /// !<>= Unordered, /// $(D_KEYWORD !<>=)
UnsignedShiftRight, /// >>> UnsignedShiftRight, /// $(D_KEYWORD >>>)
UnsignedShiftRightEqual, /// >>>= UnsignedShiftRightEqual, /// $(D_KEYWORD >>>=)
Vararg, /// ... Vararg, /// $(D_KEYWORD ...)
Xor, /// ^ Xor, /// $(D_KEYWORD ^)
XorEquals, /// ^= XorEquals, /// $(D_KEYWORD ^=)
OPERATORS_END, /// OPERATORS_END, ///
// Keywords // Keywords
KEYWORDS_BEGIN, /// KEYWORDS_BEGIN, ///
TYPES_BEGIN, /// TYPES_BEGIN, ///
Bool, /// bool Bool, /// $(D_KEYWORD bool)
Byte, /// byte Byte, /// $(D_KEYWORD byte)
Cdouble, /// cdouble Cdouble, /// $(D_KEYWORD cdouble)
Cent, /// cent Cent, /// $(D_KEYWORD cent)
Cfloat, /// cfloat Cfloat, /// $(D_KEYWORD cfloat)
Char, /// char Char, /// $(D_KEYWORD char)
Creal, /// creal Creal, /// $(D_KEYWORD creal)
Dchar, /// dchar Dchar, /// $(D_KEYWORD dchar)
Double, /// double Double, /// $(D_KEYWORD double)
DString, /// dstring DString, /// $(D_KEYWORD dstring)
Float, /// float Float, /// $(D_KEYWORD float)
Function, /// function Function, /// $(D_KEYWORD function)
Idouble, /// idouble Idouble, /// $(D_KEYWORD idouble)
Ifloat, /// ifloat Ifloat, /// $(D_KEYWORD ifloat)
Int, /// int Int, /// $(D_KEYWORD int)
Ireal, /// ireal Ireal, /// $(D_KEYWORD ireal)
Long, /// long Long, /// $(D_KEYWORD long)
Real, /// real Real, /// $(D_KEYWORD real)
Short, /// short Short, /// $(D_KEYWORD short)
String, /// string String, /// $(D_KEYWORD string)
Ubyte, /// ubyte Ubyte, /// $(D_KEYWORD ubyte)
Ucent, /// ucent Ucent, /// $(D_KEYWORD ucent)
Uint, /// uint Uint, /// $(D_KEYWORD uint)
Ulong, /// ulong Ulong, /// $(D_KEYWORD ulong)
Ushort, /// ushort Ushort, /// $(D_KEYWORD ushort)
Void, /// void Void, /// $(D_KEYWORD void)
Wchar, /// wchar Wchar, /// $(D_KEYWORD wchar)
WString, /// wstring WString, /// $(D_KEYWORD wstring)
TYPES_END, /// TYPES_END, ///
ATTRIBUTES_BEGIN, /// ATTRIBUTES_BEGIN, ///
Align, /// align Align, /// $(D_KEYWORD align)
Deprecated, /// deprecated Deprecated, /// $(D_KEYWORD deprecated)
Extern, /// extern Extern, /// $(D_KEYWORD extern)
Pragma, /// pragma Pragma, /// $(D_KEYWORD pragma)
PROTECTION_BEGIN, /// PROTECTION_BEGIN, ///
Export, /// export Export, /// $(D_KEYWORD export)
Package, /// package Package, /// $(D_KEYWORD package)
Private, /// private Private, /// $(D_KEYWORD private)
Protected, /// protected Protected, /// $(D_KEYWORD protected)
Public, /// public Public, /// $(D_KEYWORD public)
PROTECTION_END, /// PROTECTION_END, ///
Abstract, /// abstract Abstract, /// $(D_KEYWORD abstract)
Auto, /// auto Auto, /// $(D_KEYWORD auto)
Const, /// const Const, /// $(D_KEYWORD const)
Final, /// final Final, /// $(D_KEYWORD final)
Gshared, /// __gshared Gshared, /// $(D_KEYWORD __gshared)
Immutable, // immutable Immutable, // immutable
Inout, // inout Inout, // inout
Scope, /// scope Scope, /// $(D_KEYWORD scope)
Shared, // shared Shared, // shared
Static, /// static Static, /// $(D_KEYWORD static)
Synchronized, /// synchronized Synchronized, /// $(D_KEYWORD synchronized)
ATTRIBUTES_END, /// ATTRIBUTES_END, ///
Alias, /// alias Alias, /// $(D_KEYWORD alias)
Asm, /// asm Asm, /// $(D_KEYWORD asm)
Assert, /// assert Assert, /// $(D_KEYWORD assert)
Body, /// body Body, /// $(D_KEYWORD body)
Break, /// break Break, /// $(D_KEYWORD break)
Case, /// case Case, /// $(D_KEYWORD case)
Cast, /// cast Cast, /// $(D_KEYWORD cast)
Catch, /// catch Catch, /// $(D_KEYWORD catch)
Class, /// class Class, /// $(D_KEYWORD class)
Continue, /// continue Continue, /// $(D_KEYWORD continue)
Debug, /// debug Debug, /// $(D_KEYWORD debug)
Default, /// default Default, /// $(D_KEYWORD default)
Delegate, /// delegate Delegate, /// $(D_KEYWORD delegate)
Delete, /// delete Delete, /// $(D_KEYWORD delete)
Do, /// do Do, /// $(D_KEYWORD do)
Else, /// else Else, /// $(D_KEYWORD else)
Enum, /// enum Enum, /// $(D_KEYWORD enum)
False, /// false False, /// $(D_KEYWORD false)
Finally, /// finally Finally, /// $(D_KEYWORD finally)
Foreach, /// foreach Foreach, /// $(D_KEYWORD foreach)
Foreach_reverse, /// foreach_reverse Foreach_reverse, /// $(D_KEYWORD foreach_reverse)
For, /// for For, /// $(D_KEYWORD for)
Goto, /// goto Goto, /// $(D_KEYWORD goto)
If, /// if If, /// $(D_KEYWORD if)
Import, /// import Import, /// $(D_KEYWORD import)
In, /// in In, /// $(D_KEYWORD in)
Interface, /// interface Interface, /// $(D_KEYWORD interface)
Invariant, /// invariant Invariant, /// $(D_KEYWORD invariant)
Is, /// is Is, /// $(D_KEYWORD is)
Lazy, /// lazy Lazy, /// $(D_KEYWORD lazy)
Macro, /// macro Macro, /// $(D_KEYWORD macro)
Mixin, /// mixin Mixin, /// $(D_KEYWORD mixin)
Module, /// module Module, /// $(D_KEYWORD module)
New, /// new New, /// $(D_KEYWORD new)
Nothrow, /// nothrow Nothrow, /// $(D_KEYWORD nothrow)
Null, /// null Null, /// $(D_KEYWORD null)
Out, /// out Out, /// $(D_KEYWORD out)
Override, /// override Override, /// $(D_KEYWORD override)
Pure, /// pure Pure, /// $(D_KEYWORD pure)
Ref, /// ref Ref, /// $(D_KEYWORD ref)
Return, /// return Return, /// $(D_KEYWORD return)
Struct, /// struct Struct, /// $(D_KEYWORD struct)
Super, /// super Super, /// $(D_KEYWORD super)
Switch, /// switch Switch, /// $(D_KEYWORD switch)
Template, /// template Template, /// $(D_KEYWORD template)
This, /// this This, /// $(D_KEYWORD this)
Throw, /// throw Throw, /// $(D_KEYWORD throw)
True, /// true True, /// $(D_KEYWORD true)
Try, /// try Try, /// $(D_KEYWORD try)
Typedef, /// typedef Typedef, /// $(D_KEYWORD typedef)
Typeid, /// typeid Typeid, /// $(D_KEYWORD typeid)
Typeof, /// typeof Typeof, /// $(D_KEYWORD typeof)
Union, /// union Union, /// $(D_KEYWORD union)
Unittest, /// unittest Unittest, /// $(D_KEYWORD unittest)
Version, /// version Version, /// $(D_KEYWORD version)
Volatile, /// volatile Volatile, /// $(D_KEYWORD volatile)
While, /// while While, /// $(D_KEYWORD while)
With, /// with With, /// $(D_KEYWORD with)
KEYWORDS_END, /// KEYWORDS_END, ///
// Constants // Constants
CONSTANTS_BEGIN, /// CONSTANTS_BEGIN, ///
File, /// __FILE__ File, /// $(D_KEYWORD __FILE__)
Line, /// __LINE__ Line, /// $(D_KEYWORD __LINE__)
Thread, /// __thread Thread, /// $(D_KEYWORD __thread)
Traits, /// __traits Traits, /// $(D_KEYWORD __traits)
CONSTANTS_END, /// CONSTANTS_END, ///
// Misc // Misc
MISC_BEGIN, /// MISC_BEGIN, ///
Comment, /// /** comment */ or // comment or ///comment Comment, /// $(D_COMMENT /** comment */) or $(D_COMMENT // comment) or $(D_COMMENT ///comment)
Identifier, /// anything else Identifier, /// anything else
ScriptLine, // Line at the beginning of source file that starts from #! ScriptLine, // Line at the beginning of source file that starts from #!
Whitespace, /// whitespace Whitespace, /// whitespace
@ -677,7 +736,7 @@ enum TokenType: uint
LITERALS_BEGIN, /// LITERALS_BEGIN, ///
NUMBERS_BEGIN, /// NUMBERS_BEGIN, ///
DoubleLiteral, /// 123.456 DoubleLiteral, /// 123.456
FloatLiteral, /// 123.456f or 0x123_45p-af FloatLiteral, /// 123.456f or 0x123_45p-3
IDoubleLiteral, /// 123.456i IDoubleLiteral, /// 123.456i
IFloatLiteral, /// 123.456fi IFloatLiteral, /// 123.456fi
IntLiteral, /// 123 or 0b1101010101 IntLiteral, /// 123 or 0b1101010101
@ -688,9 +747,9 @@ enum TokenType: uint
UnsignedLongLiteral, /// 123uL UnsignedLongLiteral, /// 123uL
NUMBERS_END, /// NUMBERS_END, ///
STRINGS_BEGIN, /// STRINGS_BEGIN, ///
DStringLiteral, /// "32-bit character string"d DStringLiteral, /// $(D_STRING "32-bit character string"d)
StringLiteral, /// "a string" StringLiteral, /// $(D_STRING "an 8-bit string")
WStringLiteral, /// "16-bit character string"w WStringLiteral, /// $(D_STRING "16-bit character string"w)
STRINGS_END, /// STRINGS_END, ///
LITERALS_END, /// LITERALS_END, ///
} }