From acfd44eea8d306dceb75d1feaf807b743eb85c75 Mon Sep 17 00:00:00 2001 From: Hackerpilot Date: Mon, 3 Feb 2014 01:06:23 -0800 Subject: [PATCH] Delete lexer.html --- stdx/lexer.html | 483 ------------------------------------------------ 1 file changed, 483 deletions(-) delete mode 100644 stdx/lexer.html diff --git a/stdx/lexer.html b/stdx/lexer.html deleted file mode 100644 index 8881aa0..0000000 --- a/stdx/lexer.html +++ /dev/null @@ -1,483 +0,0 @@ -

stdx.lexer

This module contains a range-based lexer generator.

The lexer generator consists of a template mixin, Lexer, along with several helper templates for generating such things as token identifiers.

To generate a lexer using this API, several constants must be supplied:
staticTokens
-
A listing of the tokens whose exact value never changes and which cannot possibly be a token handled by the default token lexing function. The most common example of this kind of token is an operator such as "*" or "-" in a programming language.
-
dynamicTokens
-
A listing of tokens whose value is variable, such as whitespace, identifiers, number literals, and string literals.
-
possibleDefaultTokens
-
A listing of tokens that could possibly be one of the tokens handled by the default token handling function. A common example of this is a keyword such as "for", which looks like the beginning of the identifier "fortunate". isSeparating is called to determine if the character after the 'r' separates the identifier, indicating that the token is "for", or if lexing should be turned over to the defaultTokenFunction.
-
tokenHandlers
-
A mapping of prefixes to custom token handling function names. The generated lexer will search for the even-index elements of this array, and then call the function whose name is the element immediately after the even-indexed element. This is used for lexing complex tokens whose prefix is fixed.
-
-

Here are some example constants for a simple calculator lexer:
// There are a near infinite number of valid number literals, so numbers are
// dynamic tokens.
enum string[] dynamicTokens = ["numberLiteral", "whitespace"];

// The operators are always the same, and cannot start a numberLiteral, so
// they are staticTokens
enum string[] staticTokens = ["-", "+", "*", "/"];

// In this simple example there are no keywords or other tokens that could
// look like dynamic tokens, so this is blank.
enum string[] possibleDefaultTokens = [];

// If any whitespace character or digit is encountered, pass lexing over to
// our custom handler functions. These will be demonstrated in an example
// later on.
enum string[] tokenHandlers = [
    "0", "lexNumber",
    "1", "lexNumber",
    "2", "lexNumber",
    "3", "lexNumber",
    "4", "lexNumber",
    "5", "lexNumber",
    "6", "lexNumber",
    "7", "lexNumber",
    "8", "lexNumber",
    "9", "lexNumber",
    " ", "lexWhitespace",
    "\n", "lexWhitespace",
    "\t", "lexWhitespace",
    "\r", "lexWhitespace"
];
-
- -

Examples:

License:
License 1.0

Authors:
Brian Schott, with ideas shamelessly stolen from Andrei Alexandrescu

Source:
std/lexer.d

- -
template TokenIdType(alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)
-
Template for determining the type used for a token type. Selects the smallest unsigned integral type that is able to hold the value staticTokens.length + dynamicTokens.length + possibleDefaultTokens.length. For example, if there are 20 static tokens, 30 dynamic tokens, and 10 possible default tokens, this template will alias itself to ubyte, as 20 + 30 + 10 < ubyte.max.

Examples:
// In our calculator example this means that IdType is an alias for ubyte.
alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens);
-
-

- -
-
@property string tokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens)(IdType type);

Looks up the string representation of the given token type. This is the opposite of the function of the TokenId template.

Parameters:
IdType type: the token type identifier

Examples:
alias str = tokenStringRepresentation!(IdType, staticTokens, dynamicTokens, possibleDefaultTokens);
assert (str(tok!"*") == "*");
-
-

See Also:
TokenId

- -
-
template TokenId(IdType, alias staticTokens, alias dynamicTokens, alias possibleDefaultTokens, string symbol)
-
Generates the token type identifier for the given symbol. There are two special cases:
  • If symbol is "", then the token identifier will be 0
  • If symbol is "\0", then the token identifier will be the maximum valid token type identifier
In all cases this template will alias itself to a constant of type IdType. This template will fail at compile time if symbol is not one of the staticTokens, dynamicTokens, or possibleDefaultTokens.

Examples:
template tok(string symbol)
{
    alias tok = TokenId!(IdType, staticTokens, dynamicTokens,
        possibleDefaultTokens, symbol);
}
// num and plus are of type ubyte.
IdType plus = tok!"+";
IdType num = tok!"numberLiteral";
-
-

- -
-
struct TokenStructure(IdType, string extraFields = "");

The token that is returned by the lexer.

Parameters:
IdType: The D type of the "type" token type field.
extraFields: A string containing D code for any extra fields that should be included in the token structure body. This string is passed directly to a mixin statement.

Examples:
// No extra struct fields are desired in this example, so leave it blank.
alias Token = TokenStructure!(IdType, "");
Token minusToken = Token(tok!"-");
-
-
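As an additional sketch (the field name below is an assumption, not part of the calculator example), a non-empty extraFields string is mixed directly into the struct body, so it can declare extra members:

// Hypothetical token type that carries an extra field.
alias CommentedToken = TokenStructure!(IdType, "string comment;");
CommentedToken t = CommentedToken(tok!"-");
t.comment = "subtraction operator"; // illustrative use of the extra field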

- -
const pure nothrow @safe bool opEquals(IdType type);

== overload for the token type.
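For example, this overload lets a token be compared directly against a token type identifier (using the Token alias and tok template from the calculator example):

Token minusToken = Token(tok!"-");
assert (minusToken == tok!"-");
assert (minusToken != tok!"+");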

- -
-
this(IdType type);

Constructs a token from a token type.

Parameters:
IdType type: the token type

- -
-
this(IdType type, string text, size_t line, size_t column, size_t index);

Constructs a token.

Parameters:
IdType type: the token type
string text: the text of the token, which may be null
size_t line: the line number at which this token occurs
size_t column: the column number at which this token occurs
size_t index: the byte offset from the beginning of the input at which this token occurs
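For instance, a handler in the calculator example might construct a fully populated token like this (the position values are illustrative):

// A "+" found at line 1, column 4, byte offset 3 of the input "12 + 34".
Token plusToken = Token(tok!"+", "+", 1, 4, 3);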

- -
-
string text;

The text of the token.

- -
-
size_t line;

The line number at which this token occurs.

- -
-
size_t column;

The column number at which this token occurs.

- -
-
size_t index;

The byte offset from the beginning of the input at which this token occurs.

- -
-
IdType type;

The token type.

- -
-
-
-
template Lexer(IDType, Token, alias defaultTokenFunction, alias tokenSeparatingFunction, alias staticTokens, alias dynamicTokens, alias tokenHandlers, alias possibleDefaultTokens)
-
The implementation of the lexer is contained within this mixin template. To use it, this template should be mixed into a struct that represents the lexer for your language. This struct should implement the following methods:
  • popFront, which should call this mixin's popFront() and additionally perform any token filtering or shuffling you deem necessary. For example, you can implement popFront to skip comment or whitespace tokens.
  • A function that serves as the default token lexing function. For most languages this will be the identifier lexing function.
  • A function that is able to determine if an identifier/keyword has come to an end. This function must return bool and take a single size_t argument representing the number of bytes to skip over before looking for a separating character.
  • Any functions referred to in the tokenHandlers template parameter. These functions must be marked pure nothrow, take no arguments, and return a token.
  • A constructor that initializes the range field as well as calls popFront() exactly once (to initialize the front field).

Examples:
struct CalculatorLexer
{
    mixin Lexer!(IdType, Token, defaultTokenFunction, isSeparating,
        staticTokens, dynamicTokens, tokenHandlers, possibleDefaultTokens);

    this (ubyte[] bytes)
    {
        this.range = LexerRange(bytes);
        popFront();
    }

    void popFront() pure
    {
        _popFront();
    }

    Token lexNumber() pure nothrow @safe
    {
        ...
    }

    Token lexWhitespace() pure nothrow @safe
    {
        ...
    }

    Token defaultTokenFunction() pure nothrow @safe
    {
        // There is no default token in the example calculator language, so
        // this is always an error.
        range.popFront();
        return Token(tok!"");
    }

    bool isSeparating(size_t offset) pure nothrow @safe
    {
        // For this example language, always return true.
        return true;
    }
}
-
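As a usage sketch (assuming the CalculatorLexer above), the mixed-in front, empty, and popFront members let the finished lexer be consumed like any other input range; because the constructor calls popFront() once, front is valid immediately:

import std.stdio : writeln;

ubyte[] source = cast(ubyte[]) "12 + 34".dup;
auto lexer = CalculatorLexer(source);
while (!lexer.empty)
{
    // Each token carries its type, text, and position information.
    writeln(lexer.front.type, " ", lexer.front.index);
    lexer.popFront();
}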

- -
const pure nothrow @property const(Token) front();

Implements the range primitive front().

- -
-
const pure nothrow @property bool empty();

Implements the range primitive empty().

- -
-
LexerRange range;

The lexer input.

- -
-
Token _front;

The token that is currently at the front of the range.

- -
-
-
-
struct LexerRange;

Range structure that wraps the lexer's input.

- -
pure nothrow @safe this(const(ubyte)[] bytes, size_t index = 0, size_t column = 1, size_t line = 1);

Parameters:
const(ubyte)[] bytes: the lexer input
size_t index: the initial offset from the beginning of bytes
size_t column: the initial column number
size_t line: the initial line number

- -
-
const pure nothrow @safe size_t mark();

Returns:
a mark at the current position that can then be used with slice.

- -
-
pure nothrow @safe void seek(size_t m);

Sets the range to the given position.

Parameters:
size_t m: the position to seek to

- -
-
const pure nothrow @safe const(ubyte)[] slice(size_t m);

Returns a slice of the input byte array between the given mark and the current position.

Parameters:
size_t m: the beginning index of the slice to return
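A sketch of how a handler typically combines mark() and slice() to capture a token's text (this lexNumber body is an assumption modeled on the calculator example, not the original implementation; in practice the sliced bytes would usually go through the StringCache described below):

Token lexNumber() pure nothrow
{
    auto m = range.mark();            // remember where the token starts
    auto line = range.line;
    auto column = range.column;
    while (!range.empty && range.front >= '0' && range.front <= '9')
        range.popFront();
    // slice() returns the bytes between the mark and the current position.
    auto text = cast(string) range.slice(m).idup;
    return Token(tok!"numberLiteral", text, line, column, m);
}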

- -
-
const pure nothrow @safe bool empty();

Implements the range primitive empty.

- -
-
const pure nothrow @safe ubyte front();

Implements the range primitive front.

- -
-
const pure nothrow @safe const(ubyte)[] peek(size_t p);

Returns:
the current item as well as the p items ahead of it.

- -
-
const pure nothrow @safe ubyte peekAt(size_t offset);

Returns:
the byte that is offset bytes ahead of the current position.
-
-
const pure nothrow @safe bool canPeek(size_t p);

Returns:
true if it is possible to peek p bytes ahead.

- -
-
pure nothrow @safe void popFront();

Implements the range primitive popFront.

- -
-
pure nothrow @safe void popFrontN(size_t n);

Implements the algorithm popFrontN more efficiently.

- -
-
pure nothrow @safe void incrementLine();

Increments the range's line number and resets the column counter.
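A sketch (an assumption modeled on the calculator example) of a whitespace handler that calls incrementLine() whenever it consumes a newline, so that line and column reporting stays accurate:

Token lexWhitespace() pure nothrow @safe
{
    auto m = range.mark();
    auto line = range.line;
    auto column = range.column;
    while (!range.empty && (range.front == ' ' || range.front == '\t'
        || range.front == '\r' || range.front == '\n'))
    {
        if (range.front == '\n')
        {
            range.popFront();
            range.incrementLine(); // advance the line number, reset the column
        }
        else
            range.popFront();
    }
    return Token(tok!"whitespace", null, line, column, m);
}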

- -
-
const(ubyte)[] bytes;

The input bytes.

- -
-
size_t index;

The range's current position.

- -
-
size_t column;

The current column number.

- -
-
size_t line;

The current line number.

- -
-
-
-
struct StringCache;

The string cache implements a map/set for strings. Placing a string in the cache returns an identifier that can be used to instantly access the stored string. It is then possible to simply compare these indexes instead of performing full string comparisons when comparing the string content of dynamic tokens. The string cache also handles its own memory, so that lexers given mutable ubyte[] input can still have immutable string fields in their tokens. Because the string cache also performs de-duplication, it is possible to drastically reduce the memory usage of a lexer.
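A brief usage sketch (the input strings are illustrative): identical contents are de-duplicated to the same key, so dynamic token text can be compared by key rather than by string contents:

StringCache cache = StringCache(StringCache.defaultBucketCount);
ubyte[] a = cast(ubyte[]) "count".dup;
ubyte[] b = cast(ubyte[]) "count".dup;
// The same contents always map to the same key.
assert (cache.cache(a) == cache.cache(b));
// The key instantly retrieves an immutable copy of the string.
assert (cache.get(cache.cache(a)) == "count");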

- -
this(size_t bucketCount);

Parameters:
size_t bucketCount: the initial number of buckets.

- -
-
pure nothrow @safe string cacheGet(const(ubyte[]) bytes);

Equivalent to calling cache() and get().

StringCache cache;
ubyte[] str = ['a', 'b', 'c'];
string s = cache.get(cache.cache(str));
assert(s == "abc");
-
-

- -
-
pure nothrow @safe string cacheGet(const(ubyte[]) bytes, uint hash);

Equivalent to calling cache() and get().

- -
-
pure nothrow @safe size_t cache(const(ubyte)[] bytes);

Caches a string.

Parameters:
const(ubyte)[] bytes: the string to cache

Returns:
A key that can be used to retrieve the cached string

Examples:
StringCache cache;
ubyte[] bytes = ['a', 'b', 'c'];
size_t first = cache.cache(bytes);
size_t second = cache.cache(bytes);
assert (first == second);
-
-

- -
-
pure nothrow @safe size_t cache(const(ubyte)[] bytes, uint hash);

Caches a string as above, but uses the given hash code instead of calculating one itself. Using this alongside hashStep() can reduce the amount of work necessary when lexing dynamic tokens.

- -
-
const pure nothrow @safe string get(size_t index);

Gets a cached string based on its key.

Parameters:
size_t index: the key

Returns:
the cached string

- -
-
static pure nothrow @safe uint hashStep(ubyte b, uint h);

Incremental hashing.

Parameters:
ubyte b: the byte to add to the hash
uint h: the hash that has been calculated so far

Returns:
the new hash code for the string.
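A sketch of how a handler can fold hashStep() into its scanning loop and then hand the accumulated hash to cache()/cacheGet(), avoiding a second pass over the token's bytes. This is a hypothetical lexIdentifier for a language whose dynamicTokens include "identifier", and it assumes the lexer struct also declares a StringCache member named cache; neither is part of the calculator example:

Token lexIdentifier() pure nothrow @safe
{
    auto m = range.mark();
    auto line = range.line;
    auto column = range.column;
    uint hash = 0;
    while (!range.empty
        && (range.front == '_'
            || (range.front >= 'a' && range.front <= 'z')
            || (range.front >= 'A' && range.front <= 'Z')))
    {
        hash = StringCache.hashStep(range.front, hash); // hash while scanning
        range.popFront();
    }
    // Reuse the hash computed above instead of hashing the slice again.
    string text = cache.cacheGet(range.slice(m), hash);
    return Token(tok!"identifier", text, line, column, m);
}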

- -
-
static int defaultBucketCount;

The default bucket count for the string cache.

- -
-
-
-
- -
