lexer (HL), added a perfect dictionary for the straight keywords

This commit is contained in:
Basile Burg 2016-04-10 04:44:08 +02:00
parent 39f868ad7e
commit 02af7dee46
3 changed files with 114 additions and 108 deletions

View File

@ -401,11 +401,6 @@
<OtherUnitFiles Value="..\src;..\etc\fcl-json\src"/>
<UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/>
</SearchPaths>
<Parsing>
<SyntaxOptions>
<StaticKeyword Value="True"/>
</SyntaxOptions>
</Parsing>
<CodeGeneration>
<SmartLinkUnit Value="True"/>
<Optimizations>

View File

@ -9,52 +9,8 @@ uses
SynEditHighlighter, SynEditHighlighterFoldBase, SynEditTypes,
ce_dlangutils,ce_dlangmaps;
const
// Words highlighted as D2 keywords (108 entries, grouped by first letter).
// Besides language keywords the list also contains a few library aliases:
// 'string', 'wstring', 'dstring', 'size_t' and 'ptrdiff_t'.
// The table is passed to TD2Dictionary.create to fill the lookup dictionary.
D2Kw: array[0..107] of string =
( 'abstract', 'alias', 'align', 'asm', 'assert', 'auto',
'body', 'bool', 'break', 'byte',
'case', 'cast', 'catch', 'cdouble', 'cent', 'cfloat', 'char', 'class',
'const', 'continue', 'creal',
'dchar', 'debug', 'default', 'delegate', 'delete', 'deprecated', 'do', 'double', 'dstring',
'else', 'enum', 'export', 'extern',
'false', 'final', 'finally', 'float', 'for', 'foreach',
'foreach_reverse', 'function',
'goto', '__gshared',
'idouble', 'if', 'ifloat', 'immutable', 'import', 'in', 'inout', 'int',
'interface', 'invariant', 'ireal', 'is',
'lazy', 'long',
'macro', 'mixin', 'module',
'new', 'nothrow', 'null',
'out', 'override',
'package', 'pragma', 'private', 'protected', 'ptrdiff_t', 'public', 'pure',
'real', 'ref', 'return',
'size_t', 'scope', 'shared', 'short', 'static', 'string', 'struct',
'super', 'switch', 'synchronized',
'template', 'this', 'throw', 'true', 'try', 'typedef', 'typeid', 'typeof',
'ubyte', 'ucent', 'uint', 'ulong', 'union', 'unittest', 'ushort',
'version', 'void', 'volatile',
'wchar', 'while', 'with', 'wstring'
);
type
(**
 * One bucket of TD2Dictionary: all the stored strings that share
 * the same 8-bit hash.
 *)
TD2DictionaryEntry = record
// true once at least one string has been stored in this bucket.
filled: Boolean;
// the strings stored in this bucket, in insertion order.
values: array of string;
end;
(**
 * Set of strings bucketed by an 8-bit hash (256 buckets), used to test
 * whether a word belongs to a fixed list.
 *)
TD2Dictionary = object
private
// lengths of the longest and of the shortest stored string; find() uses
// them to reject a candidate before hashing it.
fLongest, fShortest: NativeInt;
// one bucket per possible value of toHash().
fEntries: array[Byte] of TD2DictionaryEntry;
// computes the bucket index of aValue.
function toHash(const aValue: string): Byte; {$IFNDEF DEBUG}inline;{$ENDIF}
// stores aValue unless find() reports that it is already present.
procedure addEntry(const aValue: string);
public
// fills the dictionary with every string of the array "from".
constructor create(from: array of string);
// returns true if aValue has been stored in the dictionary.
function find(const aValue: string): boolean; {$IFNDEF DEBUG}inline;{$ENDIF}
end;
// Kind assigned to the current token by the highlighter (stored in fTokKind);
// for instance tkKeywd is set when the token text matches a keyword.
TTokenKind = (tkCommt, tkIdent, tkKeywd, tkStrng, tkBlank, tkSymbl, tkNumbr,
tkDDocs, tkSpecK, tkError, tkAsmbl);
@ -98,7 +54,6 @@ type
fAsblrAttrib: TSynHighlighterAttributes;
fSpeckAttrib: TSynHighlighterAttributes;
fErrorAttrib: TSynHighlighterAttributes;
fKeyWords: TD2Dictionary;
fLineBuf: string;
fTokStart, fTokStop: Integer;
fTokKind: TTokenKind;
@ -155,54 +110,6 @@ type
implementation
(**
 * Builds the dictionary from the strings of "from".
 *
 * The length bounds and the buckets are reset before inserting:
 * - fShortest must start above any possible length, otherwise addEntry()
 *   (which only ever lowers it) leaves it at its initial value and the
 *   lower-bound rejection in find() never triggers.
 * - an object that does not live in zero-initialized memory would
 *   otherwise start with undefined bounds and "filled" flags.
 *)
constructor TD2Dictionary.create(from: array of string);
var
  value: string;
  i: Integer;
begin
  fLongest := 0;
  fShortest := high(NativeInt);
  for i := low(fEntries) to high(fEntries) do
  begin
    fEntries[i].filled := false;
    fEntries[i].values := nil;
  end;
  for value in from do
    addEntry(value);
end;
{$IFDEF DEBUG}{$PUSH}{$R-}{$ENDIF}
(**
 * Computes the bucket index of aValue.
 *
 * Each character is shifted left by 0 or 4 bits depending on its position
 * (bit 2 of "1 - position"), xor-ed with 25 and summed into the Byte result.
 * Range checking is switched off in DEBUG builds around this function since
 * the sum is expected to exceed the Byte range.
 *)
function TD2Dictionary.toHash(const aValue: string): Byte;
var
  idx, shift: Integer;
begin
  result := 0;
  for idx := 1 to length(aValue) do
  begin
    // either 0 or 4
    shift := 4 and (1 - idx);
    result += (Byte(aValue[idx]) shl shift) xor 25;
  end;
end;
{$IFDEF DEBUG}{$POP}{$ENDIF}
(**
 * Stores aValue in the bucket selected by its hash, unless find() reports
 * that it is already present, then updates the longest/shortest lengths.
 *)
procedure TD2Dictionary.addEntry(const aValue: string);
var
  bucket: Byte;
  len, count: NativeInt;
begin
  if find(aValue) then
    exit;

  len := length(aValue);
  bucket := toHash(aValue);

  // append the value at the end of its bucket
  fEntries[bucket].filled := true;
  count := length(fEntries[bucket].values);
  setLength(fEntries[bucket].values, count + 1);
  fEntries[bucket].values[count] := aValue;

  // keep track of the length bounds used by find()
  if len >= fLongest then
    fLongest := len;
  if len <= fShortest then
    fShortest := len;
end;
(**
 * Indicates whether aValue has been stored in the dictionary.
 *
 * The candidate is first rejected on its length, then on the state of its
 * bucket; only then are the strings of the bucket compared.
 *)
function TD2Dictionary.find(const aValue: string): boolean;
var
  bucket: Byte;
  len, idx: NativeInt;
begin
  len := length(aValue);
  if (len > fLongest) or (len < fShortest) then
    exit(false);

  bucket := toHash(aValue);
  if not fEntries[bucket].filled then
    exit(false);

  for idx := 0 to high(fEntries[bucket].values) do
    if fEntries[bucket].values[idx] = aValue then
      exit(true);

  result := false;
end;
procedure TSynD2SynRange.Assign(Src: TSynCustomHighlighterRange);
var
src_t: TSynD2SynRange;
@ -268,8 +175,6 @@ begin
DefaultFilter:= 'D source|*.d|D interface|*.di';
fKeyWords.create(D2Kw);
fFoldKinds := [fkBrackets,fkRegion];
WordBreakChars := WordBreakChars - ['@'];
@ -986,7 +891,7 @@ begin
if isSymbol(reader^) then break;
if isOperator1(reader^) then break;
end;
if fKeyWords.find(fLineBuf[FTokStart..fTokStop-1]) then
if keywordsMap.match(fLineBuf[FTokStart..fTokStop-1]) then
begin
fTokKind := tkKeywd;
if (fLineBuf[FTokStart..fTokStop-1] = 'asm') then

View File

@ -4,12 +4,13 @@ unit ce_dlangmaps;
interface
type
(**
* Perfect static hash-map that detects the D2 "special" keywords such as
* __LINE__ or __FILE__.
*)
type
specialKeywordsMap = record
private
const fWords: array [0..15] of string =
@ -42,9 +43,90 @@ type
164, 80, 112, 61, 157, 26, 224, 53, 123, 105, 27, 170, 126, 101, 3, 65, 113,
101, 157, 109, 110, 252, 207, 0
);
class function hash(const w: string): Byte; static;
class function hash(const w: string): Byte; static; {$IFNDEF DEBUG}inline;{$ENDIF}
public
class function match(const w: string): boolean; static;
class function match(const w: string): boolean; static; {$IFNDEF DEBUG}inline;{$ENDIF}
end;
(**
 * Perfect static hash-map that detects the "straight" D2 keywords plus a few
 * exceptions for the library types related to immutable strings
 * ('string', 'wstring' and 'dstring').
 *
 * The three tables have 256 slots, one per possible value of hash().
 *)
keywordsMap = record
private
// The words, each stored at the index given by hash(); unused slots hold
// an empty string.
const fWords: array [0..255] of string =
(
'', '', 'scope', '', 'creal', '', '', '', '', '', '', '', '', '', 'delegate',
'', 'dstring', '', 'override', '', '', '', 'is', 'while', 'asm', '', '',
'', 'struct', '', 'cast', '', '', '', 'long', '', '', 'wstring', '', '',
'super', 'else', 'real', '', '', '', '', 'mixin', '', '', '', '', '', 'align',
'', 'dchar', '', '__vector', '', 'bool', '', '', '', '', '', 'unittest',
'ireal', '', '', '', 'nothrow', 'pragma', '', 'null', '', 'do', '', 'cfloat',
'cent', '', '', 'true', '', '', 'macro', 'enum', '', '', '', 'immutable', '',
'', 'private', 'interface', '', 'foreach_reverse', '', '', 'delete', '', '',
'abstract', 'template', '', '', 'idouble', 'volatile', '', '', 'alias', 'version',
'char', 'catch', '', '__traits', 'break', '', 'byte', '', '', 'short', '',
'typeid', 'assert', '', 'goto', '', '', 'protected', '', 'this', '', '', '',
'default', '', '', '', 'deprecated', '', 'uint', '', '', 'false', '', '', '',
'ushort', '', '', 'class', '', '', '', 'ref', '', 'if', 'typeof', 'try', '',
'', 'return', 'void', '', 'throw', '', '', 'pure', 'static', '', 'export', '',
'', 'typedef', 'ucent', 'finally', '', 'union', 'lazy', '', '', 'with', 'case',
'body', '__parameters', '', 'float', '', '', 'invariant', '', '', 'string', 'new',
'ulong', '', '', '', 'function', 'inout', '', '', '', 'switch', '', 'int', '',
'wchar', 'module', '', '', '', '', '', '', 'import', 'for', '', '', '', '', '',
'', 'public', '__gshared', 'shared', 'const', '', 'final', 'foreach', '',
'ifloat', 'out', 'synchronized', '', 'continue', '', '', 'extern', 'package',
'', 'in', '', '', '', 'debug', '', '', 'double', '', '', 'cdouble', '', 'ubyte',
'auto', ''
);
// Tells, for each slot of fWords, whether it holds a word (true) or is
// unused (false); match() tests it before comparing strings.
const fHasEntry: array [0..255] of boolean =
(
false, false, true, false, true, false, false, false, false, false, false,
false, false, false, true, false, true, false, true, false, false, false,
true, true, true, false, false, false, true, false, true, false, false,
false, true, false, false, true, false, false, true, true, true, false,
false, false, false, true, false, false, false, false, false, true, false,
true, false, true, false, true, false, false, false, false, false, true,
true, false, false, false, true, true, false, true, false, true, false,
true, true, false, false, true, false, false, true, true, false, false,
false, true, false, false, true, true, false, true, false, false, true,
false, false, true, true, false, false, true, true, false, false, true,
true, true, true, false, true, true, false, true, false, false, true, false,
true, true, false, true, false, false, true, false, true, false, false, false,
true, false, false, false, true, false, true, false, false, true, false, false,
false, true, false, false, true, false, false, false, true, false, true, true,
true, false, false, true, true, false, true, false, false, true, true, false,
true, false, false, true, true, true, false, true, true, false, false, true,
true, true, true, false, true, false, false, true, false, false, true, true,
true, false, false, false, true, true, false, false, false, true, false, true,
false, true, true, false, false, false, false, false, false, true, true, false,
false, false, false, false, false, true, true, true, true, false, true, true,
false, true, true, true, false, true, false, false, true, true, false, true,
false, false, false, true, false, false, true, false, false, true, false, true,
true, false
);
// Coefficients summed by hash(); the index is derived from each pair of
// consecutive characters and the position in the word.
// NOTE(review): the values look machine-generated so that no two words
// collide - do not edit them by hand.
const fCoeffs: array[0..255] of Byte =
(
52, 97, 140, 119, 15, 140, 19, 72, 97, 210, 250, 188, 57, 103, 183, 37, 46,
56, 13, 166, 218, 23, 103, 109, 208, 28, 53, 198, 197, 249, 112, 136, 245,
167, 160, 217, 160, 35, 91, 70, 207, 80, 9, 131, 0, 102, 137, 201, 201, 236,
161, 10, 120, 104, 42, 66, 179, 30, 76, 137, 43, 160, 178, 192, 113, 214,
208, 213, 9, 226, 182, 248, 107, 4, 227, 0, 44, 168, 54, 135, 93, 54, 179,
49, 127, 36, 114, 213, 191, 59, 205, 253, 99, 47, 4, 33, 105, 152, 134, 204,
63, 7, 38, 110, 46, 227, 60, 136, 193, 218, 165, 122, 168, 156, 239, 143,
255, 233, 189, 244, 39, 50, 219, 95, 8, 219, 231, 44, 104, 114, 59, 90, 240,
28, 50, 39, 90, 144, 70, 15, 57, 53, 198, 219, 126, 49, 14, 100, 75, 215,
90, 208, 147, 57, 240, 103, 141, 183, 65, 51, 14, 246, 49, 5, 102, 33, 156,
122, 135, 160, 212, 193, 195, 133, 86, 74, 182, 187, 115, 239, 64, 161, 16,
112, 28, 82, 18, 112, 139, 9, 250, 117, 16, 34, 40, 223, 113, 158, 26, 230,
2, 218, 158, 134, 136, 14, 156, 53, 193, 237, 238, 162, 75, 230, 241, 211,
140, 154, 137, 22, 193, 112, 118, 231, 220, 130, 151, 229, 78, 62, 21, 253,
30, 161, 223, 3, 220, 125, 140, 243, 86, 180, 166, 127, 40, 156, 212, 44,
104, 140, 251, 36, 211, 254, 77, 25
);
// Computes the slot index of w in the tables.
class function hash(const w: string): Byte; static; {$IFNDEF DEBUG}inline;{$ENDIF}
public
// Indicates whether w is one of the words stored in fWords.
class function match(const w: string): boolean; static; {$IFNDEF DEBUG}inline;{$ENDIF}
end;
@ -67,7 +149,31 @@ var
h: Byte;
begin
result := false;
if length(w) < 7 then
if (length(w) < 7) or (length(w) > 19) then
exit;
h := hash(w);
if fHasEntry[h] then
result := fWords[h] = w;
end;
{$IFDEF DEBUG}{$PUSH}{$R-}{$ENDIF}
(**
 * Computes the slot index of w in the keywordsMap tables.
 *
 * Starting from the second character, each character is combined with its
 * predecessor and its position to select a coefficient in fCoeffs; the
 * coefficients are summed into the Byte result. A word of fewer than two
 * characters hashes to 0.
 *)
class function keywordsMap.hash(const w: string): Byte;
var
  k: integer;
  slot: integer;
begin
  Result := 0;
  for k := 2 to length(w) do
  begin
    slot := (Byte(w[k]) + (Byte(k-1) xor Byte(w[k-1]))) and $FF;
    Result += fCoeffs[slot];
  end;
  Result := Result and $FF;
end;
{$IFDEF DEBUG}{$POP}{$ENDIF}
class function keywordsMap.match(const w: string): boolean;
var
h: Byte;
begin
result := false;
if (length(w) < 2) or (length(w) > 15) then
exit;
h := hash(w);
if fHasEntry[h] then