lexer (HL), added a perfect dictionary for the straight keywords

This commit is contained in:
Basile Burg 2016-04-10 04:44:08 +02:00
parent 39f868ad7e
commit 02af7dee46
3 changed files with 114 additions and 108 deletions

View File

@ -401,11 +401,6 @@
<OtherUnitFiles Value="..\src;..\etc\fcl-json\src"/> <OtherUnitFiles Value="..\src;..\etc\fcl-json\src"/>
<UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/> <UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/>
</SearchPaths> </SearchPaths>
<Parsing>
<SyntaxOptions>
<StaticKeyword Value="True"/>
</SyntaxOptions>
</Parsing>
<CodeGeneration> <CodeGeneration>
<SmartLinkUnit Value="True"/> <SmartLinkUnit Value="True"/>
<Optimizations> <Optimizations>

View File

@ -9,52 +9,8 @@ uses
SynEditHighlighter, SynEditHighlighterFoldBase, SynEditTypes, SynEditHighlighter, SynEditHighlighterFoldBase, SynEditTypes,
ce_dlangutils,ce_dlangmaps; ce_dlangutils,ce_dlangmaps;
const
(**
 * Table of the 108 words looked up by the highlighter to decide whether an
 * identifier is a keyword. Besides language keywords it also lists a few
 * non-keyword names such as 'size_t', 'ptrdiff_t', 'string', 'wstring' and
 * 'dstring'. Entries are grouped by first letter, one group per row.
 *)
D2Kw: array[0..107] of string =
( 'abstract', 'alias', 'align', 'asm', 'assert', 'auto',
'body', 'bool', 'break', 'byte',
'case', 'cast', 'catch', 'cdouble', 'cent', 'cfloat', 'char', 'class',
'const', 'continue', 'creal',
'dchar', 'debug', 'default', 'delegate', 'delete', 'deprecated', 'do', 'double', 'dstring',
'else', 'enum', 'export', 'extern',
'false', 'final', 'finally', 'float', 'for', 'foreach',
'foreach_reverse', 'function',
'goto', '__gshared',
'idouble', 'if', 'ifloat', 'immutable', 'import', 'in', 'inout', 'int',
'interface', 'invariant', 'ireal', 'is',
'lazy', 'long',
'macro', 'mixin', 'module',
'new', 'nothrow', 'null',
'out', 'override',
'package', 'pragma', 'private', 'protected', 'ptrdiff_t', 'public', 'pure',
'real', 'ref', 'return',
'size_t', 'scope', 'shared', 'short', 'static', 'string', 'struct',
'super', 'switch', 'synchronized',
'template', 'this', 'throw', 'true', 'try', 'typedef', 'typeid', 'typeof',
'ubyte', 'ucent', 'uint', 'ulong', 'union', 'unittest', 'ushort',
'version', 'void', 'volatile',
'wchar', 'while', 'with', 'wstring'
);
type type
(**
 * One bucket of TD2Dictionary: the strings sharing the same 8-bit hash.
 *)
TD2DictionaryEntry = record
// true once at least one string has been stored in this bucket.
filled: Boolean;
// the strings stored in this bucket, in insertion order.
values: array of string;
end;
(**
 * Small string set with 256 buckets indexed by a Byte hash.
 * Collisions are resolved by a linear scan of the bucket.
 *)
TD2Dictionary = object
private
// length bounds used by find() to reject candidates before hashing.
fLongest, fShortest: NativeInt;
// one bucket per possible value of toHash().
fEntries: array[Byte] of TD2DictionaryEntry;
// maps a string to its bucket index.
function toHash(const aValue: string): Byte; {$IFNDEF DEBUG}inline;{$ENDIF}
// stores a string unless find() already reports it.
procedure addEntry(const aValue: string);
public
// fills the dictionary with every string of `from`.
constructor create(from: array of string);
// returns true if aValue has been stored in the dictionary.
function find(const aValue: string): boolean; {$IFNDEF DEBUG}inline;{$ENDIF}
end;
TTokenKind = (tkCommt, tkIdent, tkKeywd, tkStrng, tkBlank, tkSymbl, tkNumbr, TTokenKind = (tkCommt, tkIdent, tkKeywd, tkStrng, tkBlank, tkSymbl, tkNumbr,
tkDDocs, tkSpecK, tkError, tkAsmbl); tkDDocs, tkSpecK, tkError, tkAsmbl);
@ -98,7 +54,6 @@ type
fAsblrAttrib: TSynHighlighterAttributes; fAsblrAttrib: TSynHighlighterAttributes;
fSpeckAttrib: TSynHighlighterAttributes; fSpeckAttrib: TSynHighlighterAttributes;
fErrorAttrib: TSynHighlighterAttributes; fErrorAttrib: TSynHighlighterAttributes;
fKeyWords: TD2Dictionary;
fLineBuf: string; fLineBuf: string;
fTokStart, fTokStop: Integer; fTokStart, fTokStop: Integer;
fTokKind: TTokenKind; fTokKind: TTokenKind;
@ -155,54 +110,6 @@ type
implementation implementation
(**
 * Populates the dictionary by calling addEntry() for each string of `from`,
 * in order.
 *
 * NOTE(review): fLongest, fShortest and fEntries are not reset here; this
 * presumably relies on the instance being zero-initialized - confirm.
 *)
constructor TD2Dictionary.create(from: array of string);
var
value: string;
begin
for value in from do
addEntry(value);
end;
// Range checking is switched off in DEBUG builds for this function;
// presumably because the Byte accumulator is expected to wrap around.
{$IFDEF DEBUG}{$PUSH}{$R-}{$ENDIF}
(**
 * Computes the bucket index of aValue.
 *
 * Starting from 0, each character code is shifted left by `4 and (1-i)`
 * (which evaluates to either 0 or 4 depending on the 1-based position i),
 * xor-ed with 25 and added to the result. An empty string hashes to 0.
 *)
function TD2Dictionary.toHash(const aValue: string): Byte;
var
i: Integer;
begin
result := 0;
for i := 1 to length(aValue) do
result += (Byte(aValue[i]) shl (4 and (1-i))) xor 25;
end;
{$IFDEF DEBUG}{$POP}{$ENDIF}
(**
 * Stores aValue in the bucket selected by toHash(), unless find() reports
 * that it is already present, then updates the length bounds.
 *)
procedure TD2Dictionary.addEntry(const aValue: string);
var
hash: Byte;
begin
// duplicates are ignored.
if find(aValue) then exit;
hash := toHash(aValue);
fEntries[hash].filled := true;
// grow the bucket by one slot and store the new value in the last slot.
setLength(fEntries[hash].values, length(fEntries[hash].values) + 1);
fEntries[hash].values[high(fEntries[hash].values)] := aValue;
if fLongest <= length(aValue) then
fLongest := length(aValue);
// NOTE(review): fShortest is only ever lowered. If it starts at 0 (no other
// initialization is visible here) it stays 0 for any non-empty value, which
// would make the lower-bound test in find() ineffective - confirm.
if fShortest >= length(aValue) then
fShortest := length(aValue);
end;
(**
 * Indicates whether aValue has been stored in the dictionary.
 *
 * Returns false immediately when the length of aValue is outside
 * [fShortest, fLongest] or when its bucket is empty; otherwise the bucket
 * is scanned linearly for an exact (case-sensitive) string match.
 *)
function TD2Dictionary.find(const aValue: string): boolean;
var
hash: Byte;
i: NativeInt;
begin
result := false;
// cheap rejection on the length, before computing the hash.
if length(aValue) > fLongest then exit;
if length(aValue) < fShortest then exit;
hash := toHash(aValue);
if (not fEntries[hash].filled) then exit(false);
for i:= 0 to high(fEntries[hash].values) do
if fEntries[hash].values[i] = aValue then exit(true);
end;
procedure TSynD2SynRange.Assign(Src: TSynCustomHighlighterRange); procedure TSynD2SynRange.Assign(Src: TSynCustomHighlighterRange);
var var
src_t: TSynD2SynRange; src_t: TSynD2SynRange;
@ -268,8 +175,6 @@ begin
DefaultFilter:= 'D source|*.d|D interface|*.di'; DefaultFilter:= 'D source|*.d|D interface|*.di';
fKeyWords.create(D2Kw);
fFoldKinds := [fkBrackets,fkRegion]; fFoldKinds := [fkBrackets,fkRegion];
WordBreakChars := WordBreakChars - ['@']; WordBreakChars := WordBreakChars - ['@'];
@ -986,7 +891,7 @@ begin
if isSymbol(reader^) then break; if isSymbol(reader^) then break;
if isOperator1(reader^) then break; if isOperator1(reader^) then break;
end; end;
if fKeyWords.find(fLineBuf[FTokStart..fTokStop-1]) then if keywordsMap.match(fLineBuf[FTokStart..fTokStop-1]) then
begin begin
fTokKind := tkKeywd; fTokKind := tkKeywd;
if (fLineBuf[FTokStart..fTokStop-1] = 'asm') then if (fLineBuf[FTokStart..fTokStop-1] = 'asm') then

View File

@ -4,12 +4,13 @@ unit ce_dlangmaps;
interface interface
(**
* Perfect static hash-map that detects the D2 "special" keywords such as
* __LINE__ or __FILE__.
*)
type type
(**
* Perfect static hash-map that detects the D2 "special" keywords such as
* __LINE__ or __FILE__.
*)
specialKeywordsMap = record specialKeywordsMap = record
private private
const fWords: array [0..15] of string = const fWords: array [0..15] of string =
@ -42,9 +43,90 @@ type
164, 80, 112, 61, 157, 26, 224, 53, 123, 105, 27, 170, 126, 101, 3, 65, 113, 164, 80, 112, 61, 157, 26, 224, 53, 123, 105, 27, 170, 126, 101, 3, 65, 113,
101, 157, 109, 110, 252, 207, 0 101, 157, 109, 110, 252, 207, 0
); );
class function hash(const w: string): Byte; static; class function hash(const w: string): Byte; static; {$IFNDEF DEBUG}inline;{$ENDIF}
public public
class function match(const w: string): boolean; static; class function match(const w: string): boolean; static; {$IFNDEF DEBUG}inline;{$ENDIF}
end;
(**
* Perfect static hash-map that detects the "straight" D2 keywords plus a few
* exceptions for the library types related to immutable strings.
*)
keywordsMap = record
private
const fWords: array [0..255] of string =
(
'', '', 'scope', '', 'creal', '', '', '', '', '', '', '', '', '', 'delegate',
'', 'dstring', '', 'override', '', '', '', 'is', 'while', 'asm', '', '',
'', 'struct', '', 'cast', '', '', '', 'long', '', '', 'wstring', '', '',
'super', 'else', 'real', '', '', '', '', 'mixin', '', '', '', '', '', 'align',
'', 'dchar', '', '__vector', '', 'bool', '', '', '', '', '', 'unittest',
'ireal', '', '', '', 'nothrow', 'pragma', '', 'null', '', 'do', '', 'cfloat',
'cent', '', '', 'true', '', '', 'macro', 'enum', '', '', '', 'immutable', '',
'', 'private', 'interface', '', 'foreach_reverse', '', '', 'delete', '', '',
'abstract', 'template', '', '', 'idouble', 'volatile', '', '', 'alias', 'version',
'char', 'catch', '', '__traits', 'break', '', 'byte', '', '', 'short', '',
'typeid', 'assert', '', 'goto', '', '', 'protected', '', 'this', '', '', '',
'default', '', '', '', 'deprecated', '', 'uint', '', '', 'false', '', '', '',
'ushort', '', '', 'class', '', '', '', 'ref', '', 'if', 'typeof', 'try', '',
'', 'return', 'void', '', 'throw', '', '', 'pure', 'static', '', 'export', '',
'', 'typedef', 'ucent', 'finally', '', 'union', 'lazy', '', '', 'with', 'case',
'body', '__parameters', '', 'float', '', '', 'invariant', '', '', 'string', 'new',
'ulong', '', '', '', 'function', 'inout', '', '', '', 'switch', '', 'int', '',
'wchar', 'module', '', '', '', '', '', '', 'import', 'for', '', '', '', '', '',
'', 'public', '__gshared', 'shared', 'const', '', 'final', 'foreach', '',
'ifloat', 'out', 'synchronized', '', 'continue', '', '', 'extern', 'package',
'', 'in', '', '', '', 'debug', '', '', 'double', '', '', 'cdouble', '', 'ubyte',
'auto', ''
);
const fHasEntry: array [0..255] of boolean =
(
false, false, true, false, true, false, false, false, false, false, false,
false, false, false, true, false, true, false, true, false, false, false,
true, true, true, false, false, false, true, false, true, false, false,
false, true, false, false, true, false, false, true, true, true, false,
false, false, false, true, false, false, false, false, false, true, false,
true, false, true, false, true, false, false, false, false, false, true,
true, false, false, false, true, true, false, true, false, true, false,
true, true, false, false, true, false, false, true, true, false, false,
false, true, false, false, true, true, false, true, false, false, true,
false, false, true, true, false, false, true, true, false, false, true,
true, true, true, false, true, true, false, true, false, false, true, false,
true, true, false, true, false, false, true, false, true, false, false, false,
true, false, false, false, true, false, true, false, false, true, false, false,
false, true, false, false, true, false, false, false, true, false, true, true,
true, false, false, true, true, false, true, false, false, true, true, false,
true, false, false, true, true, true, false, true, true, false, false, true,
true, true, true, false, true, false, false, true, false, false, true, true,
true, false, false, false, true, true, false, false, false, true, false, true,
false, true, true, false, false, false, false, false, false, true, true, false,
false, false, false, false, false, true, true, true, true, false, true, true,
false, true, true, true, false, true, false, false, true, true, false, true,
false, false, false, true, false, false, true, false, false, true, false, true,
true, false
);
const fCoeffs: array[0..255] of Byte =
(
52, 97, 140, 119, 15, 140, 19, 72, 97, 210, 250, 188, 57, 103, 183, 37, 46,
56, 13, 166, 218, 23, 103, 109, 208, 28, 53, 198, 197, 249, 112, 136, 245,
167, 160, 217, 160, 35, 91, 70, 207, 80, 9, 131, 0, 102, 137, 201, 201, 236,
161, 10, 120, 104, 42, 66, 179, 30, 76, 137, 43, 160, 178, 192, 113, 214,
208, 213, 9, 226, 182, 248, 107, 4, 227, 0, 44, 168, 54, 135, 93, 54, 179,
49, 127, 36, 114, 213, 191, 59, 205, 253, 99, 47, 4, 33, 105, 152, 134, 204,
63, 7, 38, 110, 46, 227, 60, 136, 193, 218, 165, 122, 168, 156, 239, 143,
255, 233, 189, 244, 39, 50, 219, 95, 8, 219, 231, 44, 104, 114, 59, 90, 240,
28, 50, 39, 90, 144, 70, 15, 57, 53, 198, 219, 126, 49, 14, 100, 75, 215,
90, 208, 147, 57, 240, 103, 141, 183, 65, 51, 14, 246, 49, 5, 102, 33, 156,
122, 135, 160, 212, 193, 195, 133, 86, 74, 182, 187, 115, 239, 64, 161, 16,
112, 28, 82, 18, 112, 139, 9, 250, 117, 16, 34, 40, 223, 113, 158, 26, 230,
2, 218, 158, 134, 136, 14, 156, 53, 193, 237, 238, 162, 75, 230, 241, 211,
140, 154, 137, 22, 193, 112, 118, 231, 220, 130, 151, 229, 78, 62, 21, 253,
30, 161, 223, 3, 220, 125, 140, 243, 86, 180, 166, 127, 40, 156, 212, 44,
104, 140, 251, 36, 211, 254, 77, 25
);
class function hash(const w: string): Byte; static; {$IFNDEF DEBUG}inline;{$ENDIF}
public
class function match(const w: string): boolean; static; {$IFNDEF DEBUG}inline;{$ENDIF}
end; end;
@ -67,7 +149,31 @@ var
h: Byte; h: Byte;
begin begin
result := false; result := false;
if length(w) < 7 then if (length(w) < 7) or (length(w) > 19) then
exit;
h := hash(w);
if fHasEntry[h] then
result := fWords[h] = w;
end;
// Range checking is switched off in DEBUG builds for this function;
// presumably because the Byte accumulator is expected to wrap around.
{$IFDEF DEBUG}{$PUSH}{$R-}{$ENDIF}
(**
 * Computes the index of w in the fWords / fHasEntry tables.
 *
 * For each position i from 2 to length(w), a coefficient is read from
 * fCoeffs at the index (w[i] + ((i-1) xor w[i-1])) masked to 8 bits, and
 * added to the result. Strings shorter than 2 characters hash to 0.
 *
 * The formula must not be changed independently of the tables: fWords,
 * fHasEntry and fCoeffs are documented as forming a perfect hash-map.
 *)
class function keywordsMap.hash(const w: string): Byte;
var
i: integer;
begin
Result := 0;
for i := 2 to length(w) do
Result += fCoeffs[(Byte(w[i]) + (Byte(i-1) xor Byte(w[i-1]))) and $FF];
// Result is already a Byte, so this mask does not change its value.
Result := Result and $FF;
end;
{$IFDEF DEBUG}{$POP}{$ENDIF}
class function keywordsMap.match(const w: string): boolean;
var
h: Byte;
begin
result := false;
if (length(w) < 2) or (length(w) > 15) then
exit; exit;
h := hash(w); h := hash(w);
if fHasEntry[h] then if fHasEntry[h] then