From 02af7dee46f84b1e6f4c75cc4715b96a86640982 Mon Sep 17 00:00:00 2001 From: Basile Burg Date: Sun, 10 Apr 2016 04:44:08 +0200 Subject: [PATCH] lexer (HL), added a perfect dictionary for the straight keywords --- lazproj/coedit.lpi | 5 -- src/ce_d2syn.pas | 97 +--------------------------------- src/ce_dlangmaps.pas | 120 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 114 insertions(+), 108 deletions(-) diff --git a/lazproj/coedit.lpi b/lazproj/coedit.lpi index a75ec624..f71a883f 100644 --- a/lazproj/coedit.lpi +++ b/lazproj/coedit.lpi @@ -401,11 +401,6 @@ - - - - - diff --git a/src/ce_d2syn.pas b/src/ce_d2syn.pas index b1737bf7..b035ef41 100644 --- a/src/ce_d2syn.pas +++ b/src/ce_d2syn.pas @@ -9,52 +9,8 @@ uses SynEditHighlighter, SynEditHighlighterFoldBase, SynEditTypes, ce_dlangutils,ce_dlangmaps; -const - - D2Kw: array[0..107] of string = - ( 'abstract', 'alias', 'align', 'asm', 'assert', 'auto', - 'body', 'bool', 'break', 'byte', - 'case', 'cast', 'catch', 'cdouble', 'cent', 'cfloat', 'char', 'class', - 'const', 'continue', 'creal', - 'dchar', 'debug', 'default', 'delegate', 'delete', 'deprecated', 'do', 'double', 'dstring', - 'else', 'enum', 'export', 'extern', - 'false', 'final', 'finally', 'float', 'for', 'foreach', - 'foreach_reverse', 'function', - 'goto', '__gshared', - 'idouble', 'if', 'ifloat', 'immutable', 'import', 'in', 'inout', 'int', - 'interface', 'invariant', 'ireal', 'is', - 'lazy', 'long', - 'macro', 'mixin', 'module', - 'new', 'nothrow', 'null', - 'out', 'override', - 'package', 'pragma', 'private', 'protected', 'ptrdiff_t', 'public', 'pure', - 'real', 'ref', 'return', - 'size_t', 'scope', 'shared', 'short', 'static', 'string', 'struct', - 'super', 'switch', 'synchronized', - 'template', 'this', 'throw', 'true', 'try', 'typedef', 'typeid', 'typeof', - 'ubyte', 'ucent', 'uint', 'ulong', 'union', 'unittest', 'ushort', - 'version', 'void', 'volatile', - 'wchar', 'while', 'with', 'wstring' - ); - type - TD2DictionaryEntry = record - 
filled: Boolean; - values: array of string; - end; - - TD2Dictionary = object - private - fLongest, fShortest: NativeInt; - fEntries: array[Byte] of TD2DictionaryEntry; - function toHash(const aValue: string): Byte; {$IFNDEF DEBUG}inline;{$ENDIF} - procedure addEntry(const aValue: string); - public - constructor create(from: array of string); - function find(const aValue: string): boolean; {$IFNDEF DEBUG}inline;{$ENDIF} - end; - TTokenKind = (tkCommt, tkIdent, tkKeywd, tkStrng, tkBlank, tkSymbl, tkNumbr, tkDDocs, tkSpecK, tkError, tkAsmbl); @@ -98,7 +54,6 @@ type fAsblrAttrib: TSynHighlighterAttributes; fSpeckAttrib: TSynHighlighterAttributes; fErrorAttrib: TSynHighlighterAttributes; - fKeyWords: TD2Dictionary; fLineBuf: string; fTokStart, fTokStop: Integer; fTokKind: TTokenKind; @@ -155,54 +110,6 @@ type implementation -constructor TD2Dictionary.create(from: array of string); -var - value: string; -begin - for value in from do - addEntry(value); -end; - -{$IFDEF DEBUG}{$PUSH}{$R-}{$ENDIF} -function TD2Dictionary.toHash(const aValue: string): Byte; -var - i: Integer; -begin - result := 0; - for i := 1 to length(aValue) do - result += (Byte(aValue[i]) shl (4 and (1-i))) xor 25; -end; -{$IFDEF DEBUG}{$POP}{$ENDIF} - -procedure TD2Dictionary.addEntry(const aValue: string); -var - hash: Byte; -begin - if find(aValue) then exit; - hash := toHash(aValue); - fEntries[hash].filled := true; - setLength(fEntries[hash].values, length(fEntries[hash].values) + 1); - fEntries[hash].values[high(fEntries[hash].values)] := aValue; - if fLongest <= length(aValue) then - fLongest := length(aValue); - if fShortest >= length(aValue) then - fShortest := length(aValue); -end; - -function TD2Dictionary.find(const aValue: string): boolean; -var - hash: Byte; - i: NativeInt; -begin - result := false; - if length(aValue) > fLongest then exit; - if length(aValue) < fShortest then exit; - hash := toHash(aValue); - if (not fEntries[hash].filled) then exit(false); - for i:= 0 to 
high(fEntries[hash].values) do - if fEntries[hash].values[i] = aValue then exit(true); -end; - procedure TSynD2SynRange.Assign(Src: TSynCustomHighlighterRange); var src_t: TSynD2SynRange; @@ -268,8 +175,6 @@ begin DefaultFilter:= 'D source|*.d|D interface|*.di'; - fKeyWords.create(D2Kw); - fFoldKinds := [fkBrackets,fkRegion]; WordBreakChars := WordBreakChars - ['@']; @@ -986,7 +891,7 @@ begin if isSymbol(reader^) then break; if isOperator1(reader^) then break; end; - if fKeyWords.find(fLineBuf[FTokStart..fTokStop-1]) then + if keywordsMap.match(fLineBuf[FTokStart..fTokStop-1]) then begin fTokKind := tkKeywd; if (fLineBuf[FTokStart..fTokStop-1] = 'asm') then diff --git a/src/ce_dlangmaps.pas b/src/ce_dlangmaps.pas index cfdb528b..c0b2b129 100644 --- a/src/ce_dlangmaps.pas +++ b/src/ce_dlangmaps.pas @@ -4,12 +4,13 @@ unit ce_dlangmaps; interface -(** - * Perfect static hash-map that detects the D2 "special" keywords such as - * __LINE__ or __FILE__. - *) + type + (** + * Perfect static hash-map that detects the D2 "special" keywords such as + * __LINE__ or __FILE__. + *) specialKeywordsMap = record private const fWords: array [0..15] of string = @@ -42,9 +43,90 @@ type 164, 80, 112, 61, 157, 26, 224, 53, 123, 105, 27, 170, 126, 101, 3, 65, 113, 101, 157, 109, 110, 252, 207, 0 ); - class function hash(const w: string): Byte; static; + class function hash(const w: string): Byte; static; {$IFNDEF DEBUG}inline;{$ENDIF} public - class function match(const w: string): boolean; static; + class function match(const w: string): boolean; static; {$IFNDEF DEBUG}inline;{$ENDIF} + end; + + (** + * Perfect static hash-map that detects the "straight" D2 keywords plus a few + * exceptions for the library types related to immutable strings. 
+ *) + keywordsMap = record + private + const fWords: array [0..255] of string = + ( + '', '', 'scope', '', 'creal', '', '', '', '', '', '', '', '', '', 'delegate', + '', 'dstring', '', 'override', '', '', '', 'is', 'while', 'asm', '', '', + '', 'struct', '', 'cast', '', '', '', 'long', '', '', 'wstring', '', '', + 'super', 'else', 'real', '', '', '', '', 'mixin', '', '', '', '', '', 'align', + '', 'dchar', '', '__vector', '', 'bool', '', '', '', '', '', 'unittest', + 'ireal', '', '', '', 'nothrow', 'pragma', '', 'null', '', 'do', '', 'cfloat', + 'cent', '', '', 'true', '', '', 'macro', 'enum', '', '', '', 'immutable', '', + '', 'private', 'interface', '', 'foreach_reverse', '', '', 'delete', '', '', + 'abstract', 'template', '', '', 'idouble', 'volatile', '', '', 'alias', 'version', + 'char', 'catch', '', '__traits', 'break', '', 'byte', '', '', 'short', '', + 'typeid', 'assert', '', 'goto', '', '', 'protected', '', 'this', '', '', '', + 'default', '', '', '', 'deprecated', '', 'uint', '', '', 'false', '', '', '', + 'ushort', '', '', 'class', '', '', '', 'ref', '', 'if', 'typeof', 'try', '', + '', 'return', 'void', '', 'throw', '', '', 'pure', 'static', '', 'export', '', + '', 'typedef', 'ucent', 'finally', '', 'union', 'lazy', '', '', 'with', 'case', + 'body', '__parameters', '', 'float', '', '', 'invariant', '', '', 'string', 'new', + 'ulong', '', '', '', 'function', 'inout', '', '', '', 'switch', '', 'int', '', + 'wchar', 'module', '', '', '', '', '', '', 'import', 'for', '', '', '', '', '', + '', 'public', '__gshared', 'shared', 'const', '', 'final', 'foreach', '', + 'ifloat', 'out', 'synchronized', '', 'continue', '', '', 'extern', 'package', + '', 'in', '', '', '', 'debug', '', '', 'double', '', '', 'cdouble', '', 'ubyte', + 'auto', '' + ); + const fHasEntry: array [0..255] of boolean = + ( + false, false, true, false, true, false, false, false, false, false, false, + false, false, false, true, false, true, false, true, false, false, false, + true, true, 
true, false, false, false, true, false, true, false, false, + false, true, false, false, true, false, false, true, true, true, false, + false, false, false, true, false, false, false, false, false, true, false, + true, false, true, false, true, false, false, false, false, false, true, + true, false, false, false, true, true, false, true, false, true, false, + true, true, false, false, true, false, false, true, true, false, false, + false, true, false, false, true, true, false, true, false, false, true, + false, false, true, true, false, false, true, true, false, false, true, + true, true, true, false, true, true, false, true, false, false, true, false, + true, true, false, true, false, false, true, false, true, false, false, false, + true, false, false, false, true, false, true, false, false, true, false, false, + false, true, false, false, true, false, false, false, true, false, true, true, + true, false, false, true, true, false, true, false, false, true, true, false, + true, false, false, true, true, true, false, true, true, false, false, true, + true, true, true, false, true, false, false, true, false, false, true, true, + true, false, false, false, true, true, false, false, false, true, false, true, + false, true, true, false, false, false, false, false, false, true, true, false, + false, false, false, false, false, true, true, true, true, false, true, true, + false, true, true, true, false, true, false, false, true, true, false, true, + false, false, false, true, false, false, true, false, false, true, false, true, + true, false + ); + const fCoeffs: array[0..255] of Byte = + ( + 52, 97, 140, 119, 15, 140, 19, 72, 97, 210, 250, 188, 57, 103, 183, 37, 46, + 56, 13, 166, 218, 23, 103, 109, 208, 28, 53, 198, 197, 249, 112, 136, 245, + 167, 160, 217, 160, 35, 91, 70, 207, 80, 9, 131, 0, 102, 137, 201, 201, 236, + 161, 10, 120, 104, 42, 66, 179, 30, 76, 137, 43, 160, 178, 192, 113, 214, + 208, 213, 9, 226, 182, 248, 107, 4, 227, 0, 44, 168, 54, 135, 93, 54, 179, + 
49, 127, 36, 114, 213, 191, 59, 205, 253, 99, 47, 4, 33, 105, 152, 134, 204, + 63, 7, 38, 110, 46, 227, 60, 136, 193, 218, 165, 122, 168, 156, 239, 143, + 255, 233, 189, 244, 39, 50, 219, 95, 8, 219, 231, 44, 104, 114, 59, 90, 240, + 28, 50, 39, 90, 144, 70, 15, 57, 53, 198, 219, 126, 49, 14, 100, 75, 215, + 90, 208, 147, 57, 240, 103, 141, 183, 65, 51, 14, 246, 49, 5, 102, 33, 156, + 122, 135, 160, 212, 193, 195, 133, 86, 74, 182, 187, 115, 239, 64, 161, 16, + 112, 28, 82, 18, 112, 139, 9, 250, 117, 16, 34, 40, 223, 113, 158, 26, 230, + 2, 218, 158, 134, 136, 14, 156, 53, 193, 237, 238, 162, 75, 230, 241, 211, + 140, 154, 137, 22, 193, 112, 118, 231, 220, 130, 151, 229, 78, 62, 21, 253, + 30, 161, 223, 3, 220, 125, 140, 243, 86, 180, 166, 127, 40, 156, 212, 44, + 104, 140, 251, 36, 211, 254, 77, 25 + ); + class function hash(const w: string): Byte; static; {$IFNDEF DEBUG}inline;{$ENDIF} + public + class function match(const w: string): boolean; static; {$IFNDEF DEBUG}inline;{$ENDIF} end; @@ -67,7 +149,31 @@ var h: Byte; begin result := false; - if length(w) < 7 then + if (length(w) < 7) or (length(w) > 19) then + exit; + h := hash(w); + if fHasEntry[h] then + result := fWords[h] = w; +end; + +{$IFDEF DEBUG}{$PUSH}{$R-}{$ENDIF} +class function keywordsMap.hash(const w: string): Byte; +var + i: integer; +begin + Result := 0; + for i := 2 to length(w) do + Result += fCoeffs[(Byte(w[i]) + (Byte(i-1) xor Byte(w[i-1]))) and $FF]; + Result := Result and $FF; +end; +{$IFDEF DEBUG}{$POP}{$ENDIF} + +class function keywordsMap.match(const w: string): boolean; +var + h: Byte; +begin + result := false; + if (length(w) < 2) or (length(w) > 15) then exit; h := hash(w); if fHasEntry[h] then