dexed/src/ce_dlang.pas

886 lines
21 KiB
Plaintext

unit ce_dlang;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, ce_dlangutils;
const
D2Kw: array[0..109] of string =
( 'abstract', 'alias', 'align', 'asm', 'assert', 'auto',
'body', 'bool', 'break', 'byte',
'case', 'cast', 'catch', 'cdouble', 'cent', 'cfloat', 'char', 'class',
'const', 'continue', 'creal',
'dchar', 'debug', 'default', 'delegate', 'delete', 'deprecated', 'do', 'double',
'else', 'enum', 'export', 'extern',
'false', 'final', 'finally', 'float', 'for', 'foreach',
'foreach_reverse', 'function',
'goto',
'idouble', 'if', 'ifloat', 'immutable', 'import', 'in', 'inout', 'int',
'interface', 'invariant', 'ireal', 'is',
'lazy', 'long',
'macro', 'mixin', 'module',
'new', 'nothrow', 'null',
'out', 'override',
'package', 'pragma', 'private', 'protected', 'ptrdiff_t', 'public', 'pure',
'real', 'ref', 'return',
'size_t', 'scope', 'shared', 'short', 'static', 'string', 'struct',
'super', 'switch', 'synchronized',
'template', 'this', 'throw', 'true', 'try', 'typedef', 'typeid', 'typeof',
'ubyte', 'ucent', 'uint', 'ulong', 'union', 'unittest', 'ushort',
'version', 'void', 'volatile',
'wchar', 'while', 'with',
'__FILE__', '__MODULE__', '__LINE__', '__FUNCTION__', '__PRETTY_FUNCTION__'
);
type
(**
* sector for an array of Keyword with a common hash.
*)
TD2DictionaryEntry = record
filled: Boolean;
values: array of string;
end;
(**
* Dictionary for the D2 keywords.
*)
TD2Dictionary = object
private
fLongest, fShortest: NativeInt;
fEntries: array[Byte] of TD2DictionaryEntry;
function toHash(const aValue: string): Byte; {$IFNDEF DEBUG}inline;{$ENDIF}
procedure addEntry(const aValue: string);
public
constructor create;
destructor destroy; // do not remove even if empty (compat with char-map version)
function find(const aValue: string): boolean;
end;
(**
* Represents the pointer in a source file.
* Automatically updates the line and the column.
*)
TReaderHead = object
private
fLineIndex: Integer;
fColumnIndex: Integer;
fAbsoluteIndex: Integer;
fReaderHead: PChar;
function getColAndLine: TPoint;
public
constructor create(const aText: PChar; const aColAndLine: TPoint);
procedure setReader(const aText: PChar; const aColAndLine: TPoint);
//
function next: PChar;
function previous: PChar;
//
property AbsoluteIndex: Integer read fAbsoluteIndex;
property LineIndex: Integer read fLineIndex;
property ColumnIndex: Integer read fColumnIndex;
property LineAnColumn: TPoint read getColAndLine;
//
property head: PChar read fReaderHead;
end;
TLexTokenKind = (ltkIllegal, ltkChar, ltkComment, ltkIdentifier, ltkKeyword,
ltkNumber, ltkOperator, ltkString, ltkSymbol);
const
LexTokenKindString : array[TLexTokenKind] of string =
( 'Illegal', 'Character', 'Comment', 'Identifier', 'Keyword',
'Number', 'Operator', 'String', 'Symbol');
type
(*****************************************************************************
* Lexer token
*)
PLexToken = ^TLexToken;
TLexToken = record
position: TPoint;
kind: TLexTokenKind;
data: string;
end;
(*****************************************************************************
* List of lexer tokens
*)
TLexTokenList = class(TList)
private
function getToken(index: integer): TLexToken;
public
procedure clear; override;
procedure addToken(aValue: PLexToken);
property token[index: integer]: TLexToken read getToken;
end;
TLexTokenEnumerator = class
fList: TLexTokenList;
fIndex: Integer;
function GetCurrent: TLexToken;
function MoveNext: Boolean;
property Current: TLexToken read GetCurrent;
end;
(*****************************************************************************
* Error record
*)
PLexError = ^TLexError;
TLexError = record
position: TPoint;
msg: string;
end;
(*****************************************************************************
* Error list
*)
TLexErrorList = class(TList)
private
function getError(index: integer): TLexError;
public
procedure clear; override;
procedure addError(aValue: PLexError);
property error[index: integer]: TLexError read getError;
end;
TLexErrorEnumerator = class
fList: TLexErrorList;
fIndex: Integer;
function GetCurrent: TLexError;
function MoveNext: Boolean;
property Current: TLexError read GetCurrent;
end;
operator enumerator(aTokenList: TLexTokenList): TLexTokenEnumerator;
operator enumerator(anErrorList: TLexErrorList): TLexErrorEnumerator;
(*****************************************************************************
* Lexes aText and fills aList with the TLexToken found.
*)
procedure lex(const aText: string; const aList: TLexTokenList);
(*****************************************************************************
* Detects various syntactic errors in a TLexTokenList
*)
procedure checkSyntacticErrors(const aTokenList: TLexTokenList; const anErrorList: TLexErrorList);
(*****************************************************************************
* Outputs the module name from a tokenized D source.
*)
function getModuleName(const aTokenList: TLexTokenList):string;
(*****************************************************************************
* Compares two TPoints.
*)
operator = (lhs: TPoint; rhs: TPoint): boolean;
implementation
var
D2Dictionary: TD2Dictionary;
{$REGION TReaderHead------------------------------------------------------------}
operator = (lhs: TPoint; rhs: TPoint): boolean;
begin
exit( (lhs.y = rhs.y) and (lhs.x = rhs.x) );
end;
constructor TReaderHead.create(const aText: PChar; const aColAndLine: TPoint);
begin
setReader(aText,aColAndLine);
end;
procedure TReaderHead.setReader(const aText: PChar; const aColAndLine: TPoint);
begin
fLineIndex := aColAndLine.y;
fColumnIndex := aColAndLine.x;
fReaderHead := aText;
while (LineAnColumn <> aColAndLine) do next;
//
// editor not 0 based ln index
if fLineIndex = 0 then fLineIndex := 1;
end;
function TReaderHead.getColAndLine: TPoint;
begin
exit( Point(fColumnIndex, fLineIndex) );
end;
function TReaderHead.next: PChar;
begin
Inc(fReaderHead);
Inc(fAbsoluteIndex);
Inc(fColumnIndex);
if (fReaderHead^ = #10) then
begin
Inc(fLineIndex);
fColumnIndex := 0;
end;
exit(fReaderHead);
end;
function TReaderHead.previous: PChar;
begin
// note: it breaks the column but not the line count
Dec(fReaderHead);
Dec(fColumnIndex);
Dec(fAbsoluteIndex);
exit(fReaderHead);
end;
{$ENDREGION}
{$REGION TD2Dictionary----------------------------------------------------------}
constructor TD2Dictionary.create;
var
value: string;
begin
for value in D2Kw do
addEntry(value);
end;
destructor TD2Dictionary.destroy;
begin
end;
{$IFDEF DEBUG}{$R-}{$ENDIF}
function TD2Dictionary.toHash(const aValue: string): Byte;
var
i: Integer;
begin
result := 0;
for i := 1 to length(aValue) do result +=
(Byte(aValue[i]) shl (4 and (1-i))) xor 25;
end;
{$IFDEF DEBUG}{$R+}{$ENDIF}
procedure TD2Dictionary.addEntry(const aValue: string);
var
hash: Byte;
begin
if find(aValue) then exit;
hash := toHash(aValue);
fEntries[hash].filled := true;
setLength(fEntries[hash].values, length(fEntries[hash].values) + 1);
fEntries[hash].values[high(fEntries[hash].values)] := aValue;
if fLongest <= length(aValue) then
fLongest := length(aValue);
if fShortest >= length(aValue) then
fShortest := length(aValue);
end;
function TD2Dictionary.find(const aValue: string): boolean;
var
hash: Byte;
i: NativeInt;
begin
result := false;
if length(aValue) > fLongest then exit;
if length(aValue) < fShortest then exit;
hash := toHash(aValue);
if (not fEntries[hash].filled) then exit(false);
for i:= 0 to high(fEntries[hash].values) do
if fEntries[hash].values[i] = aValue then exit(true);
end;
{$ENDREGION}
{$REGION Lexing-----------------------------------------------------------------}
function TLexTokenList.getToken(index: integer): TLexToken;
begin
result := PLexToken(Items[index])^;
end;
procedure TLexTokenList.clear;
begin
while Count > 0 do
begin
Dispose( PLexToken(Items[Count-1]) );
Delete(Count-1);
end;
end;
procedure TLexTokenList.addToken(aValue: PLexToken);
begin
add(Pointer(aValue));
end;
function TLexTokenEnumerator.GetCurrent: TLexToken;
begin
exit(fList.token[fIndex]);
end;
function TLexTokenEnumerator.MoveNext: Boolean;
begin
Inc(fIndex);
exit(fIndex < fList.Count);
end;
operator enumerator(aTokenList: TLexTokenList): TLexTokenEnumerator;
begin
result := TLexTokenEnumerator.Create;
result.fList := aTokenList;
result.fIndex := -1;
end;
{$BOOLEVAL ON}
procedure lex(const aText: string; const aList: TLexTokenList);
var
reader: TReaderHead;
identifier: string;
function isOutOfBound: boolean;
begin
exit(reader.AbsoluteIndex > length(aText))
end;
procedure addToken(aTk: TLexTokenKind);
var
ptk: PLexToken;
begin
ptk := new(PLexToken);
ptk^.kind := aTk;
ptk^.position := reader.LineAnColumn;
ptk^.data := identifier;
aList.Add(ptk);
end;
begin
reader.create(@aText[1], Point(0,0));
while (true) do
begin
if isOutOfBound then
exit;
identifier := '';
// skip blanks
while isWhite(reader.head^) do
begin
reader.next;
if isOutOfBound then exit;
end;
// line comment
if (reader.head^ = '/') then
begin
if (reader.next^ = '/') then
begin
if isOutOfBound then exit;
while (reader.head^ <> #10) do
begin
reader.next;
identifier += reader.head^;
if isOutOfBound then exit;
end;
reader.next;
addToken(ltkComment);
continue;
end
else
reader.previous;
end;
// block comments 1
if (reader.head^ = '/') then
begin
if (reader.next^ = '*') then
begin
if isOutOfBound then exit;
while (reader.head^ <> '*') or (reader.next^ <> '/') do
if isOutOfBound then exit;
reader.next;
addToken(ltkComment);
continue;
end
else
reader.previous;
end;
// block comments 2
if (reader.head^ = '/') then
begin
if (reader.next^ = '+') then
begin
if isOutOfBound then exit;
while (reader.head^ <> '+') or (reader.next^ <> '/') do
if isOutOfBound then exit;
reader.next;
addToken(ltkComment);
continue;
end
else
reader.previous;
end;
// string 1, note: same escape error as in SynD2Syn
if (reader.head^ in ['r', 'x']) then
begin
if not (reader.next^ = '"') then
reader.previous;
end;
if (reader.head^ = '"') then
begin
reader.next;
if isOutOfBound then exit;
if (reader.head^ = '"') then
begin
reader.next;
addToken(ltkString);
continue;
end;
while (true) do
begin
if reader.head^ = '\' then
begin
reader.next;
if (reader.head^ = '"') then
begin
reader.next;
continue;
end;
end;
if (reader.head^ = '"') then
break;
identifier += reader.head^;
reader.next;
if isOutOfBound then exit;
end;
if isStringPostfix(reader.next^) then
reader.next;
addToken(ltkString);
continue;
end;
// string 2
if (reader.head^ = '`') then
begin
reader.next;
if isOutOfBound then exit;
while (reader.head^ <> '`') do
begin
identifier += reader.head^;
reader.next;
if isOutOfBound then exit;
end;
if isStringPostfix(reader.next^) then
reader.next;
if isOutOfBound then exit;
addToken(ltkString);
continue;
end;
// token string
if (reader.head^ = 'q') and (reader.next^ = '{') then
begin
reader.next;
if isOutOfBound then exit;
while (reader.head^ <> '}') do
begin
identifier += reader.head^;
reader.next;
if isOutOfBound then exit;
end;
reader.next;
addToken(ltkString);
continue;
end else reader.previous;
//chars, note: same escape error as in SynD2Syn
if (reader.head^ = #39) then
begin
reader.next;
if isOutOfBound then exit;
if (reader.head^ = #39) then
begin
reader.next;
addToken(ltkString);
continue;
end;
while (true) do
begin
if reader.head^ = '\' then
begin
reader.next;
if (reader.head^ = #39) then
begin
reader.next;
continue;
end;
end;
if (reader.head^ = #39) then
break;
identifier += reader.head^;
reader.next;
if isOutOfBound then exit;
end;
reader.next;
addToken(ltkChar);
continue;
end;
// check negative float '-0.'
if (reader.head^ = '-') then
begin
identifier += reader.head^;
if reader.next^ = '0' then
begin
if reader.next^ = '.' then
reader.previous // back to 0, get into "binary/hex numbr/float"
else
begin
reader.previous;
reader.previous; // back to -
identifier := '';
end;
end
else
begin
reader.previous; // back to -
identifier := '';
end;
end;
// + suffixes
// + exponent
// float .xxxx
// binary/hex numbr/float
if (reader.head^ = '0') then
begin
identifier += reader.head^;
if (reader.next^ in ['b','B']) then
begin
identifier += reader.head^;
while isBit(reader.next^) or (reader.head^ = '_') do
begin
if isOutOfBound then exit;
identifier += reader.head^;
end;
addToken(ltkNumber);
continue;
end
else reader.previous;
if (reader.next^ in ['x','X']) then
begin
identifier += reader.head^;
while isHex(reader.next^) or (reader.head^ = '_') do
begin
if isOutOfBound then exit;
identifier += reader.head^;
end;
addToken(ltkNumber);
continue;
end
else reader.previous;
if (reader.next^ = '.') then
begin
identifier += reader.head^;
while isNumber(reader.next^) do
begin
if isOutOfBound then exit;
identifier += reader.head^;
end;
addToken(ltkNumber);
continue;
end
else reader.previous;
identifier := '';
end;
// check negative float/int '-xxx'
if (reader.head^ = '-') then
begin
identifier += reader.head^;
if not isNumber(reader.next^) then
begin
reader.previous; // back to '-'
identifier := '';
end;
end;
// numbers
if isNumber(reader.head^) then
begin
identifier += reader.head^;
while isNumber(reader.next^) or (reader.head^ = '_') do
begin
if isOutOfBound then exit;
identifier += reader.head^;
end;
addToken(ltkNumber);
continue;
end;
// symbols
if isSymbol(reader.head^) then
begin
identifier += reader.head^;
reader.next;
addToken(ltkSymbol);
if isOutOfBound then exit;
continue;
end;
// operators
if isOperator1(reader.head^) then
begin
identifier += reader.head^;
while isOperator1(reader.next^) do
begin
if isOutOfBound then exit;
identifier += reader.head^;
end;
case length(identifier) of
4:begin
if (not isOperator1(reader.head^)) and
isOperator4(identifier) then
begin
addToken(ltkOperator);
continue;
end;
end;
3:begin
if (not isOperator1(reader.head^)) and
isOperator3(identifier) then
begin
addToken(ltkOperator);
continue;
end;
end;
2:begin
if (not isOperator1(reader.head^)) and
isOperator2(identifier) then
begin
addToken(ltkOperator);
continue;
end;
end;
1:begin
if not isOperator1(reader.head^)
then
begin
addToken(ltkOperator);
continue;
end;
end;
end;
end;
// identifier accum
if isFirstIdentifier(reader.head^) then
begin
while isIdentifier(reader.head^) do
begin
identifier += reader.head^;
reader.next;
if isOutOfBound then exit;
end;
if D2Dictionary.find(identifier) then
addToken(ltkKeyword)
else
addToken(ltkIdentifier);
continue;
end;
// error
identifier += ' (unrecognized lexer input)';
addToken(ltkIllegal);
end;
end;
{$BOOLEVAL OFF}
{$ENDREGION}
{$REGION Syntactic errors}
function TLexErrorList.getError(index: integer): TLexError;
begin
result := PLexError(Items[index])^;
end;
procedure TLexErrorList.clear;
begin
while Count > 0 do
begin
Dispose( PLexError(Items[Count-1]) );
Delete(Count-1);
end;
end;
procedure TLexErrorList.addError(aValue: PLexError);
begin
add(Pointer(aValue));
end;
function TLexErrorEnumerator.GetCurrent: TLexError;
begin
exit(fList.error[fIndex]);
end;
function TLexErrorEnumerator.MoveNext: Boolean;
begin
Inc(fIndex);
exit(fIndex < fList.Count);
end;
operator enumerator(anErrorList: TLexErrorList): TLexErrorEnumerator;
begin
result := TLexErrorEnumerator.Create;
result.fList := anErrorList;
result.fIndex := -1;
end;
procedure checkSyntacticErrors(const aTokenList: TLexTokenList; const anErrorList: TLexErrorList);
const
errPrefix = 'syntactic error: ';
var
tk, old1, old2, lastSig: TLexToken;
err: PLexError;
tkIndex: NativeInt;
pareCnt, curlCnt, squaCnt: NativeInt;
pareLeft, curlLeft, squaLeft: boolean;
procedure addError(const aMsg: string);
begin
err := new(PLexError);
err^.msg := errPrefix + aMsg;
err^.position := aTokenList.token[tkIndex].position;
anErrorList.addError(err);
end;
label
_preSeq;
begin
tkIndex := -1;
pareCnt := 0;
curlCnt := 0;
squaCnt := 0;
pareLeft:= False;
curlLeft:= False;
squaLeft:= False;
FillByte( old1, sizeOf(TLexToken), 0);
FillByte( old2, sizeOf(TLexToken), 0);
FillByte( lastSig, sizeOf(TLexToken), 0);
for tk in aTokenList do
begin
Inc(tkIndex);
// brackets count
if tk.kind = ltkSymbol then
begin
case tk.data of
'(': Inc(pareCnt);
'{': Inc(curlCnt);
'[': Inc(squaCnt);
')': Dec(pareCnt);
'}': Dec(curlCnt);
']': Dec(squaCnt);
end;
// only for the first occurence
if not pareLeft then if pareCnt = -1 then
begin
addError('a left parenthesis is missing');
pareLeft := true;
end;
if not curlLeft then if curlCnt = -1 then
begin
addError('a left curly bracket is missing');
curlLeft := true;
end;
if not squaLeft then if squaCnt = -1 then
begin
addError('a left square bracket is missing');
squaLeft := true;
end;
// at the end
if (tkIndex = aTokenList.Count-1) then
begin
if pareCnt > 0 then
addError('a right parenthesis is missing');
if curlCnt > 0 then
addError('a right curly bracket is missing');
if squaCnt > 0 then
addError('a right square bracket is missing');
end;
goto _preSeq;
end;
// lexer invalid token
if tk.kind = ltkIllegal then
begin
addError(tk.data);
goto _preSeq;
end;
_preSeq:
// invalid sequences
if tkIndex > 0 then
begin
// empty statements:
if (tk.kind = ltkSymbol) and (tk.data = ';') then
if (lastSig.kind = ltkSymbol) and (lastSig.data = ';') then
addError('invalid syntax for empty statement');
if tk.kind <> ltkComment then lastSig := tk;
// suspicious double keywords
if (old1.kind = ltkKeyword) and (tk.kind = ltkKeyword) then
if old1.data = tk.data then
addError('keyword is duplicated');
// suspicious double numbers
if (old1.kind = ltkNumber) and (tk.kind = ltkNumber) then
addError('symbol or operator expected after number');
end;
if tkIndex > 1 then
begin
end;
old1 := tk;
old2 := old1;
end;
end;
function getModuleName(const aTokenList: TLexTokenList): string;
var
ltk: TLexToken;
mtok: boolean;
begin
result := '';
mtok := false;
for ltk in aTokenList do
begin
if mtok then
begin
if ltk.kind = ltkIdentifier then
result += ltk.data;
if ltk.kind = ltkSymbol then
case ltk.data of
'.': result += ltk.data;
';': exit;
end;
end
else
if ltk.kind = ltkKeyword then
if ltk.data = 'module' then
mtok := true;
end;
end;
{$ENDREGION}
initialization
D2Dictionary.create;
finalization
D2Dictionary.destroy;
end.