Updates according to code review
This commit is contained in:
parent
e7f4ed7266
commit
18889620b5
27
codegen.d
27
codegen.d
|
@ -60,7 +60,7 @@ string printCaseStatements(K, V)(TrieNode!(K,V) node, string indentString)
|
||||||
if (v.children.length > 0)
|
if (v.children.length > 0)
|
||||||
{
|
{
|
||||||
caseStatement ~= indentString;
|
caseStatement ~= indentString;
|
||||||
caseStatement ~= "\tif (endIndex >= inputString.length)\n";
|
caseStatement ~= "\tif (isEoF(inputString, endIndex))\n";
|
||||||
caseStatement ~= indentString;
|
caseStatement ~= indentString;
|
||||||
caseStatement ~= "\t{\n";
|
caseStatement ~= "\t{\n";
|
||||||
caseStatement ~= indentString;
|
caseStatement ~= indentString;
|
||||||
|
@ -110,3 +110,28 @@ string generateCaseTrie(string[] args ...)
|
||||||
}
|
}
|
||||||
return printCaseStatements(t, "");
|
return printCaseStatements(t, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns: true if index is at or past the end of inputString, or if the character at index is NUL or SUB; false otherwise
|
||||||
|
*/
|
||||||
|
pure nothrow bool isEoF(S)(S inputString, size_t index)
|
||||||
|
{
|
||||||
|
// note: besides running off the end of the input, the NUL and SUB characters also count as EoF, according to the D specification
|
||||||
|
return index >= inputString.length
|
||||||
|
|| inputString[index] == Character.NUL
|
||||||
|
|| inputString[index] == Character.SUB;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
// Named Unicode character literals
|
||||||
|
enum Character
|
||||||
|
{
|
||||||
|
// End-of-file (EoF) markers
|
||||||
|
NUL = '\u0000', // NUL (null) character
|
||||||
|
SUB = '\u001A', // SUB (substitute) character
|
||||||
|
|
||||||
|
// Line terminators (EoL)
|
||||||
|
CR = '\u000D', // CR (carriage return) character
|
||||||
|
LF = '\u000A', // LF (line feed) character
|
||||||
|
}
|
49
tokenizer.d
49
tokenizer.d
|
@ -432,12 +432,34 @@ pure nothrow void lexDecimal(S)(ref S inputString, size_t startIndex,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo: in some cases loop is interrupted before float literal is parsed, and some invalid inputs are accepted;
|
// suggest to extract lexing integers into a separate function
|
||||||
// suggested solution is to extract lexing integer into a separate function
|
// please see unittest below
|
||||||
|
|
||||||
token.value = inputString[startIndex .. endIndex];
|
token.value = inputString[startIndex .. endIndex];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unittest {
|
||||||
|
dump!lexDecimal("55e-4"); // yields intLiteral, but should be a float literal
|
||||||
|
dump!lexDecimal("3e+f"); // yields floatLiteral, but should be considered invalid
|
||||||
|
dump!lexDecimal("3e++f"); // yields intLiteral 3e+, but should be considered invalid
|
||||||
|
// There are actually many more bugs. The point is that without separating integer lexing from floating-point lexing
|
||||||
|
// it is very hard to prove that the algorithm is correct.
|
||||||
|
}
|
||||||
|
|
||||||
|
// Temporary helper that illustrates some lexer problems
|
||||||
|
// Calls T!(string) on s with default-initialized start/end indices and Token, then writes the results to the console
|
||||||
|
void dump(alias T)(string s) {
|
||||||
|
size_t start;
|
||||||
|
size_t end;
|
||||||
|
Token tok;
|
||||||
|
T!(string)(s, start, end, tok);
|
||||||
|
// print the token type, the token value, and the start/end indices after the call, one per line
|
||||||
|
writeln(tok.type);
|
||||||
|
writeln(tok.value);
|
||||||
|
writeln(start);
|
||||||
|
writeln(end);
|
||||||
|
}
|
||||||
|
|
||||||
nothrow void lexHex(S)(ref S inputString, ref size_t startIndex,
|
nothrow void lexHex(S)(ref S inputString, ref size_t startIndex,
|
||||||
ref size_t endIndex, ref Token token) if (isSomeString!S)
|
ref size_t endIndex, ref Token token) if (isSomeString!S)
|
||||||
{
|
{
|
||||||
|
@ -742,30 +764,9 @@ Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyl
|
||||||
// This should never happen.
|
// This should never happen.
|
||||||
if (endIndex <= prevIndex)
|
if (endIndex <= prevIndex)
|
||||||
{
|
{
|
||||||
stderr.writeln("FAIL"); // why not put assert(false)? being here indicates a bug in code, I guess
|
stderr.writeln("FAIL");
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return tokenAppender.data;
|
return tokenAppender.data;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns: true if index is at or past the end of inputString, or if the character at index is NUL or SUB; false otherwise
|
|
||||||
*/
|
|
||||||
pure nothrow bool isEoF(S)(S inputString, size_t index)
|
|
||||||
{
|
|
||||||
// note: besides running off the end of the input, the NUL and SUB characters also count as EoF, according to the D specification
|
|
||||||
return index >= inputString.length
|
|
||||||
|| inputString[index] == NUL_CHAR
|
|
||||||
|| inputString[index] == SUB_CHAR;
|
|
||||||
}
|
|
||||||
|
|
||||||
// End-of-file (EoF) markers
|
|
||||||
const NUL_CHAR = '\u0000'; // NUL (null) character
|
|
||||||
const SUB_CHAR = '\u001A'; // SUB (substitute) character
|
|
||||||
|
|
||||||
// Line terminators (EoL)
|
|
||||||
const CR_CHAR = '\u000D'; // CR (carriage return) character
|
|
||||||
const LF_CHAR = '\u000A'; // LF (line feed) character
|
|
Loading…
Reference in New Issue