Updates according to code review

This commit is contained in:
Roman D. Boiko 2012-04-26 11:57:49 +03:00
parent e7f4ed7266
commit 18889620b5
2 changed files with 51 additions and 25 deletions

View File

@ -60,7 +60,7 @@ string printCaseStatements(K, V)(TrieNode!(K,V) node, string indentString)
if (v.children.length > 0) if (v.children.length > 0)
{ {
caseStatement ~= indentString; caseStatement ~= indentString;
caseStatement ~= "\tif (endIndex >= inputString.length)\n"; caseStatement ~= "\tif (isEoF(inputString, endIndex))\n";
caseStatement ~= indentString; caseStatement ~= indentString;
caseStatement ~= "\t{\n"; caseStatement ~= "\t{\n";
caseStatement ~= indentString; caseStatement ~= indentString;
@ -110,3 +110,28 @@ string generateCaseTrie(string[] args ...)
} }
return printCaseStatements(t, ""); return printCaseStatements(t, "");
} }
/**
 * Returns: true if index points to end of inputString, false otherwise
 *
 * Params:
 *     inputString = source text being lexed
 *     index       = candidate position within inputString
 */
pure nothrow bool isEoF(S)(S inputString, size_t index)
{
// note: EoF is determined according to D specification:
// running past the end of the string, or hitting a NUL ('\u0000')
// or SUB ('\u001A') character, all terminate the source text
return index >= inputString.length
|| inputString[index] == Character.NUL
|| inputString[index] == Character.SUB;
}
private:
// Unicode character literals for source-text delimiters.
// NUL/SUB mark end-of-file (used by isEoF); CR/LF mark end-of-line.
enum Character
{
// End of file (EoF)
NUL = '\u0000', // NUL character
SUB = '\u001A', // Substitute character
// Line feed (EoL)
CR = '\u000D', // CR character
LF = '\u000A', // LF character
}

View File

@ -432,12 +432,34 @@ pure nothrow void lexDecimal(S)(ref S inputString, size_t startIndex,
} }
} }
// todo: in some cases loop is interrupted before float literal is parsed, and some invalid inputs are accepted; // suggest to extract lexing integers into a separate function
// suggested solution is to extract lexing integer into a separate function // please see unittest below
token.value = inputString[startIndex .. endIndex]; token.value = inputString[startIndex .. endIndex];
} }
unittest {
// Each input below documents a known mis-lexing by lexDecimal:
dump!lexDecimal("55e-4"); // yields intLiteral, but should be float
dump!lexDecimal("3e+f"); // floatLiteral, but should be considered invalid
dump!lexDecimal("3e++f"); // intLiteral 3e+, but should be considered invalid
// actually, there are lots of bugs. The point is that without decomposition of integer lexing from floating-point lexing
// it is very hard to prove algorithm correctness
}
// Temporary function to illustrate some problems
// Executes the lexing function T on s and dumps results to console.
// T is expected to have the lexDecimal-style signature:
// T!(string)(input, ref startIndex, ref endIndex, ref token)
void dump(alias T)(string s) {
size_t start;
size_t end;
Token tok;
T!(string)(s, start, end, tok);
// dump results: token type, lexed slice, and the start/end indices
writeln(tok.type);
writeln(tok.value);
writeln(start);
writeln(end);
}
nothrow void lexHex(S)(ref S inputString, ref size_t startIndex, nothrow void lexHex(S)(ref S inputString, ref size_t startIndex,
ref size_t endIndex, ref Token token) if (isSomeString!S) ref size_t endIndex, ref Token token) if (isSomeString!S)
{ {
@ -742,30 +764,9 @@ Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyl
// This should never happen. // This should never happen.
if (endIndex <= prevIndex) if (endIndex <= prevIndex)
{ {
stderr.writeln("FAIL"); // why not put assert(false)? being here indicates a bug in code, I guess stderr.writeln("FAIL");
return []; return [];
} }
} }
return tokenAppender.data; return tokenAppender.data;
} }
private:
// NOTE(review): this is the pre-review version removed by this commit;
// the diff above adds a replacement that groups these constants into
// `enum Character` and compares against Character.NUL / Character.SUB.
/**
 * Returns: true if index points to end of inputString, false otherwise
 */
pure nothrow bool isEoF(S)(S inputString, size_t index)
{
// note: EoF is determined according to D specification
return index >= inputString.length
|| inputString[index] == NUL_CHAR
|| inputString[index] == SUB_CHAR;
}
// End of file (EoF)
const NUL_CHAR = '\u0000'; // NUL character
const SUB_CHAR = '\u001A'; // Substitute character
// Line feed (EoL)
const CR_CHAR = '\u000D'; // CR character
const LF_CHAR = '\u000A'; // LF character