diff --git a/langutils.d b/langutils.d
index c9c4818..11aad83 100644
--- a/langutils.d
+++ b/langutils.d
@@ -253,6 +253,7 @@ enum TokenType: uint
 
     // Misc
     MISC_BEGIN,
+    scriptLine, /// Line at the beginning of the source file that starts with #!
     comment, /// /** comment */ or // comment or ///comment
     NUMBERS_BEGIN,
     floatLiteral, /// 123.456f or 0x123_45p-af
diff --git a/tokenizer.d b/tokenizer.d
index 47528a8..ae12a19 100644
--- a/tokenizer.d
+++ b/tokenizer.d
@@ -48,6 +48,42 @@ pure nothrow string lexWhitespace(S)(S inputString, ref size_t endIndex,
     }
 }
 
+/**
+ * Lexes a script line (a first line beginning with "#!"), if one is present
+ * at endIndex.
+ *
+ * On a match, endIndex is advanced to the terminating '\n' (or to the end of
+ * inputString if there is none). The newline itself is not consumed and is
+ * not part of the returned text.
+ * Params:
+ *     inputString = the source code to examine
+ *     endIndex = an index into inputString; where the script line must start
+ *     lineNumber = incremented by one when a script line is found
+ * Returns: The script line, or null if inputString does not have "#!" at
+ *     endIndex (endIndex and lineNumber are then left untouched)
+ */
+pure nothrow string lexScriptLine(S)(ref S inputString, ref size_t endIndex,
+    ref uint lineNumber) if (isSomeString!S)
+{
+    // endIndex is 0 in the current caller, but it may be non-zero in future
+    // (e.g., if a BOM is not stripped from inputString), so every offset
+    // below is taken relative to startIndex rather than hard-coded.
+    immutable startIndex = endIndex;
+    string result = null;
+    if (startIndex + 1 < inputString.length
+        && inputString[startIndex .. startIndex + 2] == "#!")
+    {
+        endIndex = startIndex + 2; // skip #!
+        while (endIndex < inputString.length && inputString[endIndex] != '\n')
+            ++endIndex;
+
+        result = inputString[startIndex .. endIndex];
+        // NOTE(review): the '\n' is left for the caller to lex; if that pass
+        // also counts it, lineNumber ends up one too high -- confirm.
+        ++lineNumber;
+    }
+    return result;
+}
 
 /**
  * Increments endIndex until it indexes a character directly after a comment
@@ -493,6 +529,16 @@ Token[] tokenize(S)(S inputString, IterationStyle iterationStyle = IterationStyl
     size_t endIndex = 0;
     uint lineNumber = 1;
 
+    if (inputString.length > 1 && inputString[0..2] == "#!")
+    {
+        Token currentToken;
+        currentToken.lineNumber = lineNumber; // lineNumber is always 1
+        currentToken.value = lexScriptLine(inputString, endIndex, lineNumber);
+        currentToken.type = TokenType.scriptLine;
+        // NOTE(review): currentToken goes out of scope here without being
+        // stored, so the scriptLine token never reaches the result -- confirm.
+    }
+
     while (endIndex < inputString.length)
     {
         size_t prevIndex = endIndex;