Merge branch 'range-based-lexer' of https://github.com/Hackerpilot/Dscanner into range-based-lexer

2013-02-27 21:01:49 +00:00 · 2013-02-27 21:01:49 +00:00 · 1dfeb281d9
parent 16ac572ad5 55ad8941b0
commit 1dfeb281d9
1 changed files with 92 additions and 94 deletions
--- a/std/d/lexer.d
+++ b/std/d/lexer.d
@ -683,30 +683,7 @@ struct TokenRange(LexSrc)
 	*/
 	void popFront()
 	{
-		// Filter out tokens we don't care about
+        advance();
 		loop: while (true)
 		{
 			advance();
 			if(empty)
 				break loop;
 			switch (current.type)
 			{
 			case TokenType.whitespace:
 				if (config.iterStyle & IterationStyle.includeWhitespace)
 					break loop;
 				break;
 			case TokenType.comment:
 				if (config.iterStyle & IterationStyle.includeComments)
 					break loop;
 				break;
 			case TokenType.specialTokenSequence:
 				if (config.iterStyle & IterationStyle.includeSpecialTokens)
 					break loop;
 				break;
 			default:
 				break loop;
 			}
 		}
 	}
 private:
@ -716,29 +693,26 @@ private:
 	*/
 	void advance()
 	{
-		if (isEoF())
+L_advance:
-		{
+        if (src.empty)
-			_empty = true;
+        {
-			return;
+            _empty = true;
-		}
+            return;
-
+        }
-		src.mark(); // mark a start of a lexing "frame"
+        src.mark(); // mark a start of a lexing "frame"
 		current.line = lineNumber;
 		current.startIndex = src.index;
 		current.column = column;
 		current.value = null;        
 		if (isWhite())
 		{
 			if (config.iterStyle & IterationStyle.includeWhitespace)
 				lexWhitespace!true();
 			else
 				lexWhitespace!false();
 			return;
 		}
 		switch (src.front)
 		{
        // handle sentinels for end of input
        case 0: 
        case 0x1a:
            // TODO: check config flags, it's cheap 
            // since this branch at most is taken once per file 
            _empty = true;
            return;        
 //        pragma(msg, generateCaseTrie(
 		mixin(generateCaseTrie(
 			"=",               "TokenType.assign",
@ -813,10 +787,10 @@ private:
 			case '*':
 			case '+':
 				if (config.iterStyle & IterationStyle.includeComments)
-					lexComment!true();
+					return lexComment!true();
-				else
+                lexComment!false();
-					lexComment!false();
+                goto L_advance; // tail-recursion
-				return;
+				
 			case '=':
 				current.type = TokenType.divEqual;
 				current.value = "/=";
@ -905,12 +879,32 @@ private:
 			else
 				goto default;
 		case '#':            
-			lexSpecialTokenSequence();
+            lexSpecialTokenSequence();
-			return;
+            if(config.iterStyle & IterationStyle.includeSpecialTokens)
                return;
            goto L_advance; // tail-recursion
        // "short" ASCII whites
        case 0x20:
        case 0x09: .. case 0x0d:
             if (config.iterStyle & IterationStyle.includeWhitespace)
                return lexWhitespace!true();
             lexWhitespace!false();
             goto L_advance; // tail-recursion 
 		default:        
-			while(!isEoF() && !isSeparating())
+            if ((src.front & 0x80) && isLongWhite())
            {               
                if (config.iterStyle & IterationStyle.includeWhitespace)
                    return lexWhitespace!true();
                lexWhitespace!false();
                goto L_advance; // tail-recursion
            }
 			for(;;)
 			{
                if(isSeparating())
                    break;
 				nextCharNonLF();
                if(isEoF())
                    break;
 			}
 			current.type = lookupTokenType(src.slice);
@ -924,48 +918,9 @@ private:
 				return;
 			}
-			if (!(config.iterStyle & TokenStyle.doNotReplaceSpecial))
+			if (config.iterStyle & TokenStyle.doNotReplaceSpecial)
 				return;
-
+            expandSpecialToken();
 			switch (current.type)
 			{
 			case TokenType.date:
 				current.type = TokenType.stringLiteral;
 				auto time = Clock.currTime();
 				current.value = format("%s %02d %04d", time.month, time.day, time.year);
 				return;
 			case TokenType.time:
 				auto time = Clock.currTime();
 				current.type = TokenType.stringLiteral;
 				current.value = (cast(TimeOfDay)(time)).toISOExtString();
 				return;
 			case TokenType.timestamp:
 				auto time = Clock.currTime();
 				auto dt = cast(DateTime) time;
 				current.type = TokenType.stringLiteral;
 				current.value = format("%s %s %02d %02d:%02d:%02d %04d",
 					dt.dayOfWeek, dt.month, dt.day, dt.hour, dt.minute,
 					dt.second, dt.year);
 				return;
 			case TokenType.vendor:
 				current.type = TokenType.stringLiteral;
 				current.value = config.vendorString;
 				return;
 			case TokenType.compilerVersion:
 				current.type = TokenType.stringLiteral;
 				current.value = format("%d", config.versionNumber);
 				return;
 			case TokenType.line:
 				current.type = TokenType.intLiteral;
 				current.value = format("%d", current.line);
 				return;
 			case TokenType.file:
 				current.type = TokenType.stringLiteral;
 				current.value = config.fileName;
 				return;
 			default:
 				return;
 			}
 		}
 	}
@ -1552,7 +1507,7 @@ private:
 		import std.stdio;
 		if(unescaped != Appender!(ubyte[]).init)
 		{
-			//stuff in the last slice and used buffered data
+			//stuff in the last slice and use buffered data
 			unescaped.put(src.slice);
 			setData(unescaped.data);
 		}
@ -2132,10 +2087,53 @@ private:
        else
            r.popFront();
        if (r.empty || (r.front != 0xa8 && r.front != 0xa9))
-                return false;
+            return false;
        return true;
    }
    /**
     * Rewrites the current token in place when its type is one of the
     * special token types handled here: date, time, timestamp, vendor,
     * compilerVersion, line or file. The token's type is replaced by a
     * literal type (stringLiteral, or intLiteral for line) and its value
     * by the corresponding text. Tokens of any other type are left untouched.
     *
     * The date, time and timestamp expansions read the clock via
     * Clock.currTime() at the moment this function runs.
     */
    void expandSpecialToken()
    {
        auto kind = current.type;

        if (kind == TokenType.date)
        {
            // "<month> <day> <year>" built from the current clock reading
            current.type = TokenType.stringLiteral;
            auto now = Clock.currTime();
            current.value = format("%s %02d %04d", now.month, now.day, now.year);
        }
        else if (kind == TokenType.time)
        {
            // time of day in ISO extended form
            auto now = Clock.currTime();
            current.type = TokenType.stringLiteral;
            current.value = (cast(TimeOfDay)(now)).toISOExtString();
        }
        else if (kind == TokenType.timestamp)
        {
            // "<weekday> <month> <day> <hh>:<mm>:<ss> <year>"
            auto now = Clock.currTime();
            auto stamp = cast(DateTime) now;
            current.type = TokenType.stringLiteral;
            current.value = format("%s %s %02d %02d:%02d:%02d %04d",
                stamp.dayOfWeek, stamp.month, stamp.day, stamp.hour,
                stamp.minute, stamp.second, stamp.year);
        }
        else if (kind == TokenType.vendor)
        {
            // vendor string comes straight from the lexer configuration
            current.type = TokenType.stringLiteral;
            current.value = config.vendorString;
        }
        else if (kind == TokenType.compilerVersion)
        {
            // configured version number rendered as decimal text
            current.type = TokenType.stringLiteral;
            current.value = format("%d", config.versionNumber);
        }
        else if (kind == TokenType.line)
        {
            // the token's own line number, as an integer literal
            current.type = TokenType.intLiteral;
            current.value = format("%d", current.line);
        }
        else if (kind == TokenType.file)
        {
            // file name comes straight from the lexer configuration
            current.type = TokenType.stringLiteral;
            current.value = config.fileName;
        }
        // every other token type: nothing to expand
    }
 	void errorMessage(string s)
 	{
 		import std.string: format;