Handle Unicode byte order marks

2014-05-19 13:40:35 -07:00 · 2014-05-19 13:40:35 -07:00 · db84119e33
parent 25f0d93b90
commit db84119e33
2 changed files with 6 additions and 4 deletions
--- a/main.d
+++ b/main.d
@@ -121,11 +121,11 @@ int run(string[] args)
 		}
 		else if (tokenDump)
 		{
-			writeln("text                    blank\tindex\tline\tcolumn");
+			writeln("text                    blank\tindex\tline\tcolumn\ttype");
 			foreach (token; tokens)
 			{
-				writefln("<<%20s>>%b\t%d\t%d\t%d", token.text is null ? str(token.type) : token.text,
-					token.text !is null, token.index, token.line, token.column);
+				writefln("<<%20s>>%b\t%d\t%d\t%d\t%d", token.text is null ? str(token.type) : token.text,
+					token.text !is null, token.index, token.line, token.column, token.type);
 			}
 			return 0;
 		}
--- a/std/d/lexer.d
+++ b/std/d/lexer.d
@@ -417,7 +417,9 @@ public struct DLexer

 	this(ubyte[] range, const LexerConfig config, StringCache* cache)
 	{
-		this.range = LexerRange(range);
+		auto r = (range.length >= 3 && range[0] == 0xef && range[1] == 0xbb && range[2] == 0xbf)
+			? range[3 .. $] : range;
+		this.range = LexerRange(r);
 		this.config = config;
 		this.cache = cache;
 		popFront();