Tokenizer and highlighter functional

This commit is contained in:
Hackerpilot 2013-01-19 14:49:24 +00:00
parent e077c633da
commit 36ff7d043c
6 changed files with 110 additions and 74 deletions

View File

@ -105,7 +105,7 @@ void updateCache(string dirs[], string moduleNames[])
continue; continue;
// re-parse the module // re-parse the module
Module m = parseModule(tokenize(readText(filePath))); Module m = parseModule(byToken(readText(filePath)).array());
updateCache(m); updateCache(m);

View File

@ -367,34 +367,34 @@ local function showCompletionList(r)
buffer.auto_c_choose_single = setting buffer.auto_c_choose_single = setting
end end
events.connect(events.CHAR_ADDED, function(ch) --events.connect(events.CHAR_ADDED, function(ch)
if buffer:get_lexer() ~= "dmd" then return end -- if buffer:get_lexer() ~= "dmd" then return end
if ch > 255 then return end -- if ch > 255 then return end
local character = string.char(ch) -- local character = string.char(ch)
if character == "." or character == "(" then -- if character == "." or character == "(" then
local fileName = os.tmpname() -- local fileName = os.tmpname()
local tmpFile = io.open(fileName, "w") -- local tmpFile = io.open(fileName, "w")
tmpFile:write(buffer:get_text()) -- tmpFile:write(buffer:get_text())
local command = M.PATH_TO_DSCANNER -- local command = M.PATH_TO_DSCANNER
.. (character == "." and " --dotComplete " or " --parenComplete ") -- .. (character == "." and " --dotComplete " or " --parenComplete ")
.. fileName .. " " .. buffer.current_pos .. " -I" .. buffer.filename:match(".+[\\/]") -- .. fileName .. " " .. buffer.current_pos .. " -I" .. buffer.filename:match(".+[\\/]")
local p = io.popen(command) -- local p = io.popen(command)
local r = p:read("*a") -- local r = p:read("*a")
if r ~= "\n" then -- if r ~= "\n" then
if character == "." then -- if character == "." then
showCompletionList(r) -- showCompletionList(r)
elseif character == "(" then -- elseif character == "(" then
if r:find("^completions\n") then -- if r:find("^completions\n") then
showCompletionList(r) -- showCompletionList(r)
elseif r:find("^calltips\n.*") then -- elseif r:find("^calltips\n.*") then
r = r:gsub("^calltips\n", "") -- r = r:gsub("^calltips\n", "")
buffer:call_tip_show(buffer.current_pos, r:gsub("\\n", "\n"):gsub("\\t", "\t"):match("(.*)%s+$")) -- buffer:call_tip_show(buffer.current_pos, r:gsub("\\n", "\n"):gsub("\\t", "\t"):match("(.*)%s+$"))
end -- end
end -- end
end -- end
os.remove(fileName) -- os.remove(fileName)
end -- end
end) --end)
local function autocomplete() local function autocomplete()

View File

@ -12,10 +12,10 @@ import std.array;
void writeSpan(string cssClass, string value) void writeSpan(string cssClass, string value)
{ {
stdout.write(`<span class="`, cssClass, `">`, value.replace("<", "&lt;"), `</span>`); stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);
} }
void highlight(Token[] tokens) void highlight(R)(R tokens)
{ {
stdout.writeln(q"[<!DOCTYPE html> stdout.writeln(q"[<!DOCTYPE html>
<html> <html>
@ -56,9 +56,6 @@ html { background-color: #111; color: #ccc; }
case TokenType.OPERATORS_BEGIN: .. case TokenType.OPERATORS_END: case TokenType.OPERATORS_BEGIN: .. case TokenType.OPERATORS_END:
writeSpan("operator", t.value); writeSpan("operator", t.value);
break; break;
case TokenType.PROPERTIES_BEGIN: .. case TokenType.PROPERTIES_END:
writeSpan("property", t.value);
break;
default: default:
stdout.write(t.value.replace("<", "&lt;")); stdout.write(t.value.replace("<", "&lt;"));
break; break;

30
main.d
View File

@ -137,11 +137,11 @@ int main(string[] args)
char[] buf; char[] buf;
while (stdin.readln(buf)) while (stdin.readln(buf))
f.put(buf); f.put(buf);
writeln(f.data.tokenize().count!(a => isLineOfCode(a.type))()); writeln(f.data.byToken().count!(a => isLineOfCode(a.type))());
} }
else else
{ {
writeln(args[1..$].map!(a => a.readText().tokenize())().joiner() writeln(args[1..$].map!(a => a.readText().byToken())().joiner()
.count!(a => isLineOfCode(a.type))()); .count!(a => isLineOfCode(a.type))());
} }
return 0; return 0;
@ -155,11 +155,13 @@ int main(string[] args)
char[] buf; char[] buf;
while (stdin.readln(buf)) while (stdin.readln(buf))
f.put(buf); f.put(buf);
highlighter.highlight(f.data.tokenize(IterationStyle.EVERYTHING)); highlighter.highlight(f.data.byToken(IterationStyle.Everything,
StringStyle.Source));
} }
else else
{ {
highlighter.highlight(args[1].readText().tokenize(IterationStyle.EVERYTHING)); highlighter.highlight(args[1].readText().byToken(
IterationStyle.Everything, StringStyle.Source));
} }
return 0; return 0;
} }
@ -178,11 +180,11 @@ int main(string[] args)
char[] buf; char[] buf;
while (stdin.readln(buf)) while (stdin.readln(buf))
f.put(buf); f.put(buf);
tokens = f.data.tokenize(); tokens = f.data.byToken().array();
} }
catch(ConvException e) catch(ConvException e)
{ {
tokens = args[1].readText().tokenize(); tokens = args[1].readText().byToken().array();
args.popFront(); args.popFront();
} }
auto mod = parseModule(tokens); auto mod = parseModule(tokens);
@ -193,7 +195,7 @@ int main(string[] args)
auto p = findAbsPath(importDirs, im); auto p = findAbsPath(importDirs, im);
if (p is null || !p.exists()) if (p is null || !p.exists())
continue; continue;
context.addModule(p.readText().tokenize().parseModule()); context.addModule(p.readText().byToken().array().parseModule());
} }
auto complete = AutoComplete(tokens, context); auto complete = AutoComplete(tokens, context);
if (parenComplete) if (parenComplete)
@ -213,12 +215,12 @@ int main(string[] args)
char[] buf; char[] buf;
while (stdin.readln(buf)) while (stdin.readln(buf))
f.put(buf); f.put(buf);
tokens = tokenize(f.data); tokens = byToken(f.data).array();
} }
else else
{ {
// read given file // read given file
tokens = tokenize(readText(args[1])); tokens = byToken(readText(args[1])).array();
} }
auto mod = parseModule(tokens); auto mod = parseModule(tokens);
mod.writeJSONTo(stdout); mod.writeJSONTo(stdout);
@ -229,8 +231,8 @@ int main(string[] args)
{ {
if (!recursiveCtags) if (!recursiveCtags)
{ {
auto tokens = tokenize(readText(args[1])); auto tokens = byToken(readText(args[1]));
auto mod = parseModule(tokens); auto mod = parseModule(tokens.array());
mod.writeCtagsTo(stdout, args[1]); mod.writeCtagsTo(stdout, args[1]);
} }
else else
@ -241,12 +243,12 @@ int main(string[] args)
if (!dirEntry.name.endsWith(".d", ".di")) if (!dirEntry.name.endsWith(".d", ".di"))
continue; continue;
stderr.writeln("Generating tags for ", dirEntry.name); stderr.writeln("Generating tags for ", dirEntry.name);
auto tokens = tokenize(readText(dirEntry.name)); auto tokens = byToken(readText(dirEntry.name));
if (m is null) if (m is null)
m = parseModule(tokens); m = parseModule(tokens.array());
else else
{ {
auto mod = parseModule(tokens); auto mod = parseModule(tokens.array());
m.merge(mod); m.merge(mod);
} }
} }

View File

@ -302,10 +302,6 @@ Module parseModule(const Token[] tokens, string protection = "public", string[]
case TokenType.Nothrow: case TokenType.Nothrow:
case TokenType.Override: case TokenType.Override:
case TokenType.Synchronized: case TokenType.Synchronized:
case TokenType.AtDisable:
case TokenType.AtProperty:
case TokenType.AtSafe:
case TokenType.AtSystem:
case TokenType.Abstract: case TokenType.Abstract:
case TokenType.Final: case TokenType.Final:
case TokenType.Gshared: case TokenType.Gshared:
@ -635,8 +631,6 @@ body
case TokenType.Immutable: case TokenType.Immutable:
case TokenType.Const: case TokenType.Const:
case TokenType.Pure: case TokenType.Pure:
case TokenType.AtTrusted:
case TokenType.AtProperty:
case TokenType.Nothrow: case TokenType.Nothrow:
case TokenType.Final: case TokenType.Final:
case TokenType.Override: case TokenType.Override:

View File

@ -396,7 +396,7 @@ unittest
} }
Token lexHexString(R, C = ElementType!R)(ref R input, ref uint index, ref uint lineNumber, Token lexHexString(R, C = ElementType!R)(ref R input, ref uint index, ref uint lineNumber,
const StringStyle style = StringStyle.Escaped) const StringStyle style = StringStyle.Default)
in in
{ {
assert (input.front == 'x'); assert (input.front == 'x');
@ -426,7 +426,7 @@ body
input.popFront(); input.popFront();
++index; ++index;
} }
else if (std.uni.isWhite(input.front) && !(style & StringStyle.Escaped)) else if (std.uni.isWhite(input.front) && (style & StringStyle.NotEscaped))
{ {
app.put(input.front); app.put(input.front);
input.popFront(); input.popFront();
@ -465,15 +465,17 @@ body
break; break;
} }
} }
if (style & StringStyle.Escaped) if (style & StringStyle.NotEscaped)
t.value = to!string(app.data);
else
{ {
auto a = appender!(char[])(); auto a = appender!(char[])();
foreach (b; std.range.chunks(app.data, 2)) foreach (b; std.range.chunks(app.data, 2))
a.put(to!string(cast(dchar) parse!uint(b, 16))); a.put(to!string(cast(dchar) parse!uint(b, 16)));
t.value = to!string(a.data); t.value = to!string(a.data);
} }
else
t.value = to!string(app.data);
return t; return t;
} }
@ -503,7 +505,7 @@ unittest
} }
Token lexString(R)(ref R input, ref uint index, ref uint lineNumber, Token lexString(R)(ref R input, ref uint index, ref uint lineNumber,
const StringStyle style = StringStyle.Escaped) const StringStyle style = StringStyle.Default)
in in
{ {
assert (input.front == '\'' || input.front == '"' || input.front == '`' || input.front == 'r'); assert (input.front == '\'' || input.front == '"' || input.front == '`' || input.front == 'r');
@ -534,8 +536,38 @@ body
app.put(popNewline(input, index)); app.put(popNewline(input, index));
lineNumber++; lineNumber++;
} }
else if (input.front == '\\' && style & StringStyle.Escaped) else if (input.front == '\\')
app.put(interpretEscapeSequence(input, index)); {
if (style & StringStyle.NotEscaped)
{
auto r = input.save();
r.popFront();
if (r.front == quote)
{
app.put('\\');
app.put(quote);
input.popFront();
input.popFront();
index += 2;
}
else if (r.front == '\\')
{
app.put('\\');
app.put('\\');
input.popFront();
input.popFront();
index += 2;
}
else
{
app.put('\\');
input.popFront();
++index;
}
}
else
app.put(interpretEscapeSequence(input, index));
}
else if (input.front == quote) else if (input.front == quote)
{ {
if (style & StringStyle.IncludeQuotes) if (style & StringStyle.IncludeQuotes)
@ -1217,20 +1249,28 @@ enum IterationStyle
*/ */
enum StringStyle : uint enum StringStyle : uint
{ {
NotEscaped = 0, /// Escape sequences will be replaced with their equivalent characters.
/// String escape sequences will be processed and enclosing quote characters /// Quote characters will not be included
/// will not be preserved. Default = 0b0000,
Escaped = 1,
/// Escape sequences will not be processed
NotEscaped = 0b0001,
/// Strings will include their opening and closing quote characters as well
/// as any prefixes or suffixes (e.g.: "abcde"w will include the 'w'
/// character)
IncludeQuotes = 0x0010,
/// Strings will be read exactly as they appeared in the source, including /// Strings will be read exactly as they appeared in the source, including
/// their opening and closing quote characters. Useful for syntax highlighting. /// their opening and closing quote characters. Useful for syntax highlighting.
IncludeQuotes = 2, Source = NotEscaped | IncludeQuotes,
} }
TokenRange!(R) byToken(R)(ref R range, const IterationStyle iterationStyle = IterationStyle.CodeOnly, TokenRange!(R) byToken(R)(R range, const IterationStyle iterationStyle = IterationStyle.CodeOnly,
const StringStyle tokenStyle = StringStyle.Escaped) if (isForwardRange!(R) && isSomeChar!(ElementType!(R))) const StringStyle stringStyle = StringStyle.Default) if (isForwardRange!(R) && isSomeChar!(ElementType!(R)))
{ {
auto r = TokenRange!(R)(range); auto r = TokenRange!(R)(range);
r.tokenStyle = tokenStyle; r.stringStyle = stringStyle;
r.iterStyle = iterationStyle; r.iterStyle = iterationStyle;
r.lineNumber = 1; r.lineNumber = 1;
r.popFront(); r.popFront();
@ -1273,7 +1313,7 @@ struct TokenRange(R) if (isForwardRange!(R) && isSomeChar!(ElementType!(R)))
if (iterStyle == IterationStyle.Everything) if (iterStyle == IterationStyle.Everything)
{ {
current = lexWhitespace(range, index, lineNumber); current = lexWhitespace(range, index, lineNumber);
break; return c;
} }
else else
lexWhitespace(range, index, lineNumber); lexWhitespace(range, index, lineNumber);
@ -1348,16 +1388,19 @@ struct TokenRange(R) if (isForwardRange!(R) && isSomeChar!(ElementType!(R)))
break; break;
case '\'': case '\'':
case '"': case '"':
current = lexString(range, index, lineNumber); current = lexString(range, index, lineNumber, stringStyle);
break; break;
case '`': case '`':
current = lexString(range, index, lineNumber, StringStyle.NotEscaped); current = lexString(range, index, lineNumber, stringStyle);
break; break;
case 'q': case 'q':
auto r = range.save; auto r = range.save;
r.popFront(); r.popFront();
if (!r.isEoF() && r.front == '{') if (!r.isEoF() && r.front == '{')
{
writeln("ParseTokenString"); writeln("ParseTokenString");
break;
}
else else
goto default; goto default;
case '/': case '/':
@ -1427,7 +1470,7 @@ private:
R range; R range;
bool _empty; bool _empty;
IterationStyle iterStyle; IterationStyle iterStyle;
StringStyle tokenStyle; StringStyle stringStyle;
} }
unittest unittest