From 8a444fbd8910ababfaf34f7c0a881f96f2929a66 Mon Sep 17 00:00:00 2001 From: Hackerpilot Date: Mon, 20 Jan 2014 20:09:32 -0800 Subject: [PATCH] Initial static analysis checks --- README.md | 32 +++++++++++++- analysis/base.d | 37 ++++++++++++++++ analysis/enumarrayliteral.d | 72 +++++++++++++++++++++++++++++++ analysis/package.d | 6 +++ analysis/pokemon.d | 52 ++++++++++++++++++++++ analysis/run.d | 73 +++++++++++++++++++++++++++++++ style.d => analysis/style.d | 49 ++++++++------------- build.sh | 4 +- main.d | 10 +++-- stdx/d/lexer.d | 86 +++++++++++++++++++++---------------- 10 files changed, 345 insertions(+), 76 deletions(-) create mode 100644 analysis/base.d create mode 100644 analysis/enumarrayliteral.d create mode 100644 analysis/package.d create mode 100644 analysis/pokemon.d create mode 100644 analysis/run.d rename style.d => analysis/style.d (58%) diff --git a/README.md b/README.md index 35bc363..2d1a455 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,12 @@ # Overview DScanner is a tool for analyzing D source code +### Building and installing +To build DScanner, run the build.sh script (or the build.bat file on Windows). +The build time can be rather long with the -inline flag (over 2 minutes on an +i7 processor), so you may wish to remove it from the build script. To install, +simply place the generated binary somewhere on your $PATH. + # Usage The following examples assume that we are analyzing a simple file called helloworld.d @@ -29,8 +35,30 @@ while lexing or parsing the given source file. It does not do any semantic analysis and it does not compile the code. ### Style Check -The "--styleCheck" option checks the names of packages, variables, enums, -classes, and other things for consistency with the Phobos style guide. +The "--styleCheck" option runs some basic static analysis checks against the +given source files. + +#### Implemented checks +* Old alias syntax (i.e "alias a b;" should be replaced with "alias b = a;"). +* Implicit concatenation of string literals. +* Complex number literals (e.g. "1.23i"). +* Empty declarations (i.e. random ";" characters) +* enum array literals in struct/class bodies +* Avoid Pokémon exception handling + +#### Wishlish +* Assigning to foreach variables that are not "ref". +* opCmp or opEquals, or toHash not declared "const". +* Unused variables. +* Unused imports. +* Unused parameters (check is skipped if function is marked "override") +* Struct constructors that have a single parameter that has a default argument. +* Variables that are never modified and not declared immutable. +* Public declarations not documented +* Format numbers for readability +* Declaring opEquals without toHash +* Assignment in conditionals +* delete keyword is deprecated ### Line of Code Count The "--sloc" or "-l" option prints the number of lines of code in the file. diff --git a/analysis/base.d b/analysis/base.d new file mode 100644 index 0000000..a5d6ae9 --- /dev/null +++ b/analysis/base.d @@ -0,0 +1,37 @@ +module analysis.base; + +import std.string; +import stdx.d.ast; + +abstract class BaseAnalyzer : ASTVisitor +{ +public: + this(string fileName) + { + this.fileName = fileName; + } + + string[] messages() + { + return _messages; + } + +protected: + + import core.vararg; + + void addErrorMessage(size_t line, size_t column, string message) + { + _messages ~= format("%s(%d:%d)[warn]: %s", fileName, line, column, message); + } + + /** + * The file name + */ + string fileName; + + /** + * Map of file names to warning messages for that file + */ + string[] _messages; +} diff --git a/analysis/enumarrayliteral.d b/analysis/enumarrayliteral.d new file mode 100644 index 0000000..93d7458 --- /dev/null +++ b/analysis/enumarrayliteral.d @@ -0,0 +1,72 @@ +// Copyright Brian Schott (Sir Alaran) 2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module analysis.enumarrayliteral; + +import stdx.d.ast; +import stdx.d.lexer; +import analysis.base; + +void doNothing(string, size_t, size_t, string, bool) {} + +class EnumArrayLiteralCheck : BaseAnalyzer +{ + alias visit = BaseAnalyzer.visit; + + this(string fileName) + { + super(fileName); + } + + bool inAggregate = false; + bool looking = false; + + template visitTemplate(T) + { + override void visit(T structDec) + { + inAggregate = true; + structDec.accept(this); + inAggregate = false; + } + } + + mixin visitTemplate!ClassDeclaration; + mixin visitTemplate!InterfaceDeclaration; + mixin visitTemplate!UnionDeclaration; + mixin visitTemplate!StructDeclaration; + + override void visit(Declaration dec) + { + if (inAggregate) foreach (attr; dec.attributes) + { + if (attr.storageClass !is null && + attr.storageClass.token == tok!"enum") + { + looking = true; + } + } + dec.accept(this); + looking = false; + } + + override void visit(AutoDeclaration autoDec) + { + if (looking) + { + foreach (i, initializer; autoDec.initializers) + { + if (initializer is null) continue; + if (initializer.nonVoidInitializer is null) continue; + if (initializer.nonVoidInitializer.arrayInitializer is null) continue; + addErrorMessage(autoDec.identifiers[i].line, + autoDec.identifiers[i].column, "This enum may lead to " + ~ "unnecessary allocation at run-time. Use 'static immutable " + ~ autoDec.identifiers[i].text ~ " = [ ...' instead."); + } + } + autoDec.accept(this); + } +} diff --git a/analysis/package.d b/analysis/package.d new file mode 100644 index 0000000..225b82e --- /dev/null +++ b/analysis/package.d @@ -0,0 +1,6 @@ +module analysis; + +public import analysis.style; +public import analysis.enumarrayliteral; +public import analysis.pokemon; +public import analysis.base; diff --git a/analysis/pokemon.d b/analysis/pokemon.d new file mode 100644 index 0000000..1840f8b --- /dev/null +++ b/analysis/pokemon.d @@ -0,0 +1,52 @@ +// Copyright Brian Schott (Sir Alaran) 2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module analysis.pokemon; + +import stdx.d.ast; +import stdx.d.lexer; +import analysis.base; + +/** + * Checks for Pokémon exception handling, i.e. "gotta' catch 'em all". + * + * --- + * catch (Exception e) + * ... + * --- + */ +class PokemonExceptionCheck : BaseAnalyzer +{ + alias visit = BaseAnalyzer.visit; + + this(string fileName) + { + super(fileName); + } + + override void visit(Catch c) + { + if (c.type.type2.symbol.identifierOrTemplateChain.identifiersOrTemplateInstances.length != 1) + { + c.accept(this); + return; + } + auto identOrTemplate = c.type.type2.symbol.identifierOrTemplateChain.identifiersOrTemplateInstances[0]; + if (identOrTemplate.templateInstance !is null) + { + c.accept(this); + return; + } + if (identOrTemplate.identifier.text == "Exception" + || identOrTemplate.identifier.text == "Throwable" + || identOrTemplate.identifier.text == "Error") + { + immutable column = identOrTemplate.identifier.column; + immutable line = identOrTemplate.identifier.line; + addErrorMessage(line, column, "Avoid catching Exception, Error, and Throwable"); + } + c.accept(this); + } +} diff --git a/analysis/run.d b/analysis/run.d new file mode 100644 index 0000000..19fa961 --- /dev/null +++ b/analysis/run.d @@ -0,0 +1,73 @@ +module analysis.run; + +import std.stdio; +import std.array; +import std.conv; +import std.algorithm; +import std.range; +import std.array; + +import stdx.d.lexer; +import stdx.d.parser; +import stdx.d.ast; + +import analysis.base; +import analysis.style; +import analysis.enumarrayliteral; +import analysis.pokemon; + +void messageFunction(string fileName, size_t line, size_t column, string message, + bool isError) +{ + writefln("%s(%d:%d)[%s]: %s", fileName, line, column, + isError ? "error" : "warn", message); +} + +void syntaxCheck(File output, string[] fileNames) +{ + analyze(output, fileNames, false); +} + +void analyze(File output, string[] fileNames, bool staticAnalyze = true) +{ + foreach (fileName; fileNames) + { + File f = File(fileName); + auto bytes = uninitializedArray!(ubyte[])(to!size_t(f.size)); + f.rawRead(bytes); + auto lexer = byToken(bytes); + auto app = appender!(typeof(lexer.front)[])(); + while (!lexer.empty) + { + app.put(lexer.front); + lexer.popFront(); + } + + foreach (message; lexer.messages) + { + messageFunction(fileName, message.line, message.column, message.message, + message.isError); + } + + Module m = parseModule(app.data, fileName, &messageFunction); + + if (!staticAnalyze) + return; + + auto style = new StyleChecker(fileName); + style.visit(m); + + auto enums = new EnumArrayLiteralCheck(fileName); + enums.visit(m); + + auto pokemon = new PokemonExceptionCheck(fileName); + pokemon.visit(m); + + foreach (message; sort(chain(enums.messages, style.messages, + pokemon.messages).array)) + { + writeln(message); + } + } +} + diff --git a/style.d b/analysis/style.d similarity index 58% rename from style.d rename to analysis/style.d index 6e984fa..007e8db 100644 --- a/style.d +++ b/analysis/style.d @@ -3,48 +3,35 @@ // (See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) -module style; +module analysis.style; import stdx.d.ast; import stdx.d.lexer; -import stdx.d.parser; -import std.stdio; import std.regex; import std.array; import std.conv; +import std.format; -// TODO: Warn on assigning to non-ref foreach item. +import analysis.base; -void doNothing(string, size_t, size_t, string, bool) {} - -void styleCheck(File output, string[] fileNames) -{ - foreach (fileName; fileNames) - { - File f = File(fileName); - auto bytes = uninitializedArray!(ubyte[])(to!size_t(f.size)); - f.rawRead(bytes); - auto tokens = byToken(bytes); - Module m = parseModule(tokens.array, fileName, &doNothing); - auto checker = new StyleChecker; - checker.fileName = fileName; - checker.visit(m); - } -} - -class StyleChecker : ASTVisitor +class StyleChecker : BaseAnalyzer { enum varFunNameRegex = `^([\p{Ll}_][_\w\d]*|[\p{Lu}\d_]+)$`; enum aggregateNameRegex = `^\p{Lu}[\w\d]*$`; enum moduleNameRegex = `^\p{Ll}+$`; + this(string fileName) + { + super(fileName); + } + override void visit(ModuleDeclaration dec) { foreach (part; dec.moduleName.identifiers) { if (part.text.matchFirst(moduleNameRegex).length == 0) - writeln(fileName, "(", part.line, ":", part.column, ") ", - "Module/package name ", part.text, " does not match style guidelines"); + addErrorMessage(part.line, part.column, "Module/package name " + ~ part.text ~ " does not match style guidelines"); } } @@ -61,8 +48,8 @@ class StyleChecker : ASTVisitor void checkLowercaseName(string type, ref Token name) { if (name.text.matchFirst(varFunNameRegex).length == 0) - writeln(fileName, "(", name.line, ":", name.column, ") ", - type, " name ", name.text, " does not match style guidelines"); + addErrorMessage(name.line, name.column, type ~ " name " + ~ name.text ~ " does not match style guidelines"); } override void visit(ClassDeclaration dec) @@ -91,14 +78,12 @@ class StyleChecker : ASTVisitor dec.accept(this); } - void checkAggregateName(string aggregateType, ref Token name) + void checkAggregateName(string aggregateType, ref const Token name) { if (name.text.matchFirst(aggregateNameRegex).length == 0) - writeln(fileName, "(", name.line, ":", name.column, ") ", - aggregateType, " name ", name.text, - " does not match style guidelines"); + addErrorMessage(name.line, name.column, aggregateType + ~ " name '" ~ name.text ~ "' does not match style guidelines"); } - alias ASTVisitor.visit visit; - string fileName; + alias visit = ASTVisitor.visit; } diff --git a/build.sh b/build.sh index 3a81079..297dd12 100755 --- a/build.sh +++ b/build.sh @@ -7,12 +7,12 @@ dmd\ astprinter.d\ formatter.d\ outliner.d\ - style.d\ stdx/*.d\ stdx/d/*.d\ + analysis/*.d\ -ofdscanner\ -m64\ - -O -release -noboundscheck + -O -release -inline -noboundscheck #gdc\ # main.d\ diff --git a/main.d b/main.d index 41d2210..b48f67f 100644 --- a/main.d +++ b/main.d @@ -23,7 +23,7 @@ import ctags; import astprinter; import imports; import outliner; -import style; +import analysis.run; int main(string[] args) { @@ -123,7 +123,11 @@ int main(string[] args) } else if (styleCheck) { - stdout.styleCheck(expandArgs(args, recursive)); + stdout.analyze(expandArgs(args, recursive)); + } + else if (syntaxCheck) + { + stdout.syntaxCheck(expandArgs(args, recursive)); } else { @@ -162,7 +166,7 @@ int main(string[] args) writefln("total:\t%d", count); } } - else if (syntaxCheck || imports || ast || outline) + else if (imports || ast || outline) { auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1])); auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]); diff --git a/stdx/d/lexer.d b/stdx/d/lexer.d index 6cb620e..fb21436 100644 --- a/stdx/d/lexer.d +++ b/stdx/d/lexer.d @@ -50,6 +50,38 @@ private enum dynamicTokens = [ "dstringLiteral", "stringLiteral", "wstringLiteral", "scriptLine" ]; +private enum pseudoTokenHandlers = [ + "\"", "lexStringLiteral", + "`", "lexWysiwygString", + "//", "lexSlashSlashComment", + "/*", "lexSlashStarComment", + "/+", "lexSlashPlusComment", + ".", "lexDot", + "'", "lexCharacterLiteral", + "0", "lexNumber", + "1", "lexDecimal", + "2", "lexDecimal", + "3", "lexDecimal", + "4", "lexDecimal", + "5", "lexDecimal", + "6", "lexDecimal", + "7", "lexDecimal", + "8", "lexDecimal", + "9", "lexDecimal", + "q\"", "lexDelimitedString", + "q{", "lexTokenString", + "r\"", "lexWysiwygString", + "x\"", "lexHexString", + " ", "lexWhitespace", + "\t", "lexWhitespace", + "\r", "lexWhitespace", + "\n", "lexWhitespace", + "\u2028", "lexLongNewline", + "\u2029", "lexLongNewline", + "#!", "lexScriptLine", + "#line", "lexSpecialTokenSequence" +]; + public alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens); public alias str = tokenStringRepresentation!(IdType, staticTokens, dynamicTokens, possibleDefaultTokens); public template tok(string token) @@ -373,38 +405,6 @@ public struct DLexer { import core.vararg; - private enum pseudoTokenHandlers = [ - "\"", "lexStringLiteral", - "`", "lexWysiwygString", - "//", "lexSlashSlashComment", - "/*", "lexSlashStarComment", - "/+", "lexSlashPlusComment", - ".", "lexDot", - "'", "lexCharacterLiteral", - "0", "lexNumber", - "1", "lexDecimal", - "2", "lexDecimal", - "3", "lexDecimal", - "4", "lexDecimal", - "5", "lexDecimal", - "6", "lexDecimal", - "7", "lexDecimal", - "8", "lexDecimal", - "9", "lexDecimal", - "q\"", "lexDelimitedString", - "q{", "lexTokenString", - "r\"", "lexWysiwygString", - "x\"", "lexHexString", - " ", "lexWhitespace", - "\t", "lexWhitespace", - "\r", "lexWhitespace", - "\n", "lexWhitespace", - "\u2028", "lexLongNewline", - "\u2029", "lexLongNewline", - "#!", "lexScriptLine", - "#line", "lexSpecialTokenSequence" - ]; - mixin Lexer!(IdType, Token, lexIdentifier, staticTokens, dynamicTokens, pseudoTokens, pseudoTokenHandlers, possibleDefaultTokens); @@ -1356,7 +1356,7 @@ public struct DLexer } else { - error("Error: Expected ' to end character literal ", cast(char) range.front); + error("Error: Expected ' to end character literal"); return Token(); } } @@ -1447,14 +1447,26 @@ public struct DLexer auto mark = range.mark(); }; - void error(...) pure nothrow @safe { - + void error(string message) pure nothrow @safe + { + messages ~= Message(range.line, range.column, message, true); } - void warning(...) pure nothrow @safe { - + void warning(string message) pure nothrow @safe + { + messages ~= Message(range.line, range.column, message, false); + assert (messages.length > 0); } + struct Message + { + size_t line; + size_t column; + string message; + bool isError; + } + + Message[] messages; StringCache* cache; LexerConfig config; }