From 8a444fbd8910ababfaf34f7c0a881f96f2929a66 Mon Sep 17 00:00:00 2001
From: Hackerpilot <briancschott@gmail.com>
Date: Mon, 20 Jan 2014 20:09:32 -0800
Subject: [PATCH] Initial static analysis checks

---
 README.md                   | 32 +++++++++++++-
 analysis/base.d             | 37 ++++++++++++++++
 analysis/enumarrayliteral.d | 72 +++++++++++++++++++++++++++++++
 analysis/package.d          |  6 +++
 analysis/pokemon.d          | 52 ++++++++++++++++++++++
 analysis/run.d              | 73 +++++++++++++++++++++++++++++++
 style.d => analysis/style.d | 49 ++++++++-------------
 build.sh                    |  4 +-
 main.d                      | 10 +++--
 stdx/d/lexer.d              | 86 +++++++++++++++++++++----------------
 10 files changed, 345 insertions(+), 76 deletions(-)
 create mode 100644 analysis/base.d
 create mode 100644 analysis/enumarrayliteral.d
 create mode 100644 analysis/package.d
 create mode 100644 analysis/pokemon.d
 create mode 100644 analysis/run.d
 rename style.d => analysis/style.d (58%)

diff --git a/README.md b/README.md
index 35bc363..2d1a455 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,12 @@
 # Overview
 DScanner is a tool for analyzing D source code
 
+### Building and installing
+To build DScanner, run the build.sh script (or the build.bat file on Windows).
+The build time can be rather long with the -inline flag (over 2 minutes on an
+i7 processor), so you may wish to remove it from the build script. To install,
+simply place the generated binary somewhere on your $PATH.
+
 # Usage
 The following examples assume that we are analyzing a simple file called helloworld.d
 
@@ -29,8 +35,30 @@ while lexing or parsing the given source file. It does not do any semantic
 analysis and it does not compile the code.
 
 ### Style Check
-The "--styleCheck" option checks the names of packages, variables, enums,
-classes, and other things for consistency with the Phobos style guide.
+The "--styleCheck" option runs some basic static analysis checks against the
+given source files.
+
+#### Implemented checks
+* Old alias syntax (i.e "alias a b;" should be replaced with "alias b = a;").
+* Implicit concatenation of string literals.
+* Complex number literals (e.g. "1.23i").
+* Empty declarations (i.e. random ";" characters)
+* enum array literals in struct/class bodies
+* Avoid Pokémon exception handling
+
+#### Wishlish
+* Assigning to foreach variables that are not "ref".
+* opCmp or opEquals, or toHash not declared "const".
+* Unused variables.
+* Unused imports.
+* Unused parameters (check is skipped if function is marked "override")
+* Struct constructors that have a single parameter that has a default argument.
+* Variables that are never modified and not declared immutable.
+* Public declarations not documented
+* Format numbers for readability
+* Declaring opEquals without toHash
+* Assignment in conditionals
+* delete keyword is deprecated
 
 ### Line of Code Count
 The "--sloc" or "-l" option prints the number of lines of code in the file.
diff --git a/analysis/base.d b/analysis/base.d
new file mode 100644
index 0000000..a5d6ae9
--- /dev/null
+++ b/analysis/base.d
@@ -0,0 +1,37 @@
+module analysis.base;
+
+import std.string;
+import stdx.d.ast;
+
+abstract class BaseAnalyzer : ASTVisitor
+{
+public:
+	this(string fileName)
+	{
+		this.fileName = fileName;
+	}
+
+	string[] messages()
+	{
+		return _messages;
+	}
+
+protected:
+
+	import core.vararg;
+
+	void addErrorMessage(size_t line, size_t column, string message)
+	{
+		_messages ~= format("%s(%d:%d)[warn]: %s", fileName, line, column, message);
+	}
+
+	/**
+	 * The file name
+	 */
+	string fileName;
+
+	/**
+	 * Map of file names to warning messages for that file
+	 */
+	string[] _messages;
+}
diff --git a/analysis/enumarrayliteral.d b/analysis/enumarrayliteral.d
new file mode 100644
index 0000000..93d7458
--- /dev/null
+++ b/analysis/enumarrayliteral.d
@@ -0,0 +1,72 @@
+//          Copyright Brian Schott (Sir Alaran) 2014.
+// Distributed under the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE_1_0.txt or copy at
+//          http://www.boost.org/LICENSE_1_0.txt)
+
+module analysis.enumarrayliteral;
+
+import stdx.d.ast;
+import stdx.d.lexer;
+import analysis.base;
+
+void doNothing(string, size_t, size_t, string, bool) {}
+
+class EnumArrayLiteralCheck : BaseAnalyzer
+{
+	alias visit = BaseAnalyzer.visit;
+
+	this(string fileName)
+	{
+		super(fileName);
+	}
+
+	bool inAggregate = false;
+	bool looking = false;
+
+	template visitTemplate(T)
+	{
+		override void visit(T structDec)
+		{
+			inAggregate = true;
+			structDec.accept(this);
+			inAggregate = false;
+		}
+	}
+
+	mixin visitTemplate!ClassDeclaration;
+	mixin visitTemplate!InterfaceDeclaration;
+	mixin visitTemplate!UnionDeclaration;
+	mixin visitTemplate!StructDeclaration;
+
+	override void visit(Declaration dec)
+	{
+		if (inAggregate) foreach (attr; dec.attributes)
+		{
+			if (attr.storageClass !is null &&
+				attr.storageClass.token == tok!"enum")
+			{
+				looking = true;
+			}
+		}
+		dec.accept(this);
+		looking = false;
+	}
+
+	override void visit(AutoDeclaration autoDec)
+	{
+		if (looking)
+		{
+			foreach (i, initializer; autoDec.initializers)
+			{
+				if (initializer is null) continue;
+				if (initializer.nonVoidInitializer is null) continue;
+				if (initializer.nonVoidInitializer.arrayInitializer is null) continue;
+				addErrorMessage(autoDec.identifiers[i].line,
+					autoDec.identifiers[i].column, "This enum may lead to "
+					~ "unnecessary allocation at run-time. Use 'static immutable "
+					~ autoDec.identifiers[i].text ~ " = [ ...' instead.");
+			}
+		}
+		autoDec.accept(this);
+	}
+}
diff --git a/analysis/package.d b/analysis/package.d
new file mode 100644
index 0000000..225b82e
--- /dev/null
+++ b/analysis/package.d
@@ -0,0 +1,6 @@
+module analysis;
+
+public import analysis.style;
+public import analysis.enumarrayliteral;
+public import analysis.pokemon;
+public import analysis.base;
diff --git a/analysis/pokemon.d b/analysis/pokemon.d
new file mode 100644
index 0000000..1840f8b
--- /dev/null
+++ b/analysis/pokemon.d
@@ -0,0 +1,52 @@
+//          Copyright Brian Schott (Sir Alaran) 2014.
+// Distributed under the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE_1_0.txt or copy at
+//          http://www.boost.org/LICENSE_1_0.txt)
+
+module analysis.pokemon;
+
+import stdx.d.ast;
+import stdx.d.lexer;
+import analysis.base;
+
+/**
+ * Checks for Pokémon exception handling, i.e. "gotta' catch 'em all".
+ *
+ * ---
+ * catch (Exception e)
+ * ...
+ * ---
+ */
+class PokemonExceptionCheck : BaseAnalyzer
+{
+	alias visit = BaseAnalyzer.visit;
+
+	this(string fileName)
+	{
+		super(fileName);
+	}
+
+	override void visit(Catch c)
+	{
+		if (c.type.type2.symbol.identifierOrTemplateChain.identifiersOrTemplateInstances.length != 1)
+		{
+			c.accept(this);
+			return;
+		}
+		auto identOrTemplate = c.type.type2.symbol.identifierOrTemplateChain.identifiersOrTemplateInstances[0];
+		if (identOrTemplate.templateInstance !is null)
+		{
+			c.accept(this);
+			return;
+		}
+		if (identOrTemplate.identifier.text == "Exception"
+			|| identOrTemplate.identifier.text == "Throwable"
+			|| identOrTemplate.identifier.text == "Error")
+		{
+			immutable column = identOrTemplate.identifier.column;
+			immutable line = identOrTemplate.identifier.line;
+			addErrorMessage(line, column, "Avoid catching Exception, Error, and Throwable");
+		}
+		c.accept(this);
+	}
+}
diff --git a/analysis/run.d b/analysis/run.d
new file mode 100644
index 0000000..19fa961
--- /dev/null
+++ b/analysis/run.d
@@ -0,0 +1,73 @@
+module analysis.run;
+
+import std.stdio;
+import std.array;
+import std.conv;
+import std.algorithm;
+import std.range;
+import std.array;
+
+import stdx.d.lexer;
+import stdx.d.parser;
+import stdx.d.ast;
+
+import analysis.base;
+import analysis.style;
+import analysis.enumarrayliteral;
+import analysis.pokemon;
+
+void messageFunction(string fileName, size_t line, size_t column, string message,
+	bool isError)
+{
+	writefln("%s(%d:%d)[%s]: %s", fileName, line, column,
+		isError ? "error" : "warn", message);
+}
+
+void syntaxCheck(File output, string[] fileNames)
+{
+	analyze(output, fileNames, false);
+}
+
+void analyze(File output, string[] fileNames, bool staticAnalyze = true)
+{
+	foreach (fileName; fileNames)
+	{
+		File f = File(fileName);
+		auto bytes = uninitializedArray!(ubyte[])(to!size_t(f.size));
+		f.rawRead(bytes);
+		auto lexer = byToken(bytes);
+		auto app = appender!(typeof(lexer.front)[])();
+		while (!lexer.empty)
+		{
+			app.put(lexer.front);
+			lexer.popFront();
+		}
+
+		foreach (message; lexer.messages)
+		{
+			messageFunction(fileName, message.line, message.column, message.message,
+				message.isError);
+		}
+
+		Module m = parseModule(app.data, fileName, &messageFunction);
+
+		if (!staticAnalyze)
+			return;
+
+		auto style = new StyleChecker(fileName);
+		style.visit(m);
+
+		auto enums = new EnumArrayLiteralCheck(fileName);
+		enums.visit(m);
+
+		auto pokemon = new PokemonExceptionCheck(fileName);
+		pokemon.visit(m);
+
+		foreach (message; sort(chain(enums.messages, style.messages,
+			pokemon.messages).array))
+		{
+			writeln(message);
+		}
+	}
+}
+
diff --git a/style.d b/analysis/style.d
similarity index 58%
rename from style.d
rename to analysis/style.d
index 6e984fa..007e8db 100644
--- a/style.d
+++ b/analysis/style.d
@@ -3,48 +3,35 @@
 //    (See accompanying file LICENSE_1_0.txt or copy at
 //          http://www.boost.org/LICENSE_1_0.txt)
 
-module style;
+module analysis.style;
 
 import stdx.d.ast;
 import stdx.d.lexer;
-import stdx.d.parser;
-import std.stdio;
 import std.regex;
 import std.array;
 import std.conv;
+import std.format;
 
-// TODO: Warn on assigning to non-ref foreach item.
+import analysis.base;
 
-void doNothing(string, size_t, size_t, string, bool) {}
-
-void styleCheck(File output, string[] fileNames)
-{
-	foreach (fileName; fileNames)
-	{
-		File f = File(fileName);
-		auto bytes = uninitializedArray!(ubyte[])(to!size_t(f.size));
-		f.rawRead(bytes);
-		auto tokens = byToken(bytes);
-		Module m = parseModule(tokens.array, fileName, &doNothing);
-		auto checker = new StyleChecker;
-		checker.fileName = fileName;
-		checker.visit(m);
-	}
-}
-
-class StyleChecker : ASTVisitor
+class StyleChecker : BaseAnalyzer
 {
 	enum varFunNameRegex = `^([\p{Ll}_][_\w\d]*|[\p{Lu}\d_]+)$`;
 	enum aggregateNameRegex = `^\p{Lu}[\w\d]*$`;
 	enum moduleNameRegex = `^\p{Ll}+$`;
 
+	this(string fileName)
+	{
+		super(fileName);
+	}
+
 	override void visit(ModuleDeclaration dec)
 	{
 		foreach (part; dec.moduleName.identifiers)
 		{
 			if (part.text.matchFirst(moduleNameRegex).length == 0)
-				writeln(fileName, "(", part.line, ":", part.column, ") ",
-					"Module/package name ", part.text, " does not match style guidelines");
+				addErrorMessage(part.line, part.column, "Module/package name "
+					~ part.text ~ " does not match style guidelines");
 		}
 	}
 
@@ -61,8 +48,8 @@ class StyleChecker : ASTVisitor
 	void checkLowercaseName(string type, ref Token name)
 	{
 		if (name.text.matchFirst(varFunNameRegex).length == 0)
-			writeln(fileName, "(", name.line, ":", name.column, ") ",
-				type, " name ", name.text, " does not match style guidelines");
+			addErrorMessage(name.line, name.column, type ~ " name "
+				~ name.text ~ " does not match style guidelines");
 	}
 
 	override void visit(ClassDeclaration dec)
@@ -91,14 +78,12 @@ class StyleChecker : ASTVisitor
 		dec.accept(this);
 	}
 
-	void checkAggregateName(string aggregateType, ref Token name)
+	void checkAggregateName(string aggregateType, ref const Token name)
 	{
 		if (name.text.matchFirst(aggregateNameRegex).length == 0)
-			writeln(fileName, "(", name.line, ":", name.column, ") ",
-				aggregateType, " name ", name.text,
-				" does not match style guidelines");
+			addErrorMessage(name.line, name.column, aggregateType
+				~ " name '" ~ name.text ~ "' does not match style guidelines");
 	}
 
-	alias ASTVisitor.visit visit;
-	string fileName;
+	alias visit = ASTVisitor.visit;
 }
diff --git a/build.sh b/build.sh
index 3a81079..297dd12 100755
--- a/build.sh
+++ b/build.sh
@@ -7,12 +7,12 @@ dmd\
 	astprinter.d\
 	formatter.d\
 	outliner.d\
-	style.d\
 	stdx/*.d\
 	stdx/d/*.d\
+	analysis/*.d\
 	-ofdscanner\
 	-m64\
-	-O -release -noboundscheck
+	-O -release -inline -noboundscheck
 
 #gdc\
 #	main.d\
diff --git a/main.d b/main.d
index 41d2210..b48f67f 100644
--- a/main.d
+++ b/main.d
@@ -23,7 +23,7 @@ import ctags;
 import astprinter;
 import imports;
 import outliner;
-import style;
+import analysis.run;
 
 int main(string[] args)
 {
@@ -123,7 +123,11 @@ int main(string[] args)
 	}
 	else if (styleCheck)
 	{
-		stdout.styleCheck(expandArgs(args, recursive));
+		stdout.analyze(expandArgs(args, recursive));
+	}
+	else if (syntaxCheck)
+	{
+		stdout.syntaxCheck(expandArgs(args, recursive));
 	}
 	else
 	{
@@ -162,7 +166,7 @@ int main(string[] args)
 				writefln("total:\t%d", count);
 			}
 		}
-		else if (syntaxCheck || imports || ast || outline)
+		else if (imports || ast || outline)
 		{
 			auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1]));
 			auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]);
diff --git a/stdx/d/lexer.d b/stdx/d/lexer.d
index 6cb620e..fb21436 100644
--- a/stdx/d/lexer.d
+++ b/stdx/d/lexer.d
@@ -50,6 +50,38 @@ private enum dynamicTokens = [
 	"dstringLiteral", "stringLiteral", "wstringLiteral", "scriptLine"
 ];
 
+private enum pseudoTokenHandlers = [
+	"\"", "lexStringLiteral",
+	"`", "lexWysiwygString",
+	"//", "lexSlashSlashComment",
+	"/*", "lexSlashStarComment",
+	"/+", "lexSlashPlusComment",
+	".", "lexDot",
+	"'", "lexCharacterLiteral",
+	"0", "lexNumber",
+	"1", "lexDecimal",
+	"2", "lexDecimal",
+	"3", "lexDecimal",
+	"4", "lexDecimal",
+	"5", "lexDecimal",
+	"6", "lexDecimal",
+	"7", "lexDecimal",
+	"8", "lexDecimal",
+	"9", "lexDecimal",
+	"q\"", "lexDelimitedString",
+	"q{", "lexTokenString",
+	"r\"", "lexWysiwygString",
+	"x\"", "lexHexString",
+	" ", "lexWhitespace",
+	"\t", "lexWhitespace",
+	"\r", "lexWhitespace",
+	"\n", "lexWhitespace",
+	"\u2028", "lexLongNewline",
+	"\u2029", "lexLongNewline",
+	"#!", "lexScriptLine",
+	"#line", "lexSpecialTokenSequence"
+];
+
 public alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens);
 public alias str = tokenStringRepresentation!(IdType, staticTokens, dynamicTokens, possibleDefaultTokens);
 public template tok(string token)
@@ -373,38 +405,6 @@ public struct DLexer
 {
 	import core.vararg;
 
-	private enum pseudoTokenHandlers = [
-		"\"", "lexStringLiteral",
-		"`", "lexWysiwygString",
-		"//", "lexSlashSlashComment",
-		"/*", "lexSlashStarComment",
-		"/+", "lexSlashPlusComment",
-		".", "lexDot",
-		"'", "lexCharacterLiteral",
-		"0", "lexNumber",
-		"1", "lexDecimal",
-		"2", "lexDecimal",
-		"3", "lexDecimal",
-		"4", "lexDecimal",
-		"5", "lexDecimal",
-		"6", "lexDecimal",
-		"7", "lexDecimal",
-		"8", "lexDecimal",
-		"9", "lexDecimal",
-		"q\"", "lexDelimitedString",
-		"q{", "lexTokenString",
-		"r\"", "lexWysiwygString",
-		"x\"", "lexHexString",
-		" ", "lexWhitespace",
-		"\t", "lexWhitespace",
-		"\r", "lexWhitespace",
-		"\n", "lexWhitespace",
-		"\u2028", "lexLongNewline",
-		"\u2029", "lexLongNewline",
-		"#!", "lexScriptLine",
-		"#line", "lexSpecialTokenSequence"
-	];
-
 	mixin Lexer!(IdType, Token, lexIdentifier, staticTokens,
 		dynamicTokens, pseudoTokens, pseudoTokenHandlers, possibleDefaultTokens);
 
@@ -1356,7 +1356,7 @@ public struct DLexer
 		}
 		else
 		{
-			error("Error: Expected ' to end character literal ", cast(char) range.front);
+			error("Error: Expected ' to end character literal");
 			return Token();
 		}
 	}
@@ -1447,14 +1447,26 @@ public struct DLexer
 		auto mark = range.mark();
 	};
 
-	void error(...) pure nothrow @safe {
-
+	void error(string message) pure nothrow @safe
+	{
+		messages ~= Message(range.line, range.column, message, true);
 	}
 
-	void warning(...) pure nothrow @safe {
-
+	void warning(string message) pure nothrow @safe
+	{
+		messages ~= Message(range.line, range.column, message, false);
+		assert (messages.length > 0);
 	}
 
+	struct Message
+	{
+		size_t line;
+		size_t column;
+		string message;
+		bool isError;
+	}
+
+	Message[] messages;
 	StringCache* cache;
 	LexerConfig config;
 }