ir2obj cache: Add all cmdline args to the hash, unless we are certain they can safely be ignored for ir2obj hashing.

This commit is contained in:
Johan Engelen 2016-09-19 20:37:22 +02:00
parent 197c81439d
commit f6919e3fb6
7 changed files with 181 additions and 20 deletions

View file

@ -17,6 +17,9 @@
namespace opts { namespace opts {
// This vector is filled by parseCommandLine in main.cpp.
llvm::SmallVector<const char *, 32> allArguments;
/* Option parser that defaults to zero when no explicit number is given. /* Option parser that defaults to zero when no explicit number is given.
* i.e.: -cov --> value = 0 * i.e.: -cov --> value = 0
* -cov=9 --> value = 9 * -cov=9 --> value = 9
@ -198,7 +201,7 @@ static cl::opt<bool, true> unittest("unittest",
cl::opt<std::string> cl::opt<std::string>
ir2objCacheDir("ir2obj-cache", cl::desc("Use <cache dir> to cache object files for whole IR modules (experimental)"), ir2objCacheDir("ir2obj-cache", cl::desc("Use <cache dir> to cache object files for whole IR modules (experimental)"),
cl::value_desc("cache dir"), cl::Prefix); cl::value_desc("cache dir"));
static StringsAdapter strImpPathStore("J", global.params.fileImppath); static StringsAdapter strImpPathStore("J", global.params.fileImppath);
static cl::list<std::string, StringsAdapter> static cl::list<std::string, StringsAdapter>

View file

@ -17,6 +17,7 @@
#include "driver/targetmachine.h" #include "driver/targetmachine.h"
#include "gen/cl_helpers.h" #include "gen/cl_helpers.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CodeGen.h" #include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
#include <deque> #include <deque>
@ -28,6 +29,10 @@
namespace opts { namespace opts {
namespace cl = llvm::cl; namespace cl = llvm::cl;
/// Stores the commandline arguments list, including the ones specified by the
/// config and response files.
extern llvm::SmallVector<const char *, 32> allArguments;
/* Mostly generated with the following command: /* Mostly generated with the following command:
egrep -e '^(cl::|#if|#e)' gen/cl_options.cpp \ egrep -e '^(cl::|#if|#e)' gen/cl_options.cpp \
| sed -re 's/^(cl::.*)\(.*$/ extern \1;/' | sed -re 's/^(cl::.*)\(.*$/ extern \1;/'

View file

@ -127,6 +127,104 @@ void storeCacheFileName(llvm::StringRef cacheObjectHash,
llvm::sys::path::append(filePath, llvm::Twine("ircache_") + cacheObjectHash + llvm::sys::path::append(filePath, llvm::Twine("ircache_") + cacheObjectHash +
"." + global.obj_ext); "." + global.obj_ext);
} }
// Output to `hash_os` all commandline flags, and try to skip the ones that have
// no influence on the object code output. The cmdline flags need to be added
// to the ir2obj cache hash to uniquely identify the object file output.
// Because the compiler version is part of the hash, differences in the
// default settings between compiler versions are already taken care of.
// (Note: config and response files may also add compiler flags.)
//
// Every string written to `hash_os` is followed by a NUL byte, so that two
// different argument lists can never produce the same byte stream (without a
// delimiter, "-ab" "c" and "-a" "bc" would hash identically).
//
// NOTE(review): every entry of opts::allArguments that does not start with '-'
// is hashed. If the list also carries the program name (argv[0]), then
// invoking the same compiler through a different path misses the cache --
// confirm whether that is intended.
void outputIR2ObjRelevantCmdlineArgs(llvm::raw_ostream &hash_os)
{
  // Use a "whitelist" of cmdline args that do not need to be added to the hash,
  // and add all others. There is no harm (other than missed cache
  // opportunities) in adding commandline arguments that also change the hashed
  // IR, which simplifies the code here.
  // The code does not deal well with options specified without equals sign, and
  // will add those to the hash, resulting in missed cache opportunities.
  for (const char *arg : opts::allArguments) {
    if (!arg || !arg[0])
      continue;

    // Out of precaution, all arguments that are not prefixed with '-' are
    // added to the hash. Such an argument could be a source file "foo.d", but
    // also a value for the previous argument when the equals sign is omitted,
    // for example: "-code-model default" becomes "-code-model" "default".
    // It results in missed cache opportunities. :(
    if (arg[0] == '-') {
      // We deal with -O later ("-O" and "-O3" should hash equally, "" and
      // "-O0" too).
      if (arg[1] == 'O')
        continue;

      // Exactly "-c" can be ignored.
      if (arg[1] == 'c' && !arg[2])
        continue;

      // All options starting with these characters can be ignored (LLVM does
      // not have options starting with capitals).
      if (arg[1] == 'D' || arg[1] == 'H' || arg[1] == 'I' || arg[1] == 'J' ||
          arg[1] == 'L' || arg[1] == 'X')
        continue;

      // LLVM options are long, so short options (fewer than 11 characters,
      // including the leading '-') starting with 'd', 'v' or 'w' can be
      // ignored. Longer ones fall through to the checks below.
      if ((arg[1] == 'd' || arg[1] == 'v' || arg[1] == 'w') &&
          strlen(arg) < 11)
        continue;

      // "-of..." and "-od..." can be ignored.
      if (strncmp(arg + 1, "of", 2) == 0 || strncmp(arg + 1, "od", 2) == 0)
        continue;

      // All "-ir2..." options can be ignored.
      if (strncmp(arg + 1, "ir2", 3) == 0)
        continue;

      // All effects of -d-version... are already included in the IR hash.
      if (strncmp(arg + 1, "d-version", 9) == 0)
        continue;

      // All effects of -unittest are already included in the IR hash.
      if (strcmp(arg + 1, "unittest") == 0)
        continue;

      // All arguments following -run can safely be ignored.
      if (strcmp(arg + 1, "run") == 0)
        break;
    }

    // If we reach here, add the argument to the hash. The trailing NUL keeps
    // adjacent arguments from running into each other.
    hash_os << arg << '\0';
  }

  // Adding these options to the hash should not be needed after adding all
  // cmdline args. We keep this code here however, in case we find a different
  // solution for dealing with LLVM commandline flags. See GH #1773.
  // Also, having these options explicitly added to the hash protects against
  // the possibility of different default settings on different platforms (while
  // sharing the cache).
  outputOptimizationSettings(hash_os);
  // String-valued settings get a NUL terminator as well, for the same reason
  // as the arguments above.
  hash_os << opts::mCPU << '\0';
  for (const auto &attr : opts::mAttrs) {
    hash_os << attr << '\0';
  }
  hash_os << opts::mFloatABI;
  hash_os << opts::mRelocModel;
  hash_os << opts::mCodeModel;
  hash_os << opts::disableFpElim;
}
// Hook for adding to `hash_os` those environment settings that influence the
// object code output in ways that cannot be observed in the pre-LLVM-passes IR
// used for hashing.
void outputIR2ObjRelevantEnvironmentOpts(llvm::raw_ostream &hash_os)
{
  // Deliberately a no-op: there are no relevant environment options at the
  // moment, so nothing is written to the stream.
  (void)hash_os;
}
} }
namespace ir2obj { namespace ir2obj {
@ -138,17 +236,11 @@ void calculateModuleHash(llvm::Module *m, llvm::SmallString<32> &str) {
hash_os << global.ldc_version << global.version << global.llvm_version hash_os << global.ldc_version << global.version << global.llvm_version
<< ldc::built_with_Dcompiler_version; << ldc::built_with_Dcompiler_version;
// Let hash depend on a few compile flags that change the outputted obj file, // Let hash depend on compile flags that change the outputted obj file,
// but whose changes are not always observable in the IR: // but whose changes are not always observable in the pre-optimized IR used
hash_os << codeGenOptLevel(); // for hashing:
hash_os << opts::mCPU; outputIR2ObjRelevantCmdlineArgs(hash_os);
for (auto &attr : opts::mAttrs) { outputIR2ObjRelevantEnvironmentOpts(hash_os);
hash_os << attr;
}
hash_os << opts::mFloatABI;
hash_os << opts::mRelocModel;
hash_os << opts::mCodeModel;
hash_os << opts::disableFpElim;
llvm::WriteBitcodeToFile(m, hash_os); llvm::WriteBitcodeToFile(m, hash_os);
hash_os.resultAsString(str); hash_os.resultAsString(str);

View file

@ -401,18 +401,17 @@ void parseCommandLine(int argc, char **argv, Strings &sourceFiles,
global.params.moduleDepsFile = nullptr; global.params.moduleDepsFile = nullptr;
// Build combined list of command line arguments. // Build combined list of command line arguments.
llvm::SmallVector<const char *, 32> final_args; opts::allArguments.push_back(argv[0]);
final_args.push_back(argv[0]);
ConfigFile cfg_file; ConfigFile cfg_file;
const char *explicitConfFile = tryGetExplicitConfFile(argc, argv); const char *explicitConfFile = tryGetExplicitConfFile(argc, argv);
std::string cfg_triple = tryGetExplicitTriple(argc, argv).getTriple(); std::string cfg_triple = tryGetExplicitTriple(argc, argv).getTriple();
// just ignore errors for now, they are still printed // just ignore errors for now, they are still printed
cfg_file.read(explicitConfFile, cfg_triple.c_str()); cfg_file.read(explicitConfFile, cfg_triple.c_str());
final_args.insert(final_args.end(), cfg_file.switches_begin(), opts::allArguments.insert(opts::allArguments.end(), cfg_file.switches_begin(),
cfg_file.switches_end()); cfg_file.switches_end());
final_args.insert(final_args.end(), &argv[1], &argv[argc]); opts::allArguments.insert(opts::allArguments.end(), &argv[1], &argv[argc]);
cl::SetVersionPrinter(&printVersion); cl::SetVersionPrinter(&printVersion);
hideLLVMOptions(); hideLLVMOptions();
@ -429,11 +428,11 @@ void parseCommandLine(int argc, char **argv, Strings &sourceFiles,
cl::TokenizeGNUCommandLine cl::TokenizeGNUCommandLine
#endif #endif
, ,
final_args); opts::allArguments);
#endif #endif
cl::ParseCommandLineOptions(final_args.size(), cl::ParseCommandLineOptions(opts::allArguments.size(),
const_cast<char **>(final_args.data()), const_cast<char **>(opts::allArguments.data()),
"LDC - the LLVM D compiler\n"); "LDC - the LLVM D compiler\n");
helpOnly = mCPU == "help" || helpOnly = mCPU == "help" ||

View file

@ -455,3 +455,22 @@ void verifyModule(llvm::Module *m) {
} }
Logger::println("Verification passed!"); Logger::println("Verification passed!");
} }
// Writes to `hash_os` every optimization setting that influences the object
// code output without being observable in the IR.
// The result feeds the hash used for caching, which has to uniquely identify
// the object file output.
void outputOptimizationSettings(llvm::raw_ostream &hash_os) {
  // The order of the values is part of the hash input; keep it stable.
  hash_os << optimizeLevel << willInline();
  hash_os << disableLangSpecificPasses << disableSimplifyDruntimeCalls
          << disableSimplifyLibCalls << disableGCToStack;
  hash_os << unitAtATime << stripDebug << opts::sanitize;
  hash_os << disableLoopUnrolling << disableLoopVectorization
          << disableSLPVectorization;
}

View file

@ -20,6 +20,10 @@
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
namespace llvm {
class raw_ostream;
}
namespace opts { namespace opts {
enum SanitizerCheck { enum SanitizerCheck {
@ -49,4 +53,6 @@ llvm::CodeGenOpt::Level codeGenOptLevel();
void verifyModule(llvm::Module *m); void verifyModule(llvm::Module *m);
void outputOptimizationSettings(llvm::raw_ostream &hash_os);
#endif #endif

View file

@ -0,0 +1,37 @@
// Test that certain cmdline flags result in different ir2obj cache objects, even though the LLVM IR may be the same.
// Note that the NO_HIT tests should change the default setting of the tested flag.
// Create and then empty the cache for correct testing when running the test multiple times.
// RUN: %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache \
// RUN: && %prunecache -f %T/flag1cache --max-bytes=1 \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O3 -vv | FileCheck --check-prefix=MUST_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O2 -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O4 -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O5 -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -Os -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -Oz -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-d-passes -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-simplify-drtcalls -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-simplify-libcalls -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-gc2stack -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -enable-inlining -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -unit-at-a-time=false -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -strip-debug -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-loop-unrolling -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-loop-vectorization -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-slp-vectorization -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -vectorize-loops -vv | FileCheck --check-prefix=NO_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -v -wi -d -vv | FileCheck --check-prefix=MUST_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -D -H -I. -J. -vv | FileCheck --check-prefix=MUST_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -d-version=Irrelevant -vv | FileCheck --check-prefix=MUST_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -unittest -vv | FileCheck --check-prefix=MUST_HIT %s \
// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -vv | FileCheck --check-prefix=MUST_HIT %s
// The last test is a MUST_HIT test, to make sure that the cache wasn't pruned somehow, which could effectively disable some NO_HIT tests.
// MUST_HIT: Cache object found!
// NO_HIT-NOT: Cache object found!
void foo() {}