diff --git a/driver/cl_options.cpp b/driver/cl_options.cpp
index f032708eb7..4c0f502d21 100644
--- a/driver/cl_options.cpp
+++ b/driver/cl_options.cpp
@@ -17,6 +17,9 @@
 
 namespace opts {
 
+// Filled by parseCommandLine in main.cpp; read when hashing for ir2obj caching.
+llvm::SmallVector allArguments;
+
 /* Option parser that defaults to zero when no explicit number is given.
  * i.e.: -cov --> value = 0
  * -cov=9 --> value = 9
@@ -198,7 +201,7 @@ static cl::opt unittest("unittest",
 
 cl::opt ir2objCacheDir("ir2obj-cache",
     cl::desc("Use to cache object files for whole IR modules (experimental)"),
-    cl::value_desc("cache dir"), cl::Prefix);
+    cl::value_desc("cache dir"));
 
 static StringsAdapter strImpPathStore("J", global.params.fileImppath);
 static cl::list
diff --git a/driver/cl_options.h b/driver/cl_options.h
index 817a6b2fd3..ba46f75de2 100644
--- a/driver/cl_options.h
+++ b/driver/cl_options.h
@@ -17,6 +17,7 @@
 
 #include "driver/targetmachine.h"
 #include "gen/cl_helpers.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
 #include
@@ -28,6 +29,10 @@
 namespace opts {
 namespace cl = llvm::cl;
 
+/// Stores the commandline arguments list (argv[0], config-file switches, then
+/// argv[1..]), including the ones specified by response files.
+extern llvm::SmallVector allArguments;
+
 /* Mostly generated with the following command:
    egrep -e '^(cl::|#if|#e)' gen/cl_options.cpp \
    | sed -re 's/^(cl::.*)\(.*$/ extern \1;/'
diff --git a/driver/ir2obj_cache.cpp b/driver/ir2obj_cache.cpp
index 0e2cd7a2bf..3949bbaa2d 100644
--- a/driver/ir2obj_cache.cpp
+++ b/driver/ir2obj_cache.cpp
@@ -127,6 +127,104 @@ void storeCacheFileName(llvm::StringRef cacheObjectHash,
   llvm::sys::path::append(filePath, llvm::Twine("ircache_") + cacheObjectHash +
                                         "." + global.obj_ext);
 }
+
+// Output to `hash_os` all commandline flags, and try to skip the ones that have
+// no influence on the object code output.
The cmdline flags need to be added
+// to the ir2obj cache hash to uniquely identify the object file output.
+// Because the compiler version is part of the hash, differences in the
+// default settings between compiler versions are already taken care of.
+// (Note: config and response files may also add compiler flags.)
+void outputIR2ObjRelevantCmdlineArgs(llvm::raw_ostream &hash_os)
+{
+  // Use a "whitelist" of cmdline args that do not need to be added to the hash,
+  // and add all others. There is no harm (other than missed cache
+  // opportunities) in adding commandline arguments that also change the hashed
+  // IR, which simplifies the code here.
+  // The code does not deal well with options specified without equals sign, and
+  // will add those to the hash, resulting in missed cache opportunities.
+  for (const char *arg : opts::allArguments) {
+    if (!arg || !arg[0])
+      continue;
+
+    // Out of precaution, all arguments that are not prefixed with '-' are
+    // added to the hash. Such an argument could be a source file "foo.d", but
+    // also a value for the previous argument when the equals sign is omitted,
+    // for example: "-code-model default" becomes "-code-model" "default".
+    // It results in missed cache opportunities. :(
+    if (arg[0] == '-') {
+      if (arg[1] == 'O') {
+        // We deal with -O later ("-O" and "-O3" should hash equally, "" and
+        // "-O0" too)
+        continue;
+      }
+      if (arg[1] == 'c' && !arg[2])
+        continue;
+      // All options starting with these characters can be ignored (LLVM does
+      // not have options starting with capitals)
+      if (arg[1] == 'D' || arg[1] == 'H' || arg[1] == 'I' || arg[1] == 'J' ||
+          arg[1] == 'L' || arg[1] == 'X')
+        continue;
+      if (arg[1] == 'd' || arg[1] == 'v' || arg[1] == 'w') {
+        // LLVM options are long, so short options starting with 'd', 'v' or 'w'
+        // (fewer than 11 characters including the dash) can be ignored.
+        unsigned len = 2;
+        for (; len < 11; ++len)
+          if (!arg[len])
+            break;
+        if (len < 11)
+          continue;
+      }
+      // "-of..." can be ignored
+      if (arg[1] == 'o' && arg[2] == 'f')
+        continue;
+      // "-od..." can be ignored
+      if (arg[1] == 'o' && arg[2] == 'd')
+        continue;
+      // All "-ir2..." options can be ignored
+      if (arg[1] == 'i' && arg[2] == 'r' && arg[3] == '2')
+        continue;
+      // All effects of -d-version... are already included in the IR hash.
+      if (strncmp(arg+1, "d-version", 9) == 0)
+        continue;
+      // All effects of -unittest are already included in the IR hash.
+      if (strcmp(arg + 1, "unittest") == 0) {
+        continue;
+      }
+
+      // All arguments following -run can safely be ignored
+      if (strcmp(arg + 1, "run") == 0) {
+        break;
+      }
+    }
+
+    // Not skipped: hash it, NUL-terminated so "-a" "bc" and "-ab" "c" differ.
+    hash_os << arg << '\0';
+  }
+
+  // Adding these options to the hash should not be needed after adding all
+  // cmdline args. We keep this code here however, in case we find a different
+  // solution for dealing with LLVM commandline flags. See GH #1773.
+  // Also, having these options explicitly added to the hash protects against
+  // the possibility of different default settings on different platforms (while
+  // sharing the cache).
+  outputOptimizationSettings(hash_os);
+  hash_os << opts::mCPU;
+  for (auto &attr : opts::mAttrs) {
+    hash_os << attr;
+  }
+  hash_os << opts::mFloatABI;
+  hash_os << opts::mRelocModel;
+  hash_os << opts::mCodeModel;
+  hash_os << opts::disableFpElim;
+}
+
+// Output to `hash_os` all environment flags that influence object code output
+// in ways that are not observable in the pre-LLVM passes IR used for hashing.
+void outputIR2ObjRelevantEnvironmentOpts(llvm::raw_ostream &hash_os)
+{
+  // There are no relevant environment options at the moment.
+}
+
 }
 
 namespace ir2obj {
@@ -138,17 +236,11 @@ void calculateModuleHash(llvm::Module *m, llvm::SmallString<32> &str) {
   hash_os << global.ldc_version << global.version << global.llvm_version
           << ldc::built_with_Dcompiler_version;
 
-  // Let hash depend on a few compile flags that change the outputted obj file,
-  // but whose changes are not always observable in the IR:
-  hash_os << codeGenOptLevel();
-  hash_os << opts::mCPU;
-  for (auto &attr : opts::mAttrs) {
-    hash_os << attr;
-  }
-  hash_os << opts::mFloatABI;
-  hash_os << opts::mRelocModel;
-  hash_os << opts::mCodeModel;
-  hash_os << opts::disableFpElim;
+  // Let hash depend on compile flags that change the outputted obj file,
+  // but whose changes are not always observable in the pre-optimized IR used
+  // for hashing:
+  outputIR2ObjRelevantCmdlineArgs(hash_os);
+  outputIR2ObjRelevantEnvironmentOpts(hash_os);
 
   llvm::WriteBitcodeToFile(m, hash_os);
   hash_os.resultAsString(str);
diff --git a/driver/main.cpp b/driver/main.cpp
index ca945e5931..c1cdf5b80a 100644
--- a/driver/main.cpp
+++ b/driver/main.cpp
@@ -401,18 +401,17 @@ void parseCommandLine(int argc, char **argv, Strings &sourceFiles,
   global.params.moduleDepsFile = nullptr;
 
   // Build combined list of command line arguments.
-  llvm::SmallVector final_args;
-  final_args.push_back(argv[0]);
+  opts::allArguments.push_back(argv[0]);
 
   ConfigFile cfg_file;
   const char *explicitConfFile = tryGetExplicitConfFile(argc, argv);
   std::string cfg_triple = tryGetExplicitTriple(argc, argv).getTriple();
   // just ignore errors for now, they are still printed
   cfg_file.read(explicitConfFile, cfg_triple.c_str());
-  final_args.insert(final_args.end(), cfg_file.switches_begin(),
-                    cfg_file.switches_end());
+  opts::allArguments.insert(opts::allArguments.end(), cfg_file.switches_begin(),
+                            cfg_file.switches_end()); // NOTE(review): these pointers now outlive the local cfg_file - confirm they stay valid
 
-  final_args.insert(final_args.end(), &argv[1], &argv[argc]);
+  opts::allArguments.insert(opts::allArguments.end(), &argv[1], &argv[argc]);
 
   cl::SetVersionPrinter(&printVersion);
   hideLLVMOptions();
@@ -429,11 +428,11 @@ void parseCommandLine(int argc, char **argv, Strings &sourceFiles,
                           cl::TokenizeGNUCommandLine
 #endif
                           ,
-                          final_args);
+                          opts::allArguments);
 #endif
 
-  cl::ParseCommandLineOptions(final_args.size(),
-                              const_cast(final_args.data()),
+  cl::ParseCommandLineOptions(opts::allArguments.size(),
+                              const_cast(opts::allArguments.data()),
                               "LDC - the LLVM D compiler\n");
 
   helpOnly = mCPU == "help" ||
diff --git a/gen/optimizer.cpp b/gen/optimizer.cpp
index cb1b791a08..7ec1ba38e4 100644
--- a/gen/optimizer.cpp
+++ b/gen/optimizer.cpp
@@ -455,3 +455,22 @@ void verifyModule(llvm::Module *m) {
   }
   Logger::println("Verification passed!");
 }
+
+// Output to `hash_os` all optimization settings that influence object code output
+// and that are not observable in the IR.
+// This is used to calculate the hash used for caching that uniquely identifies
+// the object file output.
+void outputOptimizationSettings(llvm::raw_ostream &hash_os) { // each setting is streamed in this fixed order, without separators
+  hash_os << optimizeLevel;
+  hash_os << willInline();
+  hash_os << disableLangSpecificPasses;
+  hash_os << disableSimplifyDruntimeCalls;
+  hash_os << disableSimplifyLibCalls;
+  hash_os << disableGCToStack;
+  hash_os << unitAtATime;
+  hash_os << stripDebug;
+  hash_os << opts::sanitize;
+  hash_os << disableLoopUnrolling;
+  hash_os << disableLoopVectorization;
+  hash_os << disableSLPVectorization;
+}
diff --git a/gen/optimizer.h b/gen/optimizer.h
index 84414e668d..8a2b6c037d 100644
--- a/gen/optimizer.h
+++ b/gen/optimizer.h
@@ -20,6 +20,10 @@
 
 #include "llvm/Support/CommandLine.h"
 
+namespace llvm {
+class raw_ostream; // forward declaration; only a reference to it is used below
+}
+
 namespace opts {
 
 enum SanitizerCheck {
@@ -49,4 +53,6 @@ llvm::CodeGenOpt::Level codeGenOptLevel();
 
 void verifyModule(llvm::Module *m);
 
+void outputOptimizationSettings(llvm::raw_ostream &hash_os); // writes the optimizer settings to hash_os
+
 #endif
diff --git a/tests/linking/ir2obj_caching_flags1.d b/tests/linking/ir2obj_caching_flags1.d
new file mode 100644
index 0000000000..1baceb03b1
--- /dev/null
+++ b/tests/linking/ir2obj_caching_flags1.d
@@ -0,0 +1,37 @@
+// Test that certain cmdline flags result in different ir2obj cache objects, even though the LLVM IR may be the same.
+
+// Note that the NO_HIT tests should change the default setting of the tested flag.
+
+// Create and then empty the cache for correct testing when running the test multiple times.
+// RUN: %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache \
+// RUN: && %prunecache -f %T/flag1cache --max-bytes=1 \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O3 -vv | FileCheck --check-prefix=MUST_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O2 -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O4 -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O5 -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -Os -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -Oz -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-d-passes -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-simplify-drtcalls -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-simplify-libcalls -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-gc2stack -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -enable-inlining -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -unit-at-a-time=false -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -strip-debug -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-loop-unrolling -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-loop-vectorization -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-slp-vectorization -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -vectorize-loops -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -v -wi -d -vv | FileCheck --check-prefix=MUST_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -D -H -I. -J. -vv | FileCheck --check-prefix=MUST_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -d-version=Irrelevant -vv | FileCheck --check-prefix=MUST_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -unittest -vv | FileCheck --check-prefix=MUST_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -vv | FileCheck --check-prefix=MUST_HIT %s
+// The last test is a MUST_HIT test, to make sure that the cache wasn't pruned somehow, which could effectively disable some NO_HIT tests.
+
+// MUST_HIT: Cache object found!
+// NO_HIT-NOT: Cache object found!
+
+void foo() {} // empty payload; the RUN lines above only vary the compiler flags