//===-- driver/cache.cpp --------------------------------------------------===//
//
//                         LDC – the LLVM D compiler
//
// This file is distributed under the BSD-style LDC license. See the LICENSE
// file for details.
//
//===----------------------------------------------------------------------===//
//
// Contains LLVM IR to object code cache functionality.
//
// After LLVM IR codegen, the LLVM IR module is hashed for lookup in the cache
// directory. If the cache directory contains the object file <hash>.o,
// that file is used and machine code gen is skipped entirely. If the cache
// doesn't contain that file, machine codegen happens as normal and the object
// code is added to the cache.
// The goal is to speed up successive builds of large codebases after minor
// changes that trigger recompilation of many files but with little effective
// changes (in the extreme case, adding a comment in a "globals.d").
//
// The current limitation is that hashing and cache look-up are done with
// whole-module granularity. Future work could attempt to do this for module
// "fragments".
//
// The hash depends on the IR code (obviously), but also on the compiler+LLVM
// versions and several compile flags (e.g. -O*, -mcpu, and -mattr).
//
//===----------------------------------------------------------------------===//

#include "driver/cache.h"

#include "dmd/errors.h"
#include "dmd/target.h"
#include "driver/cache_pruning.h"
#include "driver/cl_options.h"
#include "driver/cl_options_sanitizers.h"
#include "driver/ldc-version.h"
#include "gen/logger.h"
#include "gen/optimizer.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"

// Include close() declaration.
#if !defined(_MSC_VER) && !defined(__MINGW32__) #include #else #include #endif #if LDC_POSIX #include #include static std::error_code createHardLink(const char *to, const char *from) { if (link(to, from) == 0) return std::error_code(0, std::system_category()); else return std::error_code(errno, std::system_category()); } static std::error_code createSymLink(const char *to, const char *from) { if (symlink(to, from) == 0) return std::error_code(0, std::system_category()); else return std::error_code(errno, std::system_category()); } #elif _WIN32 #include namespace llvm { namespace sys { namespace windows { // Fwd declaration to an internal LLVM function. std::error_code widenPath(const llvm::Twine &Path8, llvm::SmallVectorImpl &Path16, size_t MaxPathLen = MAX_PATH); } } // namespace sys } // namespace llvm namespace { template std::error_code createLink(FType f, const char *to, const char *from) { //===----------------------------------------------------------------------===// // // Code copied from LLVM 3.9 llvm/Support/Windows/Path.inc, distributed under // the University of Illinois Open Source License. See LICENSE for details. 
// //===----------------------------------------------------------------------===// using llvm::sys::windows::widenPath; llvm::SmallVector wide_from; llvm::SmallVector wide_to; if (auto errorcode = widenPath(from, wide_from)) return errorcode; if (auto errorcode = widenPath(to, wide_to)) return errorcode; if (!(*f)(wide_from.begin(), wide_to.begin(), NULL)) return std::error_code(GetLastError(), std::system_category());; return std::error_code(0, std::system_category()); } } static std::error_code createHardLink(const char *to, const char *from) { return createLink(&CreateHardLinkW, to, from); } static std::error_code createSymLink(const char *to, const char *from) { return createLink(&CreateSymbolicLinkW, to, from); } #endif namespace { // Options for the cache pruning algorithm llvm::cl::opt pruneEnabled("cache-prune", llvm::cl::desc("Enable cache pruning.")); llvm::cl::opt pruneSizeLimitInBytes( "cache-prune-maxbytes", llvm::cl::desc("Sets the maximum cache size to bytes. Implies " "-cache-prune."), llvm::cl::value_desc("size"), llvm::cl::init(0)); llvm::cl::opt pruneInterval( "cache-prune-interval", llvm::cl::desc("Sets the cache pruning interval to seconds " "(default: 20 min). Set to 0 to force pruning. Implies " "-cache-prune."), llvm::cl::value_desc("dur"), llvm::cl::init(20 * 60)); llvm::cl::opt pruneExpiration( "cache-prune-expiration", llvm::cl::desc("Sets the pruning expiration time of cache files to " " seconds (default: 1 week). Implies -cache-prune."), llvm::cl::value_desc("dur"), llvm::cl::init(7 * 24 * 3600)); llvm::cl::opt pruneSizeLimitPercentage( "cache-prune-maxpercentage", llvm::cl::desc( "Sets the cache size limit to percent of the available " "space (default: 75%). 
Implies -cache-prune."), llvm::cl::value_desc("perc"), llvm::cl::init(75)); enum class RetrievalMode { Copy, HardLink, AnyLink, SymLink }; llvm::cl::opt cacheRecoveryMode( "cache-retrieval", llvm::cl::ZeroOrMore, llvm::cl::desc("Set the cache retrieval mechanism (default: copy)."), llvm::cl::init(RetrievalMode::Copy), llvm::cl::values( clEnumValN(RetrievalMode::Copy, "copy", "Make a copy of the cache file"), clEnumValN(RetrievalMode::HardLink, "hardlink", "Create a hard link to the cache file (recommended)"), clEnumValN( RetrievalMode::AnyLink, "link", "Equal to 'hardlink' on Windows, but 'symlink' on Unix and OS X"), clEnumValN(RetrievalMode::SymLink, "symlink", "Create a symbolic link to the cache file"))); bool isPruningEnabled() { if (pruneEnabled) return true; // Specifying cache pruning parameters implies enabling pruning. if ((pruneSizeLimitInBytes.getNumOccurrences() > 0) || (pruneInterval.getNumOccurrences() > 0) || (pruneExpiration.getNumOccurrences() > 0) || (pruneSizeLimitPercentage.getNumOccurrences() > 0)) return true; return false; } llvm::sys::TimePoint getTimeNow() { using namespace std::chrono; return time_point_cast(system_clock::now()); } /// A raw_ostream that creates a hash of what is written to it. /// This class does not encounter output errors. /// There is no buffering and the hasher can be used at any time. class raw_hash_ostream : public llvm::raw_ostream { llvm::MD5 hasher; /// See raw_ostream::write_impl. 
void write_impl(const char *ptr, size_t size) override { hasher.update( llvm::ArrayRef(reinterpret_cast(ptr), size)); } uint64_t current_pos() const override { return 0; } public: raw_hash_ostream() { SetUnbuffered(); } ~raw_hash_ostream() override {} void flush() = delete; void finalResult(llvm::MD5::MD5Result &result) { hasher.final(result); } void resultAsString(llvm::SmallString<32> &str) { llvm::MD5::MD5Result result; hasher.final(result); llvm::MD5::stringifyResult(result, str); } }; void storeCacheFileName(llvm::StringRef cacheObjectHash, llvm::SmallString<128> &filePath) { filePath = opts::cacheDir; llvm::sys::path::append( filePath, llvm::Twine("ircache_") + cacheObjectHash + "." + llvm::StringRef(target.obj_ext.ptr, target.obj_ext.length)); } // Output to `hash_os` all commandline flags, and try to skip the ones that have // no influence on the object code output. The cmdline flags need to be added // to the ir2obj cache hash to uniquely identify the object file output. // Because the compiler version is part of the hash, differences in the // default settings between compiler versions are already taken care of. // (Note: config and response files may also add compiler flags.) void outputIR2ObjRelevantCmdlineArgs(llvm::raw_ostream &hash_os) { // Use a "whitelist" of cmdline args that do not need to be added to the hash, // and add all others. There is no harm (other than missed cache // opportunities) in adding commandline arguments that also change the hashed // IR, which simplifies the code here. // The code does not deal well with options specified without equals sign, and // will add those to the hash, resulting in missed cache opportunities. auto it = opts::allArguments.begin(); auto end_it = opts::allArguments.end(); // The first argument is the compiler executable filename: we can skip it. 
++it; for (; it != end_it; ++it) { const char *arg = *it; if (!arg || !arg[0]) continue; // Out of pre-caution, all arguments that are not prefixed with '-' are // added to the hash. Such an argument could be a source file "foo.d", but // also a value for the previous argument when the equals sign is omitted, // for example: "-code-model default" becomes "-code-model" "default". // It results in missed cache opportunities. :( if (arg[0] == '-') { if (arg[1] == 'O') { // We deal with -O later ("-O" and "-O3" should hash equally, "" and // "-O0" too) continue; } if (arg[1] == 'c' && !arg[2]) continue; // All options starting with these characters can be ignored (LLVM does // not have options starting with capitals) if (arg[1] == 'D' || arg[1] == 'H' || arg[1] == 'I' || arg[1] == 'J' || arg[1] == 'L' || arg[1] == 'X') continue; if (arg[1] == 'd' || arg[1] == 'v' || arg[1] == 'w') { // LLVM options are long, so short options starting with 'v' or 'w' can // be ignored. unsigned len = 2; for (; len < 11; ++len) if (!arg[len]) break; if (len < 11) continue; } // "-of..." can be ignored if (arg[1] == 'o' && arg[2] == 'f') continue; // "-od..." can be ignored if (arg[1] == 'o' && arg[2] == 'd') continue; // All "-cache..." options can be ignored if (strncmp(arg + 1, "cache", 5) == 0) continue; // Ignore "-lib" if (arg[1] == 'l' && arg[2] == 'i' && arg[3] == 'b' && !arg[4]) continue; // All effects of -d-version... are already included in the IR hash. if (strncmp(arg + 1, "d-version", 9) == 0) continue; // All effects of -unittest are already included in the IR hash. if (strcmp(arg + 1, "unittest") == 0) { continue; } // All arguments following -run can safely be ignored if (strcmp(arg + 1, "run") == 0) { break; } } // If we reach here, add the argument to the hash. hash_os << arg; } // Adding these options to the hash should not be needed after adding all // cmdline args. 
We keep this code here however, in case we find a different // solution for dealing with LLVM commandline flags. See GH #1773. // Also, having these options explicitly added to the hash protects against // the possibility of different default settings on different platforms (while // sharing the cache). outputOptimizationSettings(hash_os); opts::outputSanitizerSettings(hash_os); hash_os << opts::getCPUStr(); hash_os << opts::getFeaturesStr(); hash_os << opts::floatABI; const auto relocModel = opts::getRelocModel(); #if LDC_LLVM_VER >= 1600 if (relocModel.has_value()) hash_os << relocModel.value(); #else if (relocModel.hasValue()) hash_os << relocModel.getValue(); #endif const auto codeModel = opts::getCodeModel(); #if LDC_LLVM_VER >= 1600 if (codeModel.has_value()) hash_os << codeModel.value(); #else if (codeModel.hasValue()) hash_os << codeModel.getValue(); #endif const auto framePointerUsage = opts::framePointerUsage(); #if LDC_LLVM_VER >= 1600 if (framePointerUsage.has_value()) hash_os << static_cast(framePointerUsage.value()); #else if (framePointerUsage.hasValue()) hash_os << static_cast(framePointerUsage.getValue()); #endif } // Output to `hash_os` all environment flags that influence object code output // in ways that are not observable in the pre-LLVM passes IR used for hashing. void outputIR2ObjRelevantEnvironmentOpts(llvm::raw_ostream &hash_os) { // There are no relevant environment options at the moment. 
} } // anonymous namespace namespace cache { void calculateModuleHash(llvm::Module *m, llvm::SmallString<32> &str) { raw_hash_ostream hash_os; // Let hash depend on the compiler version: hash_os << ldc::ldc_version << ldc::dmd_version << ldc::llvm_version << ldc::built_with_Dcompiler_version; // Let hash depend on compile flags that change the outputted obj file, // but whose changes are not always observable in the pre-optimized IR used // for hashing: outputIR2ObjRelevantCmdlineArgs(hash_os); outputIR2ObjRelevantEnvironmentOpts(hash_os); llvm::WriteBitcodeToFile(*m, hash_os); hash_os.resultAsString(str); IF_LOG Logger::println("Module's LLVM bitcode hash is: %s", str.c_str()); } std::string cacheLookup(llvm::StringRef cacheObjectHash) { if (opts::cacheDir.empty()) return ""; if (!llvm::sys::fs::exists(opts::cacheDir)) { IF_LOG Logger::println("Cache directory does not exist, no object found."); return ""; } llvm::SmallString<128> filePath; storeCacheFileName(cacheObjectHash, filePath); if (llvm::sys::fs::exists(filePath.c_str())) { IF_LOG Logger::println("Cache object found! %s", filePath.c_str()); return filePath.str().str(); } IF_LOG Logger::println("Cache object not found."); return ""; } void cacheObjectFile(llvm::StringRef objectFile, llvm::StringRef cacheObjectHash) { if (opts::cacheDir.empty()) return; if (!llvm::sys::fs::exists(opts::cacheDir)) { if (auto errorcode = llvm::sys::fs::create_directories(opts::cacheDir)) { error(Loc(), "Unable to create cache directory: %s (errno %d: %s)", opts::cacheDir.c_str(), errorcode.value(), errorcode.message().c_str()); fatal(); } } // To prevent bad cache files, add files to the cache atomically: first copy // to a temporary file and then rename that temp file to the cache entry // filename (rename is atomic). 
llvm::SmallString<128> cacheFile; storeCacheFileName(cacheObjectHash, cacheFile); llvm::SmallString<128> tempFile; if (auto errorcode = llvm::sys::fs::createUniqueFile( llvm::Twine(cacheFile) + ".tmp%%%%%%%", tempFile)) { error( Loc(), "Could not create name of temporary file in the cache (errno %d: %s)", errorcode.value(), errorcode.message().c_str()); fatal(); } IF_LOG Logger::println("Copy object file to temp file: %s to %s", objectFile.str().c_str(), tempFile.c_str()); if (auto errorcode = llvm::sys::fs::copy_file(objectFile, tempFile.c_str())) { error(Loc(), "Failed to copy object file to cache: %s to %s (errno %d: %s)", objectFile.str().c_str(), tempFile.c_str(), errorcode.value(), errorcode.message().c_str()); fatal(); } IF_LOG Logger::println("Rename temp file to cache file: %s to %s", tempFile.c_str(), cacheFile.c_str()); if (auto errorcode = llvm::sys::fs::rename(tempFile.c_str(), cacheFile.c_str())) { error(Loc(), "Failed to rename temp file to cache file: %s to %s (errno %d: %s)", tempFile.c_str(), cacheFile.c_str(), errorcode.value(), errorcode.message().c_str()); fatal(); } } void recoverObjectFile(llvm::StringRef cacheObjectHash, llvm::StringRef objectFile) { llvm::SmallString<128> cacheFile; storeCacheFileName(cacheObjectHash, cacheFile); // Remove the potentially pre-existing output file. 
llvm::sys::fs::remove(objectFile); switch (cacheRecoveryMode) { case RetrievalMode::Copy: { IF_LOG Logger::println("Copy cached object file: %s -> %s", cacheFile.c_str(), objectFile.str().c_str()); if (auto errorcode = llvm::sys::fs::copy_file(cacheFile.c_str(), objectFile)) { error(Loc(), "Failed to copy the cached file: %s -> %s (errno %d: %s)", cacheFile.c_str(), objectFile.str().c_str(), errorcode.value(), errorcode.message().c_str()); fatal(); } } break; case RetrievalMode::HardLink: { IF_LOG Logger::println("HardLink output to cached object file: %s -> %s", objectFile.str().c_str(), cacheFile.c_str()); if (auto errorcode = createHardLink(cacheFile.c_str(), objectFile.str().c_str())) { error(Loc(), "Failed to create a hard link to the cached file: %s -> %s (errno " "%d: %s)", cacheFile.c_str(), objectFile.str().c_str(), errorcode.value(), errorcode.message().c_str()); fatal(); } } break; case RetrievalMode::AnyLink: { IF_LOG Logger::println("Link output to cached object file: %s -> %s", objectFile.str().c_str(), cacheFile.c_str()); if (auto errorcode = llvm::sys::fs::create_link(cacheFile.c_str(), objectFile)) { error( Loc(), "Failed to create a link to the cached file: %s -> %s (errno %d: %s)", cacheFile.c_str(), objectFile.str().c_str(), errorcode.value(), errorcode.message().c_str()); fatal(); } } break; case RetrievalMode::SymLink: { IF_LOG Logger::println("SymLink output to cached object file: %s -> %s", objectFile.str().c_str(), cacheFile.c_str()); if (auto errorcode = createSymLink(cacheFile.c_str(), objectFile.str().c_str())) { error(Loc(), "Failed to create a symbolic link to the cached file: %s -> %s " "(errno %d: %s)", cacheFile.c_str(), objectFile.str().c_str(), errorcode.value(), errorcode.message().c_str()); fatal(); } } break; } // We reset the modification time to "now" such that the pruning algorithm // sees that the file should be kept over older files. 
// On some systems the last accessed time is not automatically updated so set // it explicitly here. Because the file will really only be accessed later // during linking, it's not perfect but it's the best we can do. { int FD; if (llvm::sys::fs::openFileForWrite(cacheFile.c_str(), FD, llvm::sys::fs::CD_OpenExisting, llvm::sys::fs::OF_Append)) { error(Loc(), "Failed to open the cached file for writing: %s", cacheFile.c_str()); fatal(); } if (llvm::sys::fs::setLastAccessAndModificationTime(FD, getTimeNow())) { error(Loc(), "Failed to set the cached file modification time: %s", cacheFile.c_str()); fatal(); } close(FD); } } void pruneCache() { if (!opts::cacheDir.empty() && isPruningEnabled()) { ::pruneCache(opts::cacheDir.data(), opts::cacheDir.size(), pruneInterval, pruneExpiration, pruneSizeLimitInBytes, pruneSizeLimitPercentage); } } } // namespace cache