mirror of
https://github.com/ldc-developers/ldc.git
synced 2025-04-29 14:40:40 +03:00

Add the command-line options -fprofile-instr-generate[=filename] and -fprofile-instr-use=filename -fprofile-instr-generate -- Add instrumentation on branches, switches, and function entry; uses LLVM's InstrProf pass. -- Link to profile runtime that writes instrumentation counters to a file. -fprofile-instr-use -- Read profile data from a file and apply branch weights to branches and switches, and annotate functions with entrycount in LLVM IR. -- Functions with low or high entrycount are marked with 'cold' or 'inlinehint'. The only statement type without PGO yet is "try-finally". A new pragma, `pragma(LDC_profile_instr, [ true | false ])`, is added to selectively disable/enable instrumentation of functions (granularity = whole functions). The runtime library ldc-profile-rt is a copy of LLVM compiler-rt lib/profile. It has to be exactly in-sync with the LLVM version, and thus we need a copy for each PGO-supported LLVM (>=3.7). Import ldc.profile for a D interface to ldc-profile-rt (for example to reset execution counts after a program startup phase). The instrumentation data is mainly passed on to LLVM: function-entry counts and branch counts/probabilities. LDC marks functions as hot when "execution count is 30% of the maximum function execution count", and marks functions as cold if their count is 1% of maximum function execution count. The source of LLVM's llvm-profdata tool is hereby included in LDC's repository (different source for each LLVM version), and the binary is included in the install bin folder. The executable is named "ldc-profdata" to avoid clashing with llvm-profdata on the same machine. This is needed because the profdata executable has to be in-sync with the LLVM version used to build LDC. Maintenance burden: for trunk LLVM, we have to keep ldc-profile-rt and llvm-profdata in sync. There is no diff with upstream; but because of active development there are the occasional API changes.
427 lines
14 KiB
C++
427 lines
14 KiB
C++
//===-- optimizer.cpp -----------------------------------------------------===//
|
||
//
|
||
// LDC – the LLVM D compiler
|
||
//
|
||
// This file is distributed under the BSD-style LDC license. See the LICENSE
|
||
// file for details.
|
||
//
|
||
//===----------------------------------------------------------------------===//
|
||
|
||
#include "gen/optimizer.h"
|
||
#include "mars.h" // error()
|
||
#include "gen/cl_helpers.h"
|
||
#include "gen/logger.h"
|
||
#include "gen/passes/Passes.h"
|
||
#include "llvm/LinkAllPasses.h"
|
||
#if LDC_LLVM_VER >= 307
|
||
#include "llvm/IR/LegacyPassManager.h"
|
||
#else
|
||
#include "llvm/PassManager.h"
|
||
#endif
|
||
#include "llvm/IR/Module.h"
|
||
#include "llvm/IR/DataLayout.h"
|
||
#include "llvm/ADT/Triple.h"
|
||
#if LDC_LLVM_VER >= 307
|
||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||
#endif
|
||
#include "llvm/IR/Verifier.h"
|
||
#if LDC_LLVM_VER >= 307
|
||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||
#else
|
||
#include "llvm/Target/TargetLibraryInfo.h"
|
||
#endif
|
||
#include "llvm/Target/TargetMachine.h"
|
||
#include "llvm/Support/CommandLine.h"
|
||
#include "llvm/IR/LegacyPassNameParser.h"
|
||
#include "llvm/Transforms/Instrumentation.h"
|
||
#include "llvm/Transforms/IPO.h"
|
||
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
||
|
||
extern llvm::TargetMachine *gTargetMachine;
|
||
using namespace llvm;
|
||
|
||
// Optimization level requested on the command line.
//
// No option name is passed here; the value names listed below are presumably
// what the user spells on the command line (-O, -O0, ..., -Oz). Speed levels
// are stored as non-negative numbers, the size-oriented levels as negative
// ones (-1 for -Os, -2 for -Oz). The initial value is 0.
static cl::opt<signed char> optimizeLevel(
    cl::desc("Setting the optimization level:"), cl::ZeroOrMore,
    cl::values(clEnumValN(3, "O", "Equivalent to -O3"),
               clEnumValN(0, "O0", "No optimizations (default)"),
               clEnumValN(1, "O1", "Simple optimizations"),
               clEnumValN(2, "O2", "Good optimizations"),
               clEnumValN(3, "O3", "Aggressive optimizations"),
               clEnumValN(4, "O4", "Equivalent to -O3"), // Not implemented yet.
               clEnumValN(5, "O5", "Equivalent to -O3"), // Not implemented yet.
               clEnumValN(-1, "Os",
                          "Like -O2 with extra optimizations for size"),
               clEnumValN(-2, "Oz", "Like -Os but reduces code size further"),
               clEnumValEnd),
    cl::init(0));
|
||
|
||
// -disable-verify: when set, verifyModule() in this file does nothing.
static cl::opt<bool> noVerify("disable-verify",
                              cl::desc("Do not verify result module"),
                              cl::Hidden);

// -verify-each: when set, addPass() in this file schedules a verifier pass
// after every pass it adds.
static cl::opt<bool>
    verifyEach("verify-each",
               cl::desc("Run verifier after D-specific and explicitly "
                        "specified optimization passes"),
               cl::Hidden, cl::ZeroOrMore);
|
||
|
||
// -disable-d-passes: master switch; when set, neither of the D-specific
// extension passes is registered by addOptimizationPasses().
static cl::opt<bool>
    disableLangSpecificPasses("disable-d-passes",
                              cl::desc("Disable all D-specific passes"),
                              cl::ZeroOrMore);

// -disable-simplify-drtcalls: skips registration of the druntime call
// simplification pass only.
static cl::opt<bool> disableSimplifyDruntimeCalls(
    "disable-simplify-drtcalls",
    cl::desc("Disable simplification of druntime calls"), cl::ZeroOrMore);

// -disable-simplify-libcalls: makes ldc_optimize_module() call
// disableAllFunctions() on the target library info it sets up.
static cl::opt<bool> disableSimplifyLibCalls(
    "disable-simplify-libcalls",
    cl::desc("Disable simplification of well-known C runtime calls"),
    cl::ZeroOrMore);

// -disable-gc2stack: skips registration of the GC-to-stack promotion pass
// only.
static cl::opt<bool> disableGCToStack(
    "disable-gc2stack",
    cl::desc("Disable promotion of GC allocations to stack memory"),
    cl::ZeroOrMore);
|
||
|
||
// -inlining: tri-state switch (set / cleared / not given) evaluated by
// willInline().
static cl::opt<cl::boolOrDefault, false, opts::FlagParser<cl::boolOrDefault>>
    enableInlining(
        "inlining",
        cl::desc("Enable function inlining (default in -O2 and higher)"),
        cl::ZeroOrMore);

// -unit-at-a-time: its negation is forwarded to
// PassManagerBuilder::DisableUnitAtATime.
static cl::opt<bool> unitAtATime("unit-at-a-time", cl::desc("Enable basic IPO"),
                                 cl::init(true));

// -strip-debug: makes ldc_optimize_module() schedule a strip-symbols pass
// ahead of the optimization pipeline.
static cl::opt<bool> stripDebug(
    "strip-debug",
    cl::desc("Strip symbolic debug information before optimization"));
|
||
|
||
// -sanitize=<address|memory|thread>: selects which sanitizer instrumentation
// addOptimizationPasses() hooks into the pipeline. Defaults to opts::None.
cl::opt<opts::SanitizerCheck> opts::sanitize(
    "sanitize", cl::desc("Enable runtime instrumentation for bug detection"),
    cl::init(opts::None),
    cl::values(clEnumValN(opts::AddressSanitizer, "address", "memory errors"),
               clEnumValN(opts::MemorySanitizer, "memory", "memory errors"),
               clEnumValN(opts::ThreadSanitizer, "thread", "race detection"),
               clEnumValEnd));
|
||
|
||
// -disable-loop-unrolling: only honoured when given explicitly; otherwise
// addOptimizationPasses() derives the setting from the optimization level.
static cl::opt<bool> disableLoopUnrolling(
    "disable-loop-unrolling",
    cl::desc("Disable loop unrolling in all relevant passes"), cl::init(false));

// -disable-loop-vectorization: forces PassManagerBuilder::LoopVectorize off.
static cl::opt<bool>
    disableLoopVectorization("disable-loop-vectorization",
                             cl::desc("Disable the loop vectorization pass"),
                             cl::init(false));

// -disable-slp-vectorization: forces PassManagerBuilder::SLPVectorize off.
static cl::opt<bool>
    disableSLPVectorization("disable-slp-vectorization",
                            cl::desc("Disable the slp vectorization pass"),
                            cl::init(false));
|
||
|
||
static unsigned optLevel() {
|
||
// Use -O2 as a base for the size-optimization levels.
|
||
return optimizeLevel >= 0 ? optimizeLevel : 2;
|
||
}
|
||
|
||
// Size-optimization level derived from the -O setting: the negated stored
// value for the negative (size) settings, i.e. 1 for -Os and 2 for -Oz, and 0
// for every non-negative setting.
static unsigned sizeLevel() {
  if (optimizeLevel >= 0) {
    return 0;
  }
  return -optimizeLevel;
}
|
||
|
||
// Determines whether or not to run the normal, full inlining pass.
|
||
bool willInline() {
|
||
return enableInlining == cl::BOU_TRUE ||
|
||
(enableInlining == cl::BOU_UNSET && optLevel() > 1);
|
||
}
|
||
|
||
// True for every -O setting other than -O0 (stored as 0); the negative size
// settings count as enabled as well.
bool isOptimizationEnabled() {
  const bool unoptimized = optimizeLevel == 0;
  return !unoptimized;
}
|
||
|
||
// Translates the optimization level into the code generator's level:
// 0 -> None, 3 and above -> Aggressive, anything in between -> Default.
llvm::CodeGenOpt::Level codeGenOptLevel() {
  // Use the same approach as clang (see lib/CodeGen/BackendUtil.cpp)
  const unsigned level = optLevel();
  if (level == 0) {
    return llvm::CodeGenOpt::None;
  }
  return level >= 3 ? llvm::CodeGenOpt::Aggressive
                    : llvm::CodeGenOpt::Default;
}
|
||
|
||
// Adds `pass` to `pm`; with -verify-each a verifier pass is scheduled right
// behind it.
static inline void addPass(PassManagerBase &pm, Pass *pass) {
  pm.add(pass);
  if (!verifyEach) {
    return;
  }
  pm.add(createVerifierPass());
}
|
||
|
||
static void addStripExternalsPass(const PassManagerBuilder &builder,
|
||
PassManagerBase &pm) {
|
||
if (builder.OptLevel >= 1) {
|
||
addPass(pm, createStripExternalsPass());
|
||
addPass(pm, createGlobalDCEPass());
|
||
}
|
||
}
|
||
|
||
static void addSimplifyDRuntimeCallsPass(const PassManagerBuilder &builder,
|
||
PassManagerBase &pm) {
|
||
if (builder.OptLevel >= 2 && builder.SizeLevel == 0) {
|
||
addPass(pm, createSimplifyDRuntimeCalls());
|
||
}
|
||
}
|
||
|
||
static void addGarbageCollect2StackPass(const PassManagerBuilder &builder,
|
||
PassManagerBase &pm) {
|
||
if (builder.OptLevel >= 2 && builder.SizeLevel == 0) {
|
||
addPass(pm, createGarbageCollect2Stack());
|
||
}
|
||
}
|
||
|
||
static void addAddressSanitizerPasses(const PassManagerBuilder &Builder,
|
||
PassManagerBase &PM) {
|
||
PM.add(createAddressSanitizerFunctionPass());
|
||
PM.add(createAddressSanitizerModulePass());
|
||
}
|
||
|
||
static void addMemorySanitizerPass(const PassManagerBuilder &Builder,
|
||
PassManagerBase &PM) {
|
||
PM.add(createMemorySanitizerPass());
|
||
|
||
// MemorySanitizer inserts complex instrumentation that mostly follows
|
||
// the logic of the original code, but operates on "shadow" values.
|
||
// It can benefit from re-running some general purpose optimization passes.
|
||
if (Builder.OptLevel > 0) {
|
||
PM.add(createEarlyCSEPass());
|
||
PM.add(createReassociatePass());
|
||
PM.add(createLICMPass());
|
||
PM.add(createGVNPass());
|
||
PM.add(createInstructionCombiningPass());
|
||
PM.add(createDeadStoreEliminationPass());
|
||
}
|
||
}
|
||
|
||
static void addThreadSanitizerPass(const PassManagerBuilder &Builder,
|
||
PassManagerBase &PM) {
|
||
PM.add(createThreadSanitizerPass());
|
||
}
|
||
|
||
// Schedules LLVM's instrumentation-profiling pass on the module pass manager
// when global.params.genInstrProf is set. The body is compiled only when
// LDC_WITH_PGO is non-zero; otherwise the function does nothing.
static void addInstrProfilingPass(legacy::PassManagerBase &mpm) {
#if LDC_WITH_PGO
  if (!global.params.genInstrProf) {
    return;
  }

  // Forward the red-zone setting and the profile output file name to the
  // pass.
  InstrProfOptions profOptions;
  profOptions.NoRedZone = global.params.disableRedZone;
  profOptions.InstrProfileOutput = global.params.datafileInstrProf;
  mpm.add(createInstrProfilingPass(profOptions));
#endif
}
|
||
|
||
/**
|
||
* Adds a set of optimization passes to the given module/function pass
|
||
* managers based on the given optimization and size reduction levels.
|
||
*
|
||
* The selection mirrors Clang behavior and is based on LLVM's
|
||
* PassManagerBuilder.
|
||
*/
|
||
#if LDC_LLVM_VER >= 307
|
||
static void addOptimizationPasses(legacy::PassManagerBase &mpm,
|
||
legacy::FunctionPassManager &fpm,
|
||
#else
|
||
static void addOptimizationPasses(PassManagerBase &mpm,
|
||
FunctionPassManager &fpm,
|
||
#endif
|
||
unsigned optLevel, unsigned sizeLevel) {
|
||
fpm.add(createVerifierPass()); // Verify that input is correct
|
||
|
||
PassManagerBuilder builder;
|
||
builder.OptLevel = optLevel;
|
||
builder.SizeLevel = sizeLevel;
|
||
|
||
if (willInline()) {
|
||
unsigned threshold = 225;
|
||
if (sizeLevel == 1) { // -Os
|
||
threshold = 75;
|
||
} else if (sizeLevel == 2) { // -Oz
|
||
threshold = 25;
|
||
}
|
||
if (optLevel > 2) {
|
||
threshold = 275;
|
||
}
|
||
builder.Inliner = createFunctionInliningPass(threshold);
|
||
} else {
|
||
builder.Inliner = createAlwaysInlinerPass();
|
||
}
|
||
builder.DisableUnitAtATime = !unitAtATime;
|
||
builder.DisableUnrollLoops = optLevel == 0;
|
||
|
||
builder.DisableUnrollLoops = (disableLoopUnrolling.getNumOccurrences() > 0)
|
||
? disableLoopUnrolling
|
||
: optLevel == 0;
|
||
|
||
// This is final, unless there is a #pragma vectorize enable
|
||
if (disableLoopVectorization) {
|
||
builder.LoopVectorize = false;
|
||
// If option wasn't forced via cmd line (-vectorize-loops, -loop-vectorize)
|
||
} else if (!builder.LoopVectorize) {
|
||
builder.LoopVectorize = optLevel > 1 && sizeLevel < 2;
|
||
}
|
||
|
||
// When #pragma vectorize is on for SLP, do the same as above
|
||
builder.SLPVectorize =
|
||
disableSLPVectorization ? false : optLevel > 1 && sizeLevel < 2;
|
||
|
||
if (opts::sanitize == opts::AddressSanitizer) {
|
||
builder.addExtension(PassManagerBuilder::EP_OptimizerLast,
|
||
addAddressSanitizerPasses);
|
||
builder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0,
|
||
addAddressSanitizerPasses);
|
||
}
|
||
|
||
if (opts::sanitize == opts::MemorySanitizer) {
|
||
builder.addExtension(PassManagerBuilder::EP_OptimizerLast,
|
||
addMemorySanitizerPass);
|
||
builder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0,
|
||
addMemorySanitizerPass);
|
||
}
|
||
|
||
if (opts::sanitize == opts::ThreadSanitizer) {
|
||
builder.addExtension(PassManagerBuilder::EP_OptimizerLast,
|
||
addThreadSanitizerPass);
|
||
builder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0,
|
||
addThreadSanitizerPass);
|
||
}
|
||
|
||
if (!disableLangSpecificPasses) {
|
||
if (!disableSimplifyDruntimeCalls) {
|
||
builder.addExtension(PassManagerBuilder::EP_LoopOptimizerEnd,
|
||
addSimplifyDRuntimeCallsPass);
|
||
}
|
||
|
||
if (!disableGCToStack) {
|
||
builder.addExtension(PassManagerBuilder::EP_LoopOptimizerEnd,
|
||
addGarbageCollect2StackPass);
|
||
}
|
||
}
|
||
|
||
// EP_OptimizerLast does not exist in LLVM 3.0, add it manually below.
|
||
builder.addExtension(PassManagerBuilder::EP_OptimizerLast,
|
||
addStripExternalsPass);
|
||
|
||
addInstrProfilingPass(mpm);
|
||
|
||
builder.populateFunctionPassManager(fpm);
|
||
builder.populateModulePassManager(mpm);
|
||
}
|
||
|
||
////////////////////////////////////////////////////////////////////////////////
|
||
// This function runs optimization passes based on command line arguments.
|
||
// Returns true if any optimization passes were invoked.
|
||
bool ldc_optimize_module(llvm::Module *M) {
|
||
// Create a PassManager to hold and optimize the collection of
|
||
// per-module passes we are about to build.
|
||
#if LDC_LLVM_VER >= 307
|
||
legacy::
|
||
#endif
|
||
PassManager mpm;
|
||
|
||
#if LDC_LLVM_VER >= 307
|
||
// Add an appropriate TargetLibraryInfo pass for the module's triple.
|
||
TargetLibraryInfoImpl *tlii =
|
||
new TargetLibraryInfoImpl(Triple(M->getTargetTriple()));
|
||
|
||
// The -disable-simplify-libcalls flag actually disables all builtin optzns.
|
||
if (disableSimplifyLibCalls)
|
||
tlii->disableAllFunctions();
|
||
|
||
mpm.add(new TargetLibraryInfoWrapperPass(*tlii));
|
||
#else
|
||
// Add an appropriate TargetLibraryInfo pass for the module's triple.
|
||
TargetLibraryInfo *tli = new TargetLibraryInfo(Triple(M->getTargetTriple()));
|
||
|
||
// The -disable-simplify-libcalls flag actually disables all builtin optzns.
|
||
if (disableSimplifyLibCalls) {
|
||
tli->disableAllFunctions();
|
||
}
|
||
|
||
mpm.add(tli);
|
||
#endif
|
||
|
||
// Add an appropriate DataLayout instance for this module.
|
||
#if LDC_LLVM_VER >= 307
|
||
// The DataLayout is already set at the module (in module.cpp,
|
||
// method Module::genLLVMModule())
|
||
// FIXME: Introduce new command line switch default-data-layout to
|
||
// override the module data layout
|
||
#elif LDC_LLVM_VER == 306
|
||
mpm.add(new DataLayoutPass());
|
||
#else
|
||
const DataLayout *DL = M->getDataLayout();
|
||
assert(DL &&
|
||
"DataLayout not set at module");
|
||
mpm.add(new DataLayoutPass(*DL));
|
||
#endif
|
||
|
||
#if LDC_LLVM_VER >= 307
|
||
// Add internal analysis passes from the target machine.
|
||
mpm.add(createTargetTransformInfoWrapperPass(
|
||
gTargetMachine->getTargetIRAnalysis()));
|
||
#else
|
||
// Add internal analysis passes from the target machine.
|
||
gTargetMachine->addAnalysisPasses(mpm);
|
||
#endif
|
||
|
||
// Also set up a manager for the per-function passes.
|
||
#if LDC_LLVM_VER >= 307
|
||
legacy::
|
||
#endif
|
||
FunctionPassManager fpm(M);
|
||
|
||
#if LDC_LLVM_VER >= 307
|
||
// Add internal analysis passes from the target machine.
|
||
fpm.add(createTargetTransformInfoWrapperPass(
|
||
gTargetMachine->getTargetIRAnalysis()));
|
||
#elif LDC_LLVM_VER >= 306
|
||
fpm.add(new DataLayoutPass());
|
||
gTargetMachine->addAnalysisPasses(fpm);
|
||
#else
|
||
fpm.add(new DataLayoutPass(M));
|
||
gTargetMachine->addAnalysisPasses(fpm);
|
||
#endif
|
||
|
||
// If the -strip-debug command line option was specified, add it before
|
||
// anything else.
|
||
if (stripDebug) {
|
||
mpm.add(createStripSymbolsPass(true));
|
||
}
|
||
|
||
addOptimizationPasses(mpm, fpm, optLevel(), sizeLevel());
|
||
|
||
// Run per-function passes.
|
||
fpm.doInitialization();
|
||
for (auto &F : *M) {
|
||
fpm.run(F);
|
||
}
|
||
fpm.doFinalization();
|
||
|
||
// Run per-module passes.
|
||
mpm.run(*M);
|
||
|
||
// Verify the resulting module.
|
||
verifyModule(M);
|
||
|
||
// Report that we run some passes.
|
||
return true;
|
||
}
|
||
|
||
// Verifies the module.
|
||
void verifyModule(llvm::Module *m) {
|
||
if (!noVerify) {
|
||
Logger::println("Verifying module...");
|
||
LOG_SCOPE;
|
||
std::string ErrorStr;
|
||
raw_string_ostream OS(ErrorStr);
|
||
if (llvm::verifyModule(*m, &OS)) {
|
||
error(Loc(), "%s", ErrorStr.c_str());
|
||
fatal();
|
||
} else {
|
||
Logger::println("Verification passed!");
|
||
}
|
||
}
|
||
}
|