Add LTO support (full and thin), with -flto=thin|full.

LTO needs linker support: I am only aware of support on OS X and Linux (through the LLVMgold plugin).

Resolves #693
This commit is contained in:
Johan Engelen 2016-10-16 20:48:31 +02:00
parent 54e45490cc
commit bb24fb2816
17 changed files with 361 additions and 14 deletions

View file

@ -718,6 +718,19 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
install(DIRECTORY bash_completion.d/ DESTINATION ${BASH_COMPLETION_COMPLETIONSDIR})
endif()
# Also install LLVM's LTO binary if available
# (libLTO.dylib on Apple hosts, the LLVMgold.so linker plugin on other Unixes).
if(APPLE)
    set(LLVM_LTO_BINARY "${LLVM_LIBRARY_DIRS}/libLTO.dylib")
elseif(UNIX)
    set(LLVM_LTO_BINARY "${LLVM_LIBRARY_DIRS}/LLVMgold.so")
endif()
# All path expansions are quoted: a path containing spaces stays a single
# argument, and an unset LLVM_LTO_BINARY (other platforms) becomes an empty
# string, for which EXISTS is simply false.
if(EXISTS "${LLVM_LTO_BINARY}")
    message(STATUS "Also installing LTO binary: ${LLVM_LTO_BINARY}")
    install(PROGRAMS "${LLVM_LTO_BINARY}" DESTINATION "${CMAKE_INSTALL_PREFIX}/lib")
    # Also place a copy in the build tree's lib directory.
    file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/lib")
    configure_file("${LLVM_LTO_BINARY}" "${PROJECT_BINARY_DIR}/lib" COPYONLY)
endif()
#
# Packaging
#

View file

@ -460,6 +460,16 @@ cl::opt<bool>
cl::desc("Instrument function entry and exit with "
"GCC-compatible profiling calls"));
#if LDC_LLVM_VER >= 309
// Command-line switch -flto=<full|thin> selecting the LTO mode.
// Initialized to LTO_None, so LTO stays off unless the switch is given.
cl::opt<LTOKind> ltoMode(
"flto", cl::desc("Set LTO mode, requires linker support"),
cl::init(LTO_None),
clEnumValues(
clEnumValN(LTO_Full, "full", "Merges all input into a single module"),
clEnumValN(LTO_Thin, "thin",
"Parallel importing and codegen (faster than 'full')")));
#endif
static cl::extrahelp footer(
"\n"
"-d-debug can also be specified without options, in which case it enables "

View file

@ -96,5 +96,20 @@ extern std::vector<std::string> debugArgs;
#if LDC_LLVM_VER >= 307
void CreateColorOption();
#endif
#if LDC_LLVM_VER >= 309
// LTO options
// Possible values of the LTO mode option.
enum LTOKind {
LTO_None, // isUsingLTO() is false for this value
LTO_Full, // selected with "full"
LTO_Thin, // selected with "thin"; isUsingThinLTO() is true for this value
};
// The LTO mode option object; only declared here.
extern cl::opt<LTOKind> ltoMode;
// True for any LTO mode other than LTO_None.
inline bool isUsingLTO() { return ltoMode != LTO_None; }
// True only when the mode is LTO_Thin.
inline bool isUsingThinLTO() { return ltoMode == LTO_Thin; }
#else
// No LTO option exists for LDC_LLVM_VER < 309: both queries are constant false,
// so callers need no version checks of their own.
inline bool isUsingLTO() { return false; }
inline bool isUsingThinLTO() { return false; }
#endif
}
#endif

View file

@ -46,8 +46,20 @@ string exe_path::getBaseDir() {
return path::parent_path(binDir);
}
/// Returns the "lib" directory located directly below the base directory
/// reported by getBaseDir().
string exe_path::getLibDir() {
  llvm::SmallString<128> libDir(getBaseDir());
  path::append(libDir, "lib");
  return libDir.str();
}
/// Returns `suffix` appended as a path component to the directory reported
/// by getBinDir().
string exe_path::prependBinDir(const char *suffix) {
  llvm::SmallString<128> fullPath(getBinDir());
  path::append(fullPath, suffix);
  return fullPath.str();
}
/// Returns `suffix` appended as a path component to the directory reported
/// by getLibDir().
string exe_path::prependLibDir(const char *suffix) {
  llvm::SmallString<128> fullPath(getLibDir());
  path::append(fullPath, suffix);
  return fullPath.str();
}

View file

@ -24,7 +24,9 @@ void initialize(const char *arg0);
const std::string &getExePath(); // <baseDir>/bin/ldc2
std::string getBinDir(); // <baseDir>/bin
std::string getBaseDir(); // <baseDir>
std::string getLibDir(); // <baseDir>/lib
std::string prependBinDir(const char *suffix); // <baseDir>/bin/<suffix>
std::string prependLibDir(const char *suffix); // <baseDir>/lib/<suffix>
}
#endif // LDC_DRIVER_EXE_PATH_H

View file

@ -14,6 +14,7 @@
#include "driver/cl_options.h"
#include "driver/exe_path.h"
#include "driver/tool.h"
#include "gen/irstate.h"
#include "gen/llvm.h"
#include "gen/logger.h"
#include "gen/optimizer.h"
@ -26,12 +27,16 @@
#include "llvm/Support/Program.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#if _WIN32
#include "llvm/Support/SystemUtils.h"
#include "llvm/Support/ConvertUTF.h"
#include <Windows.h>
#endif
#include <algorithm>
//////////////////////////////////////////////////////////////////////////////
static llvm::cl::opt<bool> staticFlag(
@ -46,6 +51,12 @@ static llvm::cl::opt<bool> createStaticLibInObjdir(
llvm::cl::desc("Create static library in -od directory (DMD-compliant)"),
llvm::cl::ZeroOrMore, llvm::cl::ReallyHidden);
// Command-line switch -flto-binary=<file>: user-supplied path of the LTO
// plugin/library. Left empty when the switch is not given.
static llvm::cl::opt<std::string> ltoLibrary(
"flto-binary",
llvm::cl::desc(
"Set the path for LLVMgold.so (Unixes) or libLTO.dylib (Darwin)"),
llvm::cl::value_desc("file"));
//////////////////////////////////////////////////////////////////////////////
static void CreateDirectoryOnDisk(llvm::StringRef fileName) {
@ -99,6 +110,108 @@ static std::string getOutputName(bool const sharedLib) {
return result;
}
//////////////////////////////////////////////////////////////////////////////
// LTO functionality
namespace {
/// Appends `flag` to `args`, preceded by "-Xlinker" so that it is handed on
/// to the linker.
void addLinkerFlag(std::vector<std::string> &args, const llvm::Twine &flag) {
  args.emplace_back("-Xlinker");
  args.emplace_back(flag.str());
}
/// Returns the path of the LLVMgold.so linker plugin.
///
/// A path given with -flto-binary takes precedence and must exist. Otherwise
/// a fixed list of default locations is probed in order and the first
/// existing one is returned. If nothing is found, an error is reported and
/// fatal() is called.
std::string getLTOGoldPluginPath() {
  if (!ltoLibrary.empty()) {
    if (llvm::sys::fs::exists(ltoLibrary))
      return ltoLibrary;

    error(Loc(), "-flto-binary: file '%s' not found", ltoLibrary.c_str());
    fatal();
  } else {
    const std::string searchPaths[] = {
        exe_path::prependLibDir("LLVMgold.so"), "/usr/local/lib/LLVMgold.so",
        "/usr/lib/bfd-plugins/LLVMgold.so",
    };

    // Try all searchPaths and early return upon the first path found.
    // Iterate by const reference: no need to copy each candidate string.
    for (const auto &p : searchPaths) {
      if (llvm::sys::fs::exists(p))
        return p;
    }

    error(Loc(), "The LLVMgold.so plugin (needed for LTO) was not found. You "
                 "can specify its path with -flto-binary=<file>.");
    fatal();
  }
}
void addLTOGoldPluginFlags(std::vector<std::string> &args) {
addLinkerFlag(args, "-plugin");
addLinkerFlag(args, getLTOGoldPluginPath());
if (opts::isUsingThinLTO())
addLinkerFlag(args, "-plugin-opt=thinlto");
if (!opts::mCPU.empty())
addLinkerFlag(args, llvm::Twine("-plugin-opt=mcpu=") + opts::mCPU);
// Use the O-level passed to LDC as the O-level for LTO, but restrict it to
// the [0, 3] range that can be passed to the linker plugin.
static char optChars[15] = "-plugin-opt=O0";
optChars[13] = '0' + std::min<char>(optLevel(), 3);
addLinkerFlag(args, optChars);
#if LDC_LLVM_VER >= 400
const llvm::TargetOptions &TO = gTargetMachine->Options;
if (TO.FunctionSections)
addLinkerFlag(args, "-plugin-opt=-function-sections");
if (TO.DataSections)
addLinkerFlag(args, "-plugin-opt=-data-sections");
#endif
}
/// Locates libLTO.dylib.
///
/// Without -flto-binary, the copy in the lib directory is used if it exists
/// and an empty string is returned otherwise. With -flto-binary, the given
/// file must exist; if it does not, an error is reported and fatal() is
/// called.
std::string getLTOdylibPath() {
  if (ltoLibrary.empty()) {
    std::string bundledDylib = exe_path::prependLibDir("libLTO.dylib");
    if (!llvm::sys::fs::exists(bundledDylib))
      return "";
    return bundledDylib;
  }

  if (llvm::sys::fs::exists(ltoLibrary))
    return ltoLibrary;

  error(Loc(), "-flto-binary: '%s' not found", ltoLibrary.c_str());
  fatal();
}
/// Appends "-lto_library <path>" to `args` when getLTOdylibPath() yields a
/// non-empty path; otherwise leaves `args` untouched.
void addDarwinLTOFlags(std::vector<std::string> &args) {
  std::string ltoDylib = getLTOdylibPath();
  if (ltoDylib.empty())
    return;

  args.emplace_back("-lto_library");
  args.push_back(std::move(ltoDylib));
}
/// Adds the required linker flags for LTO builds to args.
/// Does nothing for LDC_LLVM_VER < 309 or for target OSes not handled below.
void addLTOLinkFlags(std::vector<std::string> &args) {
#if LDC_LLVM_VER >= 309
  const auto &triple = *global.params.targetTriple;

  // Assume that ld.gold or ld.bfd is used with plugin support.
  const bool usesLinkerPlugin = triple.isOSLinux() || triple.isOSFreeBSD() ||
                                triple.isOSNetBSD() || triple.isOSOpenBSD() ||
                                triple.isOSDragonFly();

  if (usesLinkerPlugin) {
    addLTOGoldPluginFlags(args);
  } else if (triple.isOSDarwin()) {
    addDarwinLTOFlags(args);
  }
#endif
}
} // anonymous namespace
//////////////////////////////////////////////////////////////////////////////
namespace {
@ -223,6 +336,11 @@ static int linkObjToBinaryGcc(bool sharedLib, bool fullyStatic) {
args.push_back("-fsanitize=thread");
}
// Add LTO link flags before adding the user link switches, such that the user
// can pass additional options to the LTO plugin.
if (opts::isUsingLTO())
addLTOLinkFlags(args);
// additional linker switches
for (unsigned i = 0; i < global.params.linkswitches->dim; i++) {
const char *p = (*global.params.linkswitches)[i];

View file

@ -19,6 +19,9 @@
#include "gen/programs.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/Verifier.h"
#if LDC_LLVM_VER >= 309
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#endif
#include "llvm/Bitcode/ReaderWriter.h"
#if LDC_LLVM_VER >= 307
#include "llvm/IR/LegacyPassManager.h"
@ -364,20 +367,47 @@ void writeObjectFile(llvm::Module *m, std::string &filename) {
}
}
}
/// Tells whether object output is requested and has to go through an
/// external assembler.
bool shouldAssembleExternally() {
  if (!global.params.output_o)
    return false;

  if (NoIntegratedAssembler)
    return true;

  // There is no integrated assembler on AIX because XCOFF is not supported.
  // Starting with LLVM 3.5 the integrated assembler can be used with MinGW.
  return global.params.targetTriple->getOS() == llvm::Triple::AIX;
}
/// Tells whether object output is requested and no external assembler is
/// needed for it.
bool shouldOutputObjectFile() {
  if (!global.params.output_o)
    return false;
  return !shouldAssembleExternally();
}
/// Tells whether module `m` takes the LTO path. Always false for
/// LDC_LLVM_VER < 309.
bool shouldDoLTO(llvm::Module *m) {
#if LDC_LLVM_VER >= 309
#if LDC_LLVM_VER == 309
  // LLVM 3.9 bug: can't do ThinLTO with modules that have module-scope inline
  // assembly blocks (duplicate definitions upon importing from such a module).
  // https://llvm.org/bugs/show_bug.cgi?id=30610
  if (opts::isUsingThinLTO()) {
    const bool hasModuleInlineAsm = !m->getModuleInlineAsm().empty();
    if (hasModuleInlineAsm)
      return false;
  }
#endif
  return opts::isUsingLTO();
#else
  return false;
#endif
}
} // end of anonymous namespace
void writeModule(llvm::Module *m, std::string filename) {
// There is no integrated assembler on AIX because XCOFF is not supported.
// Starting with LLVM 3.5 the integrated assembler can be used with MinGW.
bool const assembleExternally =
global.params.output_o &&
(NoIntegratedAssembler ||
global.params.targetTriple->getOS() == llvm::Triple::AIX);
const bool doLTO = shouldDoLTO(m);
const bool outputObj = shouldOutputObjectFile();
const bool assembleExternally = shouldAssembleExternally();
// Use cached object code if possible
bool useIR2ObjCache = !opts::cacheDir.empty();
// Use cached object code if possible.
// TODO: combine LDC's cache and LTO (the advantage is skipping the IR
// optimization).
const bool useIR2ObjCache =
!opts::cacheDir.empty() && outputObj && !doLTO;
llvm::SmallString<32> moduleHash;
if (useIR2ObjCache && global.params.output_o && !assembleExternally) {
if (useIR2ObjCache) {
llvm::SmallString<128> cacheDir(opts::cacheDir.c_str());
llvm::sys::fs::make_absolute(cacheDir);
opts::cacheDir = cacheDir.c_str();
@ -411,9 +441,10 @@ void writeModule(llvm::Module *m, std::string filename) {
}
// write LLVM bitcode
if (global.params.output_bc) {
if (global.params.output_bc || (doLTO && outputObj)) {
LLPath bcpath(filename);
llvm::sys::path::replace_extension(bcpath, global.bc_ext);
if (global.params.output_bc)
llvm::sys::path::replace_extension(bcpath, global.bc_ext);
Logger::println("Writing LLVM bitcode to: %s\n", bcpath.c_str());
LLErrorInfo errinfo;
llvm::raw_fd_ostream bos(bcpath.c_str(), errinfo, llvm::sys::fs::F_None);
@ -422,7 +453,25 @@ void writeModule(llvm::Module *m, std::string filename) {
ERRORINFO_STRING(errinfo));
fatal();
}
llvm::WriteBitcodeToFile(m, bos);
if (opts::isUsingThinLTO()) {
#if LDC_LLVM_VER >= 309
Logger::println("Creating module summary for ThinLTO");
#if LDC_LLVM_VER == 309
// TODO: add PGO data in here when available (function freq info).
llvm::ModuleSummaryIndexBuilder indexBuilder(m, nullptr);
auto &moduleSummaryIndex = indexBuilder.getIndex();
#else
// TODO: add PGO data in here when available (function freq info and
// profile summary info).
auto moduleSummaryIndex = buildModuleSummaryIndex(*m, nullptr, nullptr);
#endif
llvm::WriteBitcodeToFile(m, bos, true, &moduleSummaryIndex,
/* generate ThinLTO hash */ true);
#endif
} else {
llvm::WriteBitcodeToFile(m, bos);
}
}
// write LLVM IR
@ -476,7 +525,7 @@ void writeModule(llvm::Module *m, std::string filename) {
}
}
if (global.params.output_o && !assembleExternally) {
if (outputObj && !doLTO) {
writeObjectFile(m, filename);
if (useIR2ObjCache) {
cache::cacheObjectFile(filename, moduleHash);

View file

@ -125,7 +125,7 @@ static cl::opt<bool>
cl::desc("Disable the slp vectorization pass"),
cl::init(false));
static unsigned optLevel() {
unsigned optLevel() {
// Use -O2 as a base for the size-optimization levels.
return optimizeLevel >= 0 ? optimizeLevel : 2;
}

View file

@ -47,6 +47,8 @@ bool willInline();
bool willCrossModuleInline();
unsigned optLevel();
bool isOptimizationEnabled();
llvm::CodeGenOpt::Level codeGenOptLevel();

14
tests/linking/fulllto_1.d Normal file
View file

@ -0,0 +1,14 @@
// Test full LTO commandline flag
// REQUIRES: atleast_llvm309
// REQUIRES: LTO
// RUN: %ldc %s -of=%t%obj -c -flto=full -vv | FileCheck %s
// RUN: %ldc -flto=full -run %s
// CHECK: Writing LLVM bitcode
// CHECK-NOT: Creating module summary
// Intentionally empty entry point: this file only exercises the LTO
// command-line flag.
void main()
{
}

View file

@ -0,0 +1,13 @@
// Naked function whose whole body is an inline-assembly `ret`.
void foo()
{
asm
{
naked;
ret;
}
}
// Plain function without inline assembly; always returns 1.
int simplefunction()
{
return 1;
}

View file

@ -0,0 +1,6 @@
import core.stdc.stdio;
// Module constructor: writes "ctor" to stdout via puts.
static this()
{
puts("ctor\n");
}

14
tests/linking/thinlto_1.d Normal file
View file

@ -0,0 +1,14 @@
// Test ThinLTO commandline flag
// REQUIRES: atleast_llvm309
// REQUIRES: LTO
// RUN: %ldc %s -of=%t%obj -c -flto=thin -vv | FileCheck %s
// RUN: %ldc -flto=thin -run %s
// CHECK: Writing LLVM bitcode
// CHECK: Creating module summary for ThinLTO
// Intentionally empty entry point: this file only exercises the ThinLTO
// command-line flag.
void main()
{
}

View file

@ -0,0 +1,13 @@
// ThinLTO: Test inline assembly functions with thinlto
// REQUIRES: atleast_llvm309
// REQUIRES: LTO
// RUN: %ldc -flto=thin %S/inputs/asm_x86.d -c -of=%t_input%obj
// RUN: %ldc -flto=thin -I%S %s %t_input%obj
import inputs.asm_x86;
// Calls simplefunction() from the imported inputs.asm_x86 module and returns
// its result from main.
int main() {
return simplefunction();
}

View file

@ -0,0 +1,26 @@
// ThinLTO: Test that module ctors/dtors are called
// REQUIRES: atleast_llvm309
// REQUIRES: LTO
// RUN: %ldc -flto=thin -O3 -run %s | FileCheck %s
// CHECK: ctor
// CHECK: main
// CHECK: dtor
import core.stdc.stdio;
// Module constructor: writes "ctor" to stdout via puts.
static this()
{
puts("ctor\n");
}
// Module destructor: writes "dtor" to stdout via puts.
static ~this()
{
puts("dtor\n");
}
// Writes "main" to stdout; the expected output order is ctor, main, dtor.
void main() {
puts("main\n");
}

View file

@ -0,0 +1,19 @@
// REQUIRES: atleast_llvm309
// REQUIRES: LTO
// RUN: %ldc -flto=thin -O3 %S/inputs/thinlto_ctor.d -run %s | FileCheck --check-prefix=EXECUTE %s
// EXECUTE: ctor
// EXECUTE: main
// EXECUTE: dtor
import core.stdc.stdio;
// Module destructor: writes "dtor" to stdout via puts.
static ~this()
{
puts("dtor\n");
}
// Writes "main" to stdout; the expected output order is ctor, main, dtor.
void main() {
puts("main\n");
}

View file

@ -3,6 +3,7 @@ import os
import sys
import platform
import string
import subprocess
## Auto-initialized variables by cmake:
config.ldc2_bin = "@LDC2_BIN@"
@ -66,6 +67,26 @@ if (platform.system() == 'Windows') and (config.default_target_bits == 32):
if (platform.system() == 'Windows') and (config.default_target_bits == 64):
config.available_features.add('Windows_x64')
# Add "LTO" feature if linker support is available (LTO is supported from LLVM 3.9)
def _linker_stderr(command):
    """Run `command` and return the text it wrote to stderr.

    Returns an empty string when the program cannot be started (e.g. no `ld`
    on PATH), so that a missing linker means "no LTO support" instead of
    aborting the whole test-suite configuration.
    """
    try:
        p = subprocess.Popen(command, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, universal_newlines=True)
    except OSError:
        return ''
    # communicate() drains both pipes and waits for the child, so the process
    # can neither block on a full stdout pipe nor be left unreaped.
    _, err = p.communicate()
    return err

canDoLTO = False
if (config.llvm_version >= 309):
    if (platform.system() == 'Darwin'):
        # The linker's version output mentions "LTO support" when available.
        canDoLTO = "LTO support" in _linker_stderr(['ld', '-v'])
    elif (platform.system() == 'Linux'):
        # A linker that knows -plugin complains about its missing argument.
        canDoLTO = "plugin: missing argument" in _linker_stderr(['ld', '-plugin'])
if canDoLTO:
    config.available_features.add('LTO')
config.target_triple = '(unused)'
# test_exec_root: The root path where tests should be run.