mirror of
https://github.com/ldc-developers/ldc.git
synced 2025-05-14 15:16:07 +03:00
[dcomptue] codegen (#2126)
* code generation * simplify logic * apply clang-format * Undo completely inane choice by clang-format * Guard the use of the command line args.
This commit is contained in:
parent
cae9ccd9a4
commit
ae6ff33fc1
23 changed files with 378 additions and 92 deletions
|
@ -25,6 +25,11 @@ math(EXPR LDC_LLVM_VER ${LLVM_VERSION_MAJOR}*100+${LLVM_VERSION_MINOR})
|
|||
string(REGEX MATCH "^-.*LLVMTableGen[^;]*;|;-.*LLVMTableGen[^;]*" LLVM_TABLEGEN_LIBRARY "${LLVM_LIBRARIES}")
|
||||
string(REGEX REPLACE "^-.*LLVMTableGen[^;]*;|;-.*LLVMTableGen[^;]*" "" LLVM_LIBRARIES "${LLVM_LIBRARIES}")
|
||||
|
||||
# Information about which targets LLVM was built to target
|
||||
foreach(LLVM_SUPPORTED_TARGET ${LLVM_TARGETS_TO_BUILD})
|
||||
add_definitions("-DLDC_LLVM_SUPPORTED_TARGET_${LLVM_SUPPORTED_TARGET}=1")
|
||||
endforeach()
|
||||
|
||||
#
|
||||
# Get info about used Linux distribution.
|
||||
#
|
||||
|
@ -330,6 +335,7 @@ set(DRV_SRC
|
|||
driver/cl_options.cpp
|
||||
driver/codegenerator.cpp
|
||||
driver/configfile.cpp
|
||||
driver/dcomputecodegenerator.cpp
|
||||
driver/exe_path.cpp
|
||||
driver/targetmachine.cpp
|
||||
driver/toobj.cpp
|
||||
|
@ -345,6 +351,7 @@ set(DRV_HDR
|
|||
driver/cl_options.h
|
||||
driver/codegenerator.h
|
||||
driver/configfile.h
|
||||
driver/dcomputecodegenerator.h
|
||||
driver/exe_path.h
|
||||
driver/ldc-version.h
|
||||
driver/archiver.h
|
||||
|
|
|
@ -505,6 +505,15 @@ cl::opt<std::string>
|
|||
"of optimizations performed by LLVM"),
|
||||
cl::ValueOptional);
|
||||
#endif
|
||||
|
||||
#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX
|
||||
cl::list<std::string>
|
||||
dcomputeTargets("mdcompute-targets", cl::CommaSeparated,
|
||||
cl::desc("Generates code for the specified DCompute target"
|
||||
" list. Use 'ocl-xy0' for OpenCL x.y, and "
|
||||
"'cuda-xy0' for CUDA CC x.y"),
|
||||
cl::value_desc("targets"));
|
||||
#endif
|
||||
|
||||
static cl::extrahelp footer(
|
||||
"\n"
|
||||
|
|
|
@ -124,5 +124,8 @@ inline bool isUsingThinLTO() { return false; }
|
|||
#if LDC_LLVM_VER >= 400
|
||||
extern cl::opt<std::string> saveOptimizationRecord;
|
||||
#endif
|
||||
#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX
|
||||
extern cl::list<std::string> dcomputeTargets;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
|
74
driver/dcomputecodegenerator.cpp
Normal file
74
driver/dcomputecodegenerator.cpp
Normal file
|
@ -0,0 +1,74 @@
|
|||
//===-- driver/dcomputecodegenerator.cpp ----------------------------------===//
|
||||
//
|
||||
// LDC – the LLVM D compiler
|
||||
//
|
||||
// This file is distributed under the BSD-style LDC license. See the LICENSE
|
||||
// file for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "driver/dcomputecodegenerator.h"
|
||||
#include "driver/cl_options.h"
|
||||
#include "ddmd/errors.h"
|
||||
#include "gen/cl_helpers.h"
|
||||
#include "ir/irdsymbol.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
|
||||
DComputeTarget *
|
||||
DComputeCodeGenManager::createComputeTarget(const std::string &s) {
|
||||
int v;
|
||||
#define OCL_VALID_VER_INIT 100, 110, 120, 200, 210, 220
|
||||
const std::array<int, 6> valid_ocl_versions = {{OCL_VALID_VER_INIT}};
|
||||
#define CUDA_VALID_VER_INIT 100, 110, 120, 130, 200, 210, 300, 350, 370,\
|
||||
500, 520, 600, 610, 620
|
||||
const std::array<int, 14> vaild_cuda_versions = {{CUDA_VALID_VER_INIT}};
|
||||
|
||||
if (s.substr(0, 4) == "ocl-") {
|
||||
v = atoi(s.c_str() + 4);
|
||||
if (std::find(valid_ocl_versions.begin(), valid_ocl_versions.end(), v) !=
|
||||
valid_ocl_versions.end()) {
|
||||
return createOCLTarget(ctx, v);
|
||||
}
|
||||
} else if (s.substr(0, 5) == "cuda-") {
|
||||
v = atoi(s.c_str() + 5);
|
||||
|
||||
if (std::find(vaild_cuda_versions.begin(), vaild_cuda_versions.end(), v) !=
|
||||
vaild_cuda_versions.end()) {
|
||||
return createCUDATarget(ctx, v);
|
||||
}
|
||||
}
|
||||
#define XSTR(x) #x
|
||||
#define STR(x) XSTR((x))
|
||||
|
||||
error(Loc(),
|
||||
"unrecognised or invalid DCompute targets: the format is ocl-xy0 "
|
||||
"for OpenCl x.y and cuda-xy0 for CUDA CC x.y. Valid versions "
|
||||
"for OpenCl are " STR(OCL_VALID_VER_INIT) ". Valid versions for CUDA "
|
||||
"are " STR(CUDA_VALID_VER_INIT));
|
||||
fatal();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
DComputeCodeGenManager::DComputeCodeGenManager(llvm::LLVMContext &c) : ctx(c) {
|
||||
#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX
|
||||
for (auto &option : opts::dcomputeTargets) {
|
||||
targets.push_back(createComputeTarget(option));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void DComputeCodeGenManager::emit(Module *m) {
|
||||
for (auto &target : targets) {
|
||||
target->emit(m);
|
||||
IrDsymbol::resetAll();
|
||||
}
|
||||
}
|
||||
|
||||
void DComputeCodeGenManager::writeModules() {
|
||||
for (auto &target : targets) {
|
||||
target->writeModule();
|
||||
}
|
||||
}
|
31
driver/dcomputecodegenerator.h
Normal file
31
driver/dcomputecodegenerator.h
Normal file
|
@ -0,0 +1,31 @@
|
|||
//===-- driver/dcomputecodegenerator.h - LDC --------------------*- C++ -*-===//
|
||||
//
|
||||
// LDC – the LLVM D compiler
|
||||
//
|
||||
// This file is distributed under the BSD-style LDC license. See the LICENSE
|
||||
// file for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LDC_DRIVER_DCOMPUTECODEGENERATOR_H
|
||||
#define LDC_DRIVER_DCOMPUTECODEGENERATOR_H
|
||||
|
||||
#include "gen/dcompute/target.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
|
||||
// gets run on modules marked @compute
|
||||
// All @compute D modules are emitted into one LLVM module once per target.
|
||||
class DComputeCodeGenManager {
|
||||
|
||||
llvm::LLVMContext &ctx;
|
||||
llvm::SmallVector<DComputeTarget *, 2> targets;
|
||||
DComputeTarget *createComputeTarget(const std::string &s);
|
||||
|
||||
public:
|
||||
void emit(Module *m);
|
||||
void writeModules();
|
||||
|
||||
DComputeCodeGenManager(llvm::LLVMContext &c);
|
||||
};
|
||||
|
||||
#endif
|
|
@ -23,6 +23,7 @@
|
|||
#include "driver/cl_options.h"
|
||||
#include "driver/codegenerator.h"
|
||||
#include "driver/configfile.h"
|
||||
#include "driver/dcomputecodegenerator.h"
|
||||
#include "driver/exe_path.h"
|
||||
#include "driver/ldc-version.h"
|
||||
#include "driver/linker.h"
|
||||
|
@ -40,6 +41,7 @@
|
|||
#include "gen/optimizer.h"
|
||||
#include "gen/passes/Passes.h"
|
||||
#include "gen/runtime.h"
|
||||
#include "gen/uda.h"
|
||||
#include "gen/abi.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
#include "llvm/LinkAllPasses.h"
|
||||
|
@ -879,6 +881,9 @@ void registerPredefinedVersions() {
|
|||
VersionCondition::addPredefinedGlobalIdent("LDC");
|
||||
VersionCondition::addPredefinedGlobalIdent("all");
|
||||
VersionCondition::addPredefinedGlobalIdent("D_Version2");
|
||||
#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX
|
||||
VersionCondition::addPredefinedGlobalIdent("LDC_DCompute");
|
||||
#endif
|
||||
|
||||
if (global.params.doDocComments) {
|
||||
VersionCondition::addPredefinedGlobalIdent("D_Ddoc");
|
||||
|
@ -1034,10 +1039,11 @@ void addDefaultVersionIdentifiers() {
|
|||
}
|
||||
|
||||
void codegenModules(Modules &modules) {
|
||||
// Generate one or more object/IR/bitcode files.
|
||||
// Generate one or more object/IR/bitcode files/dcompute kernels.
|
||||
if (global.params.obj && !modules.empty()) {
|
||||
ldc::CodeGenerator cg(getGlobalContext(), global.params.oneobj);
|
||||
|
||||
DComputeCodeGenManager dccg(getGlobalContext());
|
||||
std::vector<Module *> computeModules;
|
||||
// When inlining is enabled, we are calling semantic3 on function
|
||||
// declarations, which may _add_ members to the first module in the modules
|
||||
// array. These added functions must be codegenned, because these functions
|
||||
|
@ -1051,11 +1057,25 @@ void codegenModules(Modules &modules) {
|
|||
if (global.params.verbose)
|
||||
fprintf(global.stdmsg, "code %s\n", m->toChars());
|
||||
|
||||
cg.emit(m);
|
||||
const auto atCompute = hasComputeAttr(m);
|
||||
if (atCompute == DComputeCompileFor::hostOnly ||
|
||||
atCompute == DComputeCompileFor::hostAndDevice)
|
||||
{
|
||||
cg.emit(m);
|
||||
}
|
||||
if (atCompute != DComputeCompileFor::hostOnly)
|
||||
computeModules.push_back(m);
|
||||
|
||||
if (global.errors)
|
||||
fatal();
|
||||
}
|
||||
|
||||
if (!computeModules.empty()) {
|
||||
for (auto& mod : computeModules)
|
||||
dccg.emit(mod);
|
||||
|
||||
dccg.writeModules();
|
||||
}
|
||||
}
|
||||
|
||||
cache::pruneCache();
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "llvm/Support/TargetSelect.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "mars.h"
|
||||
#include "gen/logger.h"
|
||||
|
||||
|
@ -632,3 +633,13 @@ createTargetMachine(std::string targetTriple, std::string arch, std::string cpu,
|
|||
targetOptions, relocModel, codeModel,
|
||||
codeGenOptLevel);
|
||||
}
|
||||
|
||||
ComputeBackend::Type getComputeTargetType(llvm::Module* m) {
|
||||
llvm::Triple::ArchType a = llvm::Triple(m->getTargetTriple()).getArch();
|
||||
if (a == llvm::Triple::spir || a == llvm::Triple::spir64)
|
||||
return ComputeBackend::SPIRV;
|
||||
else if (a == llvm::Triple::nvptx || a == llvm::Triple::nvptx64)
|
||||
return ComputeBackend::NVPTX;
|
||||
else
|
||||
return ComputeBackend::None;
|
||||
}
|
||||
|
|
|
@ -38,8 +38,15 @@ namespace llvm {
|
|||
class Triple;
|
||||
class Target;
|
||||
class TargetMachine;
|
||||
class Module;
|
||||
}
|
||||
|
||||
namespace ComputeBackend {
|
||||
enum Type { None, SPIRV, NVPTX };
|
||||
}
|
||||
|
||||
ComputeBackend::Type getComputeTargetType(llvm::Module*);
|
||||
|
||||
/**
|
||||
* Creates an LLVM TargetMachine suitable for the given (usually command-line)
|
||||
* parameters and the host platform defaults.
|
||||
|
|
|
@ -39,6 +39,9 @@
|
|||
#if LDC_LLVM_VER >= 307
|
||||
#include "llvm/Support/Path.h"
|
||||
#endif
|
||||
#ifdef LDC_LLVM_SUPPORTED_TARGET_SPIRV
|
||||
#include "llvm/Support/SPIRV.h"
|
||||
#endif
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#if LDC_LLVM_VER >= 307
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
|
@ -75,7 +78,19 @@ static void codegenModule(llvm::TargetMachine &Target, llvm::Module &m,
|
|||
legacy::
|
||||
#endif
|
||||
PassManager Passes;
|
||||
ComputeBackend::Type cb = getComputeTargetType(&m);
|
||||
|
||||
if (cb == ComputeBackend::SPIRV) {
|
||||
#ifdef LDC_LLVM_SUPPORTED_TARGET_SPIRV
|
||||
IF_LOG Logger::println("running createSPIRVWriterPass()");
|
||||
llvm::createSPIRVWriterPass(out)->runOnModule(m);
|
||||
IF_LOG Logger::println("Success.");
|
||||
#else
|
||||
error(Loc(), "Trying to target SPIRV, but LDC is not built to do so!");
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
#if LDC_LLVM_VER >= 307
|
||||
// The DataLayout is already set at the module (in module.cpp,
|
||||
// method Module::genLLVMModule())
|
||||
|
@ -107,7 +122,11 @@ static void codegenModule(llvm::TargetMachine &Target, llvm::Module &m,
|
|||
#else
|
||||
fout,
|
||||
#endif
|
||||
fileType, codeGenOptLevel())) {
|
||||
// Always generate assembly for ptx as it is an assembly format
|
||||
// The PTX backend fails if we pass anything else.
|
||||
(cb == ComputeBackend::NVPTX) ? llvm::TargetMachine::CGFT_AssemblyFile
|
||||
: fileType,
|
||||
codeGenOptLevel())) {
|
||||
llvm_unreachable("no support for asm output");
|
||||
}
|
||||
|
||||
|
|
|
@ -23,13 +23,13 @@
|
|||
#include "gen/dcompute/druntime.h"
|
||||
|
||||
struct DComputePointerRewrite : ABIRewrite {
|
||||
LLType* type(Type* t) override {
|
||||
auto ptr = toDcomputePointer(static_cast<TypeStruct*>(t)->sym);
|
||||
return ptr->toLLVMType();
|
||||
LLType *type(Type *t) override {
|
||||
auto ptr = toDcomputePointer(static_cast<TypeStruct *>(t)->sym);
|
||||
return ptr->toLLVMType(true);
|
||||
}
|
||||
LLValue *getLVal(Type *dty, LLValue *v) override {
|
||||
// TODO: Is this correct?
|
||||
return DtoBitCast(v,this->type(dty));
|
||||
return DtoBitCast(v, this->type(dty));
|
||||
}
|
||||
LLValue *put(DValue *dv) override {
|
||||
LLValue *address = getAddressOf(dv);
|
||||
|
|
|
@ -32,16 +32,14 @@ bool isFromLDC_DCompute(Dsymbol *sym) {
|
|||
return false;
|
||||
|
||||
return moduleDecl->id == Id::dcompute;
|
||||
|
||||
}
|
||||
|
||||
llvm::Optional<DcomputePointer> toDcomputePointer(StructDeclaration *sd)
|
||||
{
|
||||
llvm::Optional<DcomputePointer> toDcomputePointer(StructDeclaration *sd) {
|
||||
if (sd->ident != Id::dcPointer || !isFromLDC_DCompute(sd))
|
||||
return llvm::Optional<DcomputePointer>(llvm::None);
|
||||
return llvm::Optional<DcomputePointer>(llvm::None);
|
||||
|
||||
TemplateInstance *ti = sd->isInstantiated();
|
||||
int addrspace = isExpression((*ti->tiargs)[0])->toInteger();
|
||||
Type* type = isType((*ti->tiargs)[1]);
|
||||
return llvm::Optional<DcomputePointer>(DcomputePointer(addrspace,type));
|
||||
Type *type = isType((*ti->tiargs)[1]);
|
||||
return llvm::Optional<DcomputePointer>(DcomputePointer(addrspace, type));
|
||||
}
|
||||
|
|
|
@ -15,22 +15,26 @@
|
|||
#include "ddmd/aggregate.h"
|
||||
#include "ddmd/mtype.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "gen/dcompute/target.h"
|
||||
#include "gen/irstate.h"
|
||||
#include "gen/llvm.h"
|
||||
#include "gen/tollvm.h"
|
||||
|
||||
class Dsymbol;
|
||||
class Type;
|
||||
|
||||
|
||||
bool isFromLDC_DCompute(Dsymbol *sym);
|
||||
|
||||
struct DcomputePointer {
|
||||
int addrspace;
|
||||
Type* type;
|
||||
DcomputePointer(int as,Type* ty) : addrspace(as),type(ty) {}
|
||||
LLType *toLLVMType() {
|
||||
Type *type;
|
||||
DcomputePointer(int as, Type *ty) : addrspace(as), type(ty) {}
|
||||
LLType *toLLVMType(bool translate) {
|
||||
auto llType = DtoMemType(type);
|
||||
return llType->getPointerElementType()->getPointerTo(addrspace);
|
||||
int as = addrspace;
|
||||
if (translate)
|
||||
as = gIR->dcomputetarget->mapping[as];
|
||||
return llType->getPointerElementType()->getPointerTo(as);
|
||||
}
|
||||
};
|
||||
llvm::Optional<DcomputePointer> toDcomputePointer(StructDeclaration *sd);
|
||||
|
|
|
@ -30,7 +30,6 @@ void DComputeTarget::doCodeGen(Module *m) {
|
|||
|
||||
if (global.errors)
|
||||
fatal();
|
||||
|
||||
}
|
||||
|
||||
void DComputeTarget::emit(Module *m) {
|
||||
|
|
|
@ -38,8 +38,8 @@ public:
|
|||
|
||||
DComputeTarget(llvm::LLVMContext &c, int v, ID id, const char *_short_name,
|
||||
const char *suffix, TargetABI *a, std::array<int, 5> map)
|
||||
: ctx(c), tversion(v), target(id), short_name(_short_name),
|
||||
binSuffix(suffix), abi(a), mapping(map), _ir(nullptr) { }
|
||||
: ctx(c), tversion(v), target(id), short_name(_short_name),
|
||||
binSuffix(suffix), abi(a), mapping(map), _ir(nullptr) {}
|
||||
|
||||
void emit(Module *m);
|
||||
void doCodeGen(Module *m);
|
||||
|
|
|
@ -21,17 +21,20 @@ namespace {
|
|||
class TargetCUDA : public DComputeTarget {
|
||||
public:
|
||||
TargetCUDA(llvm::LLVMContext &c, int sm)
|
||||
: DComputeTarget(
|
||||
: DComputeTarget(
|
||||
c, sm, CUDA, "cuda", "ptx", createNVPTXABI(),
|
||||
|
||||
// Map from nominal DCompute address space to NVPTX address space.
|
||||
// see $LLVM_ROOT/docs/docs/NVPTXUsage.rst section Address Spaces
|
||||
{{5, 1, 3, 4, 0}}) {
|
||||
std::string dl = global.params.is64bit ?
|
||||
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-"
|
||||
"f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" :
|
||||
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-"
|
||||
"f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
|
||||
std::string dl =
|
||||
global.params.is64bit
|
||||
? "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:"
|
||||
"32-"
|
||||
"f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
: "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:"
|
||||
"32-"
|
||||
"f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
|
||||
|
||||
_ir = new IRState("dcomputeTargetCUDA", ctx);
|
||||
_ir->module.setTargetTriple(global.params.is64bit ? "nvptx64-nvidia-cuda"
|
||||
|
@ -40,20 +43,20 @@ public:
|
|||
_ir->dcomputetarget = this;
|
||||
}
|
||||
|
||||
void addMetadata() override {
|
||||
void addMetadata() override {
|
||||
// sm version?
|
||||
}
|
||||
void setGTargetMachine() override {
|
||||
char buf[8];
|
||||
bool is64 = global.params.is64bit;
|
||||
snprintf(buf, sizeof(buf), "sm_%d", tversion / 10);
|
||||
gTargetMachine = createTargetMachine(
|
||||
void setGTargetMachine() override {
|
||||
char buf[8];
|
||||
bool is64 = global.params.is64bit;
|
||||
snprintf(buf, sizeof(buf), "sm_%d", tversion / 10);
|
||||
gTargetMachine = createTargetMachine(
|
||||
is64 ? "nvptx64-nvidia-cuda" : "nvptx-nvidia-cuda",
|
||||
is64 ? "nvptx64" : "nvptx", buf, {},
|
||||
is64 ? ExplicitBitness::M64 : ExplicitBitness::M32, ::FloatABI::Hard,
|
||||
llvm::Reloc::Static, llvm::CodeModel::Medium, codeGenOptLevel(), false,
|
||||
false);
|
||||
}
|
||||
}
|
||||
|
||||
void addKernelMetadata(FuncDeclaration *df, llvm::Function *llf) override {
|
||||
// Fix 3.5.2 build failures. Remove when dropping 3.5 support.
|
||||
|
|
|
@ -52,10 +52,10 @@ public:
|
|||
}
|
||||
void setGTargetMachine() override { gTargetMachine = nullptr; }
|
||||
|
||||
// Adapted from clang
|
||||
// Adapted from clang
|
||||
void addMetadata() override {
|
||||
// Fix 3.5.2 build failures. Remove when dropping 3.5 support.
|
||||
// OCL is only supported for 3.6.1 and 3.8 anyway.
|
||||
// Fix 3.5.2 build failures. Remove when dropping 3.5 support.
|
||||
// OCL is only supported for 3.6.1 and 3.8 anyway.
|
||||
#if LDC_LLVM_VER >= 306
|
||||
// opencl.ident?
|
||||
// spirv.Source // debug only
|
||||
|
@ -70,7 +70,7 @@ public:
|
|||
llvm::ConstantAsMetadata::get(
|
||||
llvm::ConstantInt::get(llvm::Type::getInt32Ty(ctx), 2))};
|
||||
llvm::NamedMDNode *SPIRVerMD =
|
||||
_ir->module.getOrInsertNamedMetadata("opencl.spir.version");
|
||||
_ir->module.getOrInsertNamedMetadata("opencl.spir.version");
|
||||
SPIRVerMD->addOperand(llvm::MDNode::get(ctx, SPIRVerElts));
|
||||
|
||||
// Add OpenCL version
|
||||
|
@ -81,7 +81,7 @@ public:
|
|||
llvm::Type::getInt32Ty(ctx), (tversion % 100) / 10))};
|
||||
llvm::NamedMDNode *OCLVerMD =
|
||||
_ir->module.getOrInsertNamedMetadata("opencl.ocl.version");
|
||||
|
||||
|
||||
OCLVerMD->addOperand(llvm::MDNode::get(ctx, OCLVerElts));
|
||||
#endif
|
||||
}
|
||||
|
@ -93,20 +93,20 @@ public:
|
|||
KernArgMD_type_qual,
|
||||
KernArgMD_name,
|
||||
count_KernArgMD
|
||||
|
||||
};
|
||||
void addKernelMetadata(FuncDeclaration *fd, llvm::Function *llf) override {
|
||||
// By the time we get here the ABI should have rewritten the function
|
||||
// type so that the magic types in ldc.dcompute are transformed into
|
||||
// what the LLVM backend expects.
|
||||
// By the time we get here the ABI should have rewritten the function
|
||||
// type so that the magic types in ldc.dcompute are transformed into
|
||||
// what the LLVM backend expects.
|
||||
|
||||
// Fix 3.5.2 build failures. Remove when dropping 3.5 support.
|
||||
// Fix 3.5.2 build failures. Remove when dropping 3.5 support.
|
||||
#if LDC_LLVM_VER >= 306
|
||||
unsigned i = 0;
|
||||
// TODO: Handle Function attibutes
|
||||
llvm::SmallVector<llvm::Metadata *, 8> kernelMDArgs;
|
||||
kernelMDArgs.push_back(llvm::ConstantAsMetadata::get(llf));
|
||||
// MDNode for the kernel argument address space qualifiers.
|
||||
|
||||
std::array<llvm::SmallVector<llvm::Metadata *, 8>,count_KernArgMD> paramArgs;
|
||||
std::array<const char*,count_KernArgMD> args = {
|
||||
"kernel_arg_addr_space",
|
||||
|
@ -125,17 +125,17 @@ public:
|
|||
VarDeclarations *vs = fd->parameters;
|
||||
for (i = 0; i < vs->dim; i++) {
|
||||
VarDeclaration *v = (*vs)[i];
|
||||
decodeTypes(paramArgs,v);
|
||||
decodeTypes(paramArgs, v);
|
||||
}
|
||||
|
||||
for (auto& md : paramArgs)
|
||||
|
||||
for (auto &md : paramArgs)
|
||||
kernelMDArgs.push_back(llvm::MDNode::get(ctx, md));
|
||||
///-------------------------------
|
||||
/// TODO: Handle Function attibutes
|
||||
///-------------------------------
|
||||
llvm::MDNode *kernelMDNode = llvm::MDNode::get(ctx, kernelMDArgs);
|
||||
llvm::NamedMDNode *OpenCLKernelMetadata =
|
||||
_ir->module.getOrInsertNamedMetadata("opencl.kernels");
|
||||
_ir->module.getOrInsertNamedMetadata("opencl.kernels");
|
||||
OpenCLKernelMetadata->addOperand(kernelMDNode);
|
||||
#endif
|
||||
}
|
||||
|
@ -147,19 +147,23 @@ public:
|
|||
std::string basicTypeToString(Type *t) {
|
||||
std::stringstream ss;
|
||||
auto ty = t->ty;
|
||||
if (ty == Tint8) ss << "char";
|
||||
else if (ty == Tuns8) ss << "uchar";
|
||||
if (ty == Tint8)
|
||||
ss << "char";
|
||||
else if (ty == Tuns8)
|
||||
ss << "uchar";
|
||||
else if (ty == Tvector) {
|
||||
TypeVector* vec = static_cast<TypeVector*>(t);
|
||||
TypeVector *vec = static_cast<TypeVector *>(t);
|
||||
auto size = vec->size(Loc());
|
||||
auto basety = vec->basetype->ty;
|
||||
if (basety == Tint8) ss << "char";
|
||||
else if (basety == Tuns8) ss << "uchar";
|
||||
else ss << vec->basetype->toChars();
|
||||
if (basety == Tint8)
|
||||
ss << "char";
|
||||
else if (basety == Tuns8)
|
||||
ss << "uchar";
|
||||
else
|
||||
ss << vec->basetype->toChars();
|
||||
ss << (int)size;
|
||||
}
|
||||
else
|
||||
ss << t->toChars();
|
||||
} else
|
||||
ss << t->toChars();
|
||||
return ss.str();
|
||||
}
|
||||
#if LDC_LLVM_VER >= 306
|
||||
|
@ -173,17 +177,14 @@ public:
|
|||
std::string accessQual = "none";
|
||||
int addrspace = 0;
|
||||
if (v->type->ty == Tstruct &&
|
||||
(ptr = toDcomputePointer(static_cast<TypeStruct*>(v->type)->sym)))
|
||||
{
|
||||
(ptr = toDcomputePointer(static_cast<TypeStruct *>(v->type)->sym))) {
|
||||
addrspace = ptr->addrspace;
|
||||
tyName = basicTypeToString(ptr->type) + "*";
|
||||
baseTyName = tyName;
|
||||
// there is no volatile or restrict (yet) in D
|
||||
typeQuals = mod2str(ptr->type->mod);
|
||||
// TODO: Images and Pipes They are global pointers to opaques
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
tyName = basicTypeToString(v->type);
|
||||
baseTyName = tyName;
|
||||
typeQuals = mod2str(v->type->mod);
|
||||
|
|
|
@ -67,6 +67,8 @@ public:
|
|||
decl->toPrettyChars());
|
||||
LOG_SCOPE
|
||||
|
||||
assert(!irs->dcomputetarget);
|
||||
|
||||
if (decl->ir->isDefined()) {
|
||||
return;
|
||||
}
|
||||
|
@ -125,14 +127,18 @@ public:
|
|||
m->accept(this);
|
||||
}
|
||||
|
||||
// Define the __initZ symbol.
|
||||
IrAggr *ir = getIrAggr(decl);
|
||||
llvm::GlobalVariable *initZ = ir->getInitSymbol();
|
||||
initZ->setInitializer(ir->getDefaultInit());
|
||||
setLinkage(decl, initZ);
|
||||
// Skip __initZ and typeinfo for @compute device code.
|
||||
// TODO: support global variables and thus __initZ
|
||||
if (!irs->dcomputetarget) {
|
||||
// Define the __initZ symbol.
|
||||
IrAggr *ir = getIrAggr(decl);
|
||||
llvm::GlobalVariable *initZ = ir->getInitSymbol();
|
||||
initZ->setInitializer(ir->getDefaultInit());
|
||||
setLinkage(decl, initZ);
|
||||
|
||||
// emit typeinfo
|
||||
DtoTypeInfoOf(decl->type);
|
||||
// emit typeinfo
|
||||
DtoTypeInfoOf(decl->type);
|
||||
}
|
||||
|
||||
// Emit __xopEquals/__xopCmp/__xtoHash.
|
||||
if (decl->xeq && decl->xeq != decl->xerreq) {
|
||||
|
@ -153,6 +159,8 @@ public:
|
|||
decl->toPrettyChars());
|
||||
LOG_SCOPE
|
||||
|
||||
assert(!irs->dcomputetarget);
|
||||
|
||||
if (decl->ir->isDefined()) {
|
||||
return;
|
||||
}
|
||||
|
@ -245,6 +253,7 @@ public:
|
|||
|
||||
assert(!(decl->storage_class & STCmanifest) &&
|
||||
"manifest constant being codegen'd!");
|
||||
assert(!irs->dcomputetarget);
|
||||
|
||||
IrGlobal *irGlobal = getIrGlobal(decl);
|
||||
LLGlobalVariable *gvar = llvm::cast<LLGlobalVariable>(irGlobal->value);
|
||||
|
@ -412,7 +421,8 @@ public:
|
|||
void visit(PragmaDeclaration *decl) LLVM_OVERRIDE {
|
||||
if (decl->ident == Id::lib) {
|
||||
assert(decl->args && decl->args->dim == 1);
|
||||
|
||||
assert(!irs->dcomputetarget);
|
||||
|
||||
Expression *e = static_cast<Expression *>(decl->args->data[0]);
|
||||
|
||||
assert(e->op == TOKstring);
|
||||
|
@ -473,7 +483,7 @@ public:
|
|||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void visit(TypeInfoDeclaration *decl) LLVM_OVERRIDE {
|
||||
if (isSpeculativeType(decl->tinfo)) {
|
||||
if (irs->dcomputetarget || isSpeculativeType(decl->tinfo)) {
|
||||
return;
|
||||
}
|
||||
TypeInfoDeclaration_codegen(decl, irs);
|
||||
|
@ -482,7 +492,7 @@ public:
|
|||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void visit(TypeInfoClassDeclaration *decl) LLVM_OVERRIDE {
|
||||
if (isSpeculativeType(decl->tinfo)) {
|
||||
if (irs->dcomputetarget || isSpeculativeType(decl->tinfo)) {
|
||||
return;
|
||||
}
|
||||
TypeInfoClassDeclaration_codegen(decl, irs);
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "gen/abi.h"
|
||||
#include "gen/arrays.h"
|
||||
#include "gen/classes.h"
|
||||
#include "gen/dcompute/target.h"
|
||||
#include "gen/dvalue.h"
|
||||
#include "gen/funcgenstate.h"
|
||||
#include "gen/function-inlining.h"
|
||||
|
@ -977,7 +978,7 @@ void DtoDefineFunction(FuncDeclaration *fd, bool linkageAvailableExternally) {
|
|||
#if LDC_LLVM_VER >= 500
|
||||
0, // Address space
|
||||
#endif
|
||||
"alloca point", beginbb);
|
||||
"alloca_point", beginbb);
|
||||
funcGen.allocapoint = allocaPoint;
|
||||
|
||||
// debug info - after all allocas, but before any llvm.dbg.declare etc
|
||||
|
@ -1127,6 +1128,11 @@ void DtoDefineFunction(FuncDeclaration *fd, bool linkageAvailableExternally) {
|
|||
}
|
||||
|
||||
gIR->scopes.pop_back();
|
||||
|
||||
if (gIR->dcomputetarget && hasKernelAttr(fd)) {
|
||||
auto fn = gIR->module.getFunction(fd->mangleString);
|
||||
gIR->dcomputetarget->addKernelMetadata(fd, fn);
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include "gen/cl_helpers.h"
|
||||
#include "gen/logger.h"
|
||||
#include "gen/passes/Passes.h"
|
||||
#include "driver/targetmachine.h"
|
||||
#include "llvm/LinkAllPasses.h"
|
||||
#if LDC_LLVM_VER >= 307
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
|
@ -341,6 +342,16 @@ bool ldc_optimize_module(llvm::Module *M) {
|
|||
#endif
|
||||
PassManager mpm;
|
||||
|
||||
// Dont optimise spirv modules because turning GEPs into extracts triggers
|
||||
// asserts in the IR -> SPIR-V translation pass. SPIRV doesn't have a target
|
||||
// machine, so any optimisation passes that rely on it to provide analysis,
|
||||
// like DCE can't be run.
|
||||
// The optimisation is supposed to happen between the SPIRV -> native machine
|
||||
// code pass of the consumer of the binary.
|
||||
// TODO: run rudimentary optimisations to improve IR debuggability.
|
||||
if (getComputeTargetType(M) == ComputeBackend::SPIRV)
|
||||
return false;
|
||||
|
||||
#if LDC_LLVM_VER >= 307
|
||||
// Add an appropriate TargetLibraryInfo pass for the module's triple.
|
||||
TargetLibraryInfoImpl *tlii =
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "gen/arrays.h"
|
||||
#include "gen/classes.h"
|
||||
#include "gen/coverage.h"
|
||||
#include "gen/dcompute/target.h"
|
||||
#include "gen/dvalue.h"
|
||||
#include "gen/funcgenstate.h"
|
||||
#include "gen/irstate.h"
|
||||
|
@ -24,6 +25,7 @@
|
|||
#include "gen/logger.h"
|
||||
#include "gen/runtime.h"
|
||||
#include "gen/tollvm.h"
|
||||
#include "id.h"
|
||||
#include "ir/irfunction.h"
|
||||
#include "ir/irmodule.h"
|
||||
#include "llvm/IR/CFG.h"
|
||||
|
@ -317,6 +319,18 @@ public:
|
|||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
bool dcomputeReflectMatches(CallExp *ce) {
|
||||
auto arg1 = (DComputeTarget::ID)(*ce->arguments)[0]->toInteger();
|
||||
auto arg2 = (*ce->arguments)[1]->toInteger();
|
||||
auto dct = irs->dcomputetarget;
|
||||
if (!dct) {
|
||||
return arg1 == DComputeTarget::Host;
|
||||
}
|
||||
else {
|
||||
return arg1 == dct->target && (!arg2 || arg2 == dct->tversion);
|
||||
}
|
||||
}
|
||||
|
||||
void visit(IfStatement *stmt) LLVM_OVERRIDE {
|
||||
IF_LOG Logger::println("IfStatement::toIR(): %s", stmt->loc.toChars());
|
||||
|
@ -332,6 +346,22 @@ public:
|
|||
irs->DBuilder.EmitBlockStart(stmt->loc);
|
||||
emitCoverageLinecountInc(stmt->loc);
|
||||
|
||||
// This is a (dirty) hack to get codegen time conditional
|
||||
// compilation, on account of the fact that we are trying
|
||||
// to target multiple backends "simultaneously" with one
|
||||
// pass through the front end, to have a single "static"
|
||||
// context.
|
||||
if (stmt->condition->op == TOKcall) {
|
||||
auto ce = (CallExp *)stmt->condition;
|
||||
if (ce->f && ce->f->ident == Id::dcReflect) {
|
||||
if (dcomputeReflectMatches(ce))
|
||||
stmt->ifbody->accept(this);
|
||||
else if (stmt->elsebody)
|
||||
stmt->elsebody->accept(this);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
DValue *cond_e = toElemDtor(stmt->condition);
|
||||
LLValue *cond_val = DtoRVal(cond_e);
|
||||
|
||||
|
@ -745,10 +775,15 @@ public:
|
|||
irs->DBuilder.EmitBlockStart(stmt->finalbody->loc);
|
||||
stmt->finalbody->accept(this);
|
||||
irs->DBuilder.EmitBlockEnd();
|
||||
CleanupCursor cleanupBefore;
|
||||
|
||||
CleanupCursor cleanupBefore = irs->funcGen().scopes.currentCleanupScope();
|
||||
irs->funcGen().scopes.pushCleanup(finallybb, irs->scopebb());
|
||||
|
||||
// For @compute code, don't emit any exception handling as there are no
|
||||
// exceptions anyway.
|
||||
const bool computeCode = !!irs->dcomputetarget;
|
||||
if (!computeCode) {
|
||||
cleanupBefore = irs->funcGen().scopes.currentCleanupScope();
|
||||
irs->funcGen().scopes.pushCleanup(finallybb, irs->scopebb());
|
||||
}
|
||||
// Emit the try block.
|
||||
irs->scope() = IRScope(trybb);
|
||||
|
||||
|
@ -758,12 +793,14 @@ public:
|
|||
irs->DBuilder.EmitBlockEnd();
|
||||
|
||||
if (successbb) {
|
||||
irs->funcGen().scopes.runCleanups(cleanupBefore, successbb);
|
||||
if (!computeCode)
|
||||
irs->funcGen().scopes.runCleanups(cleanupBefore, successbb);
|
||||
irs->scope() = IRScope(successbb);
|
||||
// PGO counter tracks the continuation of the try-finally statement
|
||||
PGO.emitCounterIncrement(stmt);
|
||||
}
|
||||
irs->funcGen().scopes.popCleanups(cleanupBefore);
|
||||
if (!computeCode)
|
||||
irs->funcGen().scopes.popCleanups(cleanupBefore);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
@ -772,6 +809,7 @@ public:
|
|||
IF_LOG Logger::println("TryCatchStatement::toIR(): %s",
|
||||
stmt->loc.toChars());
|
||||
LOG_SCOPE;
|
||||
assert(!irs->dcomputetarget);
|
||||
|
||||
auto &PGO = irs->funcGen().pgo;
|
||||
|
||||
|
@ -812,6 +850,7 @@ public:
|
|||
void visit(ThrowStatement *stmt) LLVM_OVERRIDE {
|
||||
IF_LOG Logger::println("ThrowStatement::toIR(): %s", stmt->loc.toChars());
|
||||
LOG_SCOPE;
|
||||
assert(!irs->dcomputetarget);
|
||||
|
||||
auto &PGO = irs->funcGen().pgo;
|
||||
PGO.setCurrentStmt(stmt);
|
||||
|
@ -871,6 +910,7 @@ public:
|
|||
const bool isStringSwitch = !stmt->condition->type->isintegral();
|
||||
if (isStringSwitch) {
|
||||
Logger::println("is string switch");
|
||||
assert(!irs->dcomputetarget);
|
||||
|
||||
// Sort the cases, taking care not to modify the original AST.
|
||||
cases = cases->copy();
|
||||
|
@ -1597,7 +1637,8 @@ public:
|
|||
IF_LOG Logger::println("SwitchErrorStatement::toIR(): %s",
|
||||
stmt->loc.toChars());
|
||||
LOG_SCOPE;
|
||||
|
||||
assert(!irs->dcomputetarget);
|
||||
|
||||
auto &PGO = irs->funcGen().pgo;
|
||||
PGO.setCurrentStmt(stmt);
|
||||
|
||||
|
@ -1616,11 +1657,15 @@ public:
|
|||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void visit(AsmStatement *stmt) LLVM_OVERRIDE { AsmStatement_toIR(stmt, irs); }
|
||||
void visit(AsmStatement *stmt) LLVM_OVERRIDE {
|
||||
assert(!irs->dcomputetarget);
|
||||
AsmStatement_toIR(stmt, irs);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void visit(CompoundAsmStatement *stmt) LLVM_OVERRIDE {
|
||||
assert(!irs->dcomputetarget);
|
||||
CompoundAsmStatement_toIR(stmt, irs);
|
||||
}
|
||||
|
||||
|
|
|
@ -418,15 +418,15 @@ bool hasWeakUDA(Dsymbol *sym) {
|
|||
|
||||
/// Returns 0 if 'sym' does not have the @ldc.dcompute.compute() UDA applied.
|
||||
/// Returns 1 + n if 'sym' does and is @compute(n).
|
||||
int hasComputeAttr(Dsymbol *sym) {
|
||||
DComputeCompileFor hasComputeAttr(Dsymbol *sym) {
|
||||
|
||||
auto sle = getMagicAttribute(sym, Id::udaCompute, Id::dcompute);
|
||||
if (!sle)
|
||||
return 0;
|
||||
return DComputeCompileFor::hostOnly;
|
||||
|
||||
checkStructElems(sle, {Type::tint32});
|
||||
|
||||
return 1 + (*sle->elements)[0]->toInteger();
|
||||
return static_cast<DComputeCompileFor>(1 + (*sle->elements)[0]->toInteger());
|
||||
}
|
||||
|
||||
/// Checks whether 'sym' has the @ldc.dcompute._kernel() UDA applied.
|
||||
|
@ -437,7 +437,8 @@ bool hasKernelAttr(Dsymbol *sym) {
|
|||
|
||||
checkStructElems(sle, {});
|
||||
|
||||
if (!sym->isFuncDeclaration() && !hasComputeAttr(sym->getModule()))
|
||||
if (!sym->isFuncDeclaration() &&
|
||||
hasComputeAttr(sym->getModule()) != DComputeCompileFor::hostOnly)
|
||||
sym->error("@ldc.dcompute.kernel can only be applied to functions"
|
||||
" in modules marked @ldc.dcompute.compute");
|
||||
|
||||
|
|
10
gen/uda.h
10
gen/uda.h
|
@ -27,7 +27,13 @@ void applyFuncDeclUDAs(FuncDeclaration *decl, IrFunction *irFunc);
|
|||
void applyVarDeclUDAs(VarDeclaration *decl, llvm::GlobalVariable *gvar);
|
||||
|
||||
bool hasWeakUDA(Dsymbol *sym);
|
||||
int hasComputeAttr(Dsymbol *sym);
|
||||
bool hasKernelAttr(Dsymbol *sym);
|
||||
|
||||
/// Must match ldc.attributes.Compilefor + 1 == DComputeCompileFor
|
||||
enum class DComputeCompileFor
|
||||
{
|
||||
hostOnly = 0,
|
||||
deviceOnly = 1,
|
||||
hostAndDevice = 2
|
||||
};
|
||||
DComputeCompileFor hasComputeAttr(Dsymbol *sym);
|
||||
#endif
|
||||
|
|
|
@ -15,11 +15,14 @@
|
|||
#include "declaration.h"
|
||||
#include "init.h"
|
||||
#include "mtype.h"
|
||||
#include "template.h"
|
||||
|
||||
#include "gen/irstate.h"
|
||||
#include "gen/tollvm.h"
|
||||
#include "gen/logger.h"
|
||||
#include "gen/llvmhelpers.h"
|
||||
#include "gen/dcompute/target.h"
|
||||
#include "gen/dcompute/druntime.h"
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -49,11 +52,29 @@ IrTypeStruct *IrTypeStruct::get(StructDeclaration *sd) {
|
|||
t->packed = isPacked(sd);
|
||||
}
|
||||
|
||||
AggrTypeBuilder builder(t->packed);
|
||||
builder.addAggregate(sd);
|
||||
builder.addTailPadding(sd->structsize);
|
||||
isaStruct(t->type)->setBody(builder.defaultTypes(), t->packed);
|
||||
t->varGEPIndices = builder.varGEPIndices();
|
||||
// For ldc.dcomptetypes.Pointer!(uint n,T),
|
||||
// emit { T addrspace(gIR->dcomputetarget->mapping[n])* }
|
||||
llvm::Optional<DcomputePointer> p;
|
||||
if (gIR->dcomputetarget && (p = toDcomputePointer(sd))) {
|
||||
|
||||
// Translate the virtual dcompute address space into the real one for
|
||||
// the target
|
||||
int realAS = gIR->dcomputetarget->mapping[p->addrspace];
|
||||
|
||||
llvm::SmallVector<LLType *, 1> body;
|
||||
body.push_back(DtoMemType(p->type)->getPointerTo(realAS));
|
||||
|
||||
isaStruct(t->type)->setBody(body, t->packed);
|
||||
VarGEPIndices v;
|
||||
v[sd->fields[0]] = 0;
|
||||
t->varGEPIndices = v;
|
||||
} else {
|
||||
AggrTypeBuilder builder(t->packed);
|
||||
builder.addAggregate(sd);
|
||||
builder.addTailPadding(sd->structsize);
|
||||
isaStruct(t->type)->setBody(builder.defaultTypes(), t->packed);
|
||||
t->varGEPIndices = builder.varGEPIndices();
|
||||
}
|
||||
|
||||
IF_LOG Logger::cout() << "final struct type: " << *t->type << std::endl;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue