diff --git a/CMakeLists.txt b/CMakeLists.txt index c9321b87fb..51bcc8cecd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,6 +25,11 @@ math(EXPR LDC_LLVM_VER ${LLVM_VERSION_MAJOR}*100+${LLVM_VERSION_MINOR}) string(REGEX MATCH "^-.*LLVMTableGen[^;]*;|;-.*LLVMTableGen[^;]*" LLVM_TABLEGEN_LIBRARY "${LLVM_LIBRARIES}") string(REGEX REPLACE "^-.*LLVMTableGen[^;]*;|;-.*LLVMTableGen[^;]*" "" LLVM_LIBRARIES "${LLVM_LIBRARIES}") +# Information about which targets LLVM was built to target +foreach(LLVM_SUPPORTED_TARGET ${LLVM_TARGETS_TO_BUILD}) + add_definitions("-DLDC_LLVM_SUPPORTED_TARGET_${LLVM_SUPPORTED_TARGET}=1") +endforeach() + # # Get info about used Linux distribution. # @@ -330,6 +335,7 @@ set(DRV_SRC driver/cl_options.cpp driver/codegenerator.cpp driver/configfile.cpp + driver/dcomputecodegenerator.cpp driver/exe_path.cpp driver/targetmachine.cpp driver/toobj.cpp @@ -345,6 +351,7 @@ set(DRV_HDR driver/cl_options.h driver/codegenerator.h driver/configfile.h + driver/dcomputecodegenerator.h driver/exe_path.h driver/ldc-version.h driver/archiver.h diff --git a/driver/cl_options.cpp b/driver/cl_options.cpp index eef3fbff56..313266a9e5 100644 --- a/driver/cl_options.cpp +++ b/driver/cl_options.cpp @@ -505,6 +505,15 @@ cl::opt "of optimizations performed by LLVM"), cl::ValueOptional); #endif + +#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX +cl::list + dcomputeTargets("mdcompute-targets", cl::CommaSeparated, + cl::desc("Generates code for the specified DCompute target" + " list. Use 'ocl-xy0' for OpenCL x.y, and " + "'cuda-xy0' for CUDA CC x.y"), + cl::value_desc("targets")); +#endif static cl::extrahelp footer( "\n" diff --git a/driver/cl_options.h b/driver/cl_options.h index b30cea8329..d154edc568 100644 --- a/driver/cl_options.h +++ b/driver/cl_options.h @@ -124,5 +124,8 @@ inline bool isUsingThinLTO() { return false; } #if LDC_LLVM_VER >= 400 extern cl::opt saveOptimizationRecord; #endif +#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX +extern cl::list dcomputeTargets; +#endif } #endif diff --git a/driver/dcomputecodegenerator.cpp b/driver/dcomputecodegenerator.cpp new file mode 100644 index 0000000000..381a4def83 --- /dev/null +++ b/driver/dcomputecodegenerator.cpp @@ -0,0 +1,74 @@ +//===-- driver/dcomputecodegenerator.cpp ----------------------------------===// +// +// LDC – the LLVM D compiler +// +// This file is distributed under the BSD-style LDC license. See the LICENSE +// file for details. +// +//===----------------------------------------------------------------------===// + +#include "driver/dcomputecodegenerator.h" +#include "driver/cl_options.h" +#include "ddmd/errors.h" +#include "gen/cl_helpers.h" +#include "ir/irdsymbol.h" +#include "llvm/Support/CommandLine.h" +#include +#include +#include + +DComputeTarget * +DComputeCodeGenManager::createComputeTarget(const std::string &s) { + int v; +#define OCL_VALID_VER_INIT 100, 110, 120, 200, 210, 220 + const std::array valid_ocl_versions = {{OCL_VALID_VER_INIT}}; +#define CUDA_VALID_VER_INIT 100, 110, 120, 130, 200, 210, 300, 350, 370,\ + 500, 520, 600, 610, 620 + const std::array vaild_cuda_versions = {{CUDA_VALID_VER_INIT}}; + + if (s.substr(0, 4) == "ocl-") { + v = atoi(s.c_str() + 4); + if (std::find(valid_ocl_versions.begin(), valid_ocl_versions.end(), v) != + valid_ocl_versions.end()) { + return createOCLTarget(ctx, v); + } + } else if (s.substr(0, 5) == "cuda-") { + v = atoi(s.c_str() + 5); + + if (std::find(vaild_cuda_versions.begin(), vaild_cuda_versions.end(), v) != + vaild_cuda_versions.end()) { + return createCUDATarget(ctx, v); + } + } +#define XSTR(x) #x +#define STR(x) XSTR((x)) + + error(Loc(), + "unrecognised or invalid DCompute targets: the format is ocl-xy0 " + "for OpenCl x.y and cuda-xy0 for CUDA CC x.y. Valid versions " + "for OpenCl are " STR(OCL_VALID_VER_INIT) ". Valid versions for CUDA " + "are " STR(CUDA_VALID_VER_INIT)); + fatal(); + return nullptr; +} + +DComputeCodeGenManager::DComputeCodeGenManager(llvm::LLVMContext &c) : ctx(c) { +#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX + for (auto &option : opts::dcomputeTargets) { + targets.push_back(createComputeTarget(option)); + } +#endif +} + +void DComputeCodeGenManager::emit(Module *m) { + for (auto &target : targets) { + target->emit(m); + IrDsymbol::resetAll(); + } +} + +void DComputeCodeGenManager::writeModules() { + for (auto &target : targets) { + target->writeModule(); + } +} diff --git a/driver/dcomputecodegenerator.h b/driver/dcomputecodegenerator.h new file mode 100644 index 0000000000..0dd46163fd --- /dev/null +++ b/driver/dcomputecodegenerator.h @@ -0,0 +1,31 @@ +//===-- driver/dcomputecodegenerator.h - LDC --------------------*- C++ -*-===// +// +// LDC – the LLVM D compiler +// +// This file is distributed under the BSD-style LDC license. See the LICENSE +// file for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LDC_DRIVER_DCOMPUTECODEGENERATOR_H +#define LDC_DRIVER_DCOMPUTECODEGENERATOR_H + +#include "gen/dcompute/target.h" +#include "llvm/ADT/SmallVector.h" + +// gets run on modules marked @compute +// All @compute D modules are emitted into one LLVM module once per target. +class DComputeCodeGenManager { + + llvm::LLVMContext &ctx; + llvm::SmallVector targets; + DComputeTarget *createComputeTarget(const std::string &s); + +public: + void emit(Module *m); + void writeModules(); + + DComputeCodeGenManager(llvm::LLVMContext &c); +}; + +#endif diff --git a/driver/main.cpp b/driver/main.cpp index 4a13c40dd8..40a9b7527a 100644 --- a/driver/main.cpp +++ b/driver/main.cpp @@ -23,6 +23,7 @@ #include "driver/cl_options.h" #include "driver/codegenerator.h" #include "driver/configfile.h" +#include "driver/dcomputecodegenerator.h" #include "driver/exe_path.h" #include "driver/ldc-version.h" #include "driver/linker.h" @@ -40,6 +41,7 @@ #include "gen/optimizer.h" #include "gen/passes/Passes.h" #include "gen/runtime.h" +#include "gen/uda.h" #include "gen/abi.h" #include "llvm/InitializePasses.h" #include "llvm/LinkAllPasses.h" @@ -879,6 +881,9 @@ void registerPredefinedVersions() { VersionCondition::addPredefinedGlobalIdent("LDC"); VersionCondition::addPredefinedGlobalIdent("all"); VersionCondition::addPredefinedGlobalIdent("D_Version2"); +#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX + VersionCondition::addPredefinedGlobalIdent("LDC_DCompute"); +#endif if (global.params.doDocComments) { VersionCondition::addPredefinedGlobalIdent("D_Ddoc"); @@ -1034,10 +1039,11 @@ void addDefaultVersionIdentifiers() { } void codegenModules(Modules &modules) { - // Generate one or more object/IR/bitcode files. + // Generate one or more object/IR/bitcode files/dcompute kernels. if (global.params.obj && !modules.empty()) { ldc::CodeGenerator cg(getGlobalContext(), global.params.oneobj); - + DComputeCodeGenManager dccg(getGlobalContext()); + std::vector computeModules; // When inlining is enabled, we are calling semantic3 on function // declarations, which may _add_ members to the first module in the modules // array. These added functions must be codegenned, because these functions @@ -1051,11 +1057,25 @@ void codegenModules(Modules &modules) { if (global.params.verbose) fprintf(global.stdmsg, "code %s\n", m->toChars()); - cg.emit(m); + const auto atCompute = hasComputeAttr(m); + if (atCompute == DComputeCompileFor::hostOnly || + atCompute == DComputeCompileFor::hostAndDevice) + { + cg.emit(m); + } + if (atCompute != DComputeCompileFor::hostOnly) + computeModules.push_back(m); if (global.errors) fatal(); } + + if (!computeModules.empty()) { + for (auto& mod : computeModules) + dccg.emit(mod); + + dccg.writeModules(); + } } cache::pruneCache(); diff --git a/driver/targetmachine.cpp b/driver/targetmachine.cpp index bfffaffb94..e1db02006d 100644 --- a/driver/targetmachine.cpp +++ b/driver/targetmachine.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/TargetSelect.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/IR/Module.h" #include "mars.h" #include "gen/logger.h" @@ -632,3 +633,13 @@ createTargetMachine(std::string targetTriple, std::string arch, std::string cpu, targetOptions, relocModel, codeModel, codeGenOptLevel); } + +ComputeBackend::Type getComputeTargetType(llvm::Module* m) { + llvm::Triple::ArchType a = llvm::Triple(m->getTargetTriple()).getArch(); + if (a == llvm::Triple::spir || a == llvm::Triple::spir64) + return ComputeBackend::SPIRV; + else if (a == llvm::Triple::nvptx || a == llvm::Triple::nvptx64) + return ComputeBackend::NVPTX; + else + return ComputeBackend::None; +} diff --git a/driver/targetmachine.h b/driver/targetmachine.h index f93a6736ba..91af541244 100644 --- a/driver/targetmachine.h +++ b/driver/targetmachine.h @@ -38,8 +38,15 @@ namespace llvm { class Triple; class Target; class TargetMachine; +class Module; } +namespace ComputeBackend { +enum Type { None, SPIRV, NVPTX }; +} + +ComputeBackend::Type getComputeTargetType(llvm::Module*); + /** * Creates an LLVM TargetMachine suitable for the given (usually command-line) * parameters and the host platform defaults. diff --git a/driver/toobj.cpp b/driver/toobj.cpp index 734d06648f..ff09cb6431 100644 --- a/driver/toobj.cpp +++ b/driver/toobj.cpp @@ -39,6 +39,9 @@ #if LDC_LLVM_VER >= 307 #include "llvm/Support/Path.h" #endif +#ifdef LDC_LLVM_SUPPORTED_TARGET_SPIRV +#include "llvm/Support/SPIRV.h" +#endif #include "llvm/Target/TargetMachine.h" #if LDC_LLVM_VER >= 307 #include "llvm/Analysis/TargetTransformInfo.h" @@ -75,7 +78,19 @@ static void codegenModule(llvm::TargetMachine &Target, llvm::Module &m, legacy:: #endif PassManager Passes; + ComputeBackend::Type cb = getComputeTargetType(&m); + if (cb == ComputeBackend::SPIRV) { +#ifdef LDC_LLVM_SUPPORTED_TARGET_SPIRV + IF_LOG Logger::println("running createSPIRVWriterPass()"); + llvm::createSPIRVWriterPass(out)->runOnModule(m); + IF_LOG Logger::println("Success."); +#else + error(Loc(), "Trying to target SPIRV, but LDC is not built to do so!"); +#endif + + return; + } #if LDC_LLVM_VER >= 307 // The DataLayout is already set at the module (in module.cpp, // method Module::genLLVMModule()) @@ -107,7 +122,11 @@ static void codegenModule(llvm::TargetMachine &Target, llvm::Module &m, #else fout, #endif - fileType, codeGenOptLevel())) { + // Always generate assembly for ptx as it is an assembly format + // The PTX backend fails if we pass anything else. + (cb == ComputeBackend::NVPTX) ? llvm::TargetMachine::CGFT_AssemblyFile + : fileType, + codeGenOptLevel())) { llvm_unreachable("no support for asm output"); } diff --git a/gen/dcompute/abi-rewrites.h b/gen/dcompute/abi-rewrites.h index ce63db5467..2ecd4e0562 100644 --- a/gen/dcompute/abi-rewrites.h +++ b/gen/dcompute/abi-rewrites.h @@ -23,13 +23,13 @@ #include "gen/dcompute/druntime.h" struct DComputePointerRewrite : ABIRewrite { - LLType* type(Type* t) override { - auto ptr = toDcomputePointer(static_cast(t)->sym); - return ptr->toLLVMType(); + LLType *type(Type *t) override { + auto ptr = toDcomputePointer(static_cast(t)->sym); + return ptr->toLLVMType(true); } LLValue *getLVal(Type *dty, LLValue *v) override { // TODO: Is this correct? - return DtoBitCast(v,this->type(dty)); + return DtoBitCast(v, this->type(dty)); } LLValue *put(DValue *dv) override { LLValue *address = getAddressOf(dv); diff --git a/gen/dcompute/druntime.cpp b/gen/dcompute/druntime.cpp index 33e5abf5d1..33ebee6131 100644 --- a/gen/dcompute/druntime.cpp +++ b/gen/dcompute/druntime.cpp @@ -32,16 +32,14 @@ bool isFromLDC_DCompute(Dsymbol *sym) { return false; return moduleDecl->id == Id::dcompute; - } -llvm::Optional toDcomputePointer(StructDeclaration *sd) -{ +llvm::Optional toDcomputePointer(StructDeclaration *sd) { if (sd->ident != Id::dcPointer || !isFromLDC_DCompute(sd)) - return llvm::Optional(llvm::None); + return llvm::Optional(llvm::None); TemplateInstance *ti = sd->isInstantiated(); int addrspace = isExpression((*ti->tiargs)[0])->toInteger(); - Type* type = isType((*ti->tiargs)[1]); - return llvm::Optional(DcomputePointer(addrspace,type)); + Type *type = isType((*ti->tiargs)[1]); + return llvm::Optional(DcomputePointer(addrspace, type)); } diff --git a/gen/dcompute/druntime.h b/gen/dcompute/druntime.h index d75aebda4e..b4f9559745 100644 --- a/gen/dcompute/druntime.h +++ b/gen/dcompute/druntime.h @@ -15,22 +15,26 @@ #include "ddmd/aggregate.h" #include "ddmd/mtype.h" #include "llvm/ADT/Optional.h" +#include "gen/dcompute/target.h" +#include "gen/irstate.h" #include "gen/llvm.h" #include "gen/tollvm.h" class Dsymbol; class Type; - bool isFromLDC_DCompute(Dsymbol *sym); struct DcomputePointer { int addrspace; - Type* type; - DcomputePointer(int as,Type* ty) : addrspace(as),type(ty) {} - LLType *toLLVMType() { + Type *type; + DcomputePointer(int as, Type *ty) : addrspace(as), type(ty) {} + LLType *toLLVMType(bool translate) { auto llType = DtoMemType(type); - return llType->getPointerElementType()->getPointerTo(addrspace); + int as = addrspace; + if (translate) + as = gIR->dcomputetarget->mapping[as]; + return llType->getPointerElementType()->getPointerTo(as); } }; llvm::Optional toDcomputePointer(StructDeclaration *sd); diff --git a/gen/dcompute/target.cpp b/gen/dcompute/target.cpp index 235f5bec1e..5fcf90b6e0 100644 --- a/gen/dcompute/target.cpp +++ b/gen/dcompute/target.cpp @@ -30,7 +30,6 @@ void DComputeTarget::doCodeGen(Module *m) { if (global.errors) fatal(); - } void DComputeTarget::emit(Module *m) { diff --git a/gen/dcompute/target.h b/gen/dcompute/target.h index 60bfb8d7de..ecdc41c06f 100644 --- a/gen/dcompute/target.h +++ b/gen/dcompute/target.h @@ -38,8 +38,8 @@ public: DComputeTarget(llvm::LLVMContext &c, int v, ID id, const char *_short_name, const char *suffix, TargetABI *a, std::array map) - : ctx(c), tversion(v), target(id), short_name(_short_name), - binSuffix(suffix), abi(a), mapping(map), _ir(nullptr) { } + : ctx(c), tversion(v), target(id), short_name(_short_name), + binSuffix(suffix), abi(a), mapping(map), _ir(nullptr) {} void emit(Module *m); void doCodeGen(Module *m); diff --git a/gen/dcompute/targetCUDA.cpp b/gen/dcompute/targetCUDA.cpp index a1a77949cc..b3f5917bdf 100644 --- a/gen/dcompute/targetCUDA.cpp +++ b/gen/dcompute/targetCUDA.cpp @@ -21,17 +21,20 @@ namespace { class TargetCUDA : public DComputeTarget { public: TargetCUDA(llvm::LLVMContext &c, int sm) - : DComputeTarget( + : DComputeTarget( c, sm, CUDA, "cuda", "ptx", createNVPTXABI(), // Map from nominal DCompute address space to NVPTX address space. // see $LLVM_ROOT/docs/docs/NVPTXUsage.rst section Address Spaces {{5, 1, 3, 4, 0}}) { - std::string dl = global.params.is64bit ? - "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-" - "f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" : - "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-" - "f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"; + std::string dl = + global.params.is64bit + ? "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:" + "32-" + "f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + : "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:" + "32-" + "f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"; _ir = new IRState("dcomputeTargetCUDA", ctx); _ir->module.setTargetTriple(global.params.is64bit ? "nvptx64-nvidia-cuda" @@ -40,20 +43,20 @@ public: _ir->dcomputetarget = this; } - void addMetadata() override { + void addMetadata() override { // sm version? } - void setGTargetMachine() override { - char buf[8]; - bool is64 = global.params.is64bit; - snprintf(buf, sizeof(buf), "sm_%d", tversion / 10); - gTargetMachine = createTargetMachine( + void setGTargetMachine() override { + char buf[8]; + bool is64 = global.params.is64bit; + snprintf(buf, sizeof(buf), "sm_%d", tversion / 10); + gTargetMachine = createTargetMachine( is64 ? "nvptx64-nvidia-cuda" : "nvptx-nvidia-cuda", is64 ? "nvptx64" : "nvptx", buf, {}, is64 ? ExplicitBitness::M64 : ExplicitBitness::M32, ::FloatABI::Hard, llvm::Reloc::Static, llvm::CodeModel::Medium, codeGenOptLevel(), false, false); - } + } void addKernelMetadata(FuncDeclaration *df, llvm::Function *llf) override { // Fix 3.5.2 build failures. Remove when dropping 3.5 support. diff --git a/gen/dcompute/targetOCL.cpp b/gen/dcompute/targetOCL.cpp index 1a45cb0fa5..f1f2903cda 100644 --- a/gen/dcompute/targetOCL.cpp +++ b/gen/dcompute/targetOCL.cpp @@ -52,10 +52,10 @@ public: } void setGTargetMachine() override { gTargetMachine = nullptr; } - // Adapted from clang + // Adapted from clang void addMetadata() override { - // Fix 3.5.2 build failures. Remove when dropping 3.5 support. - // OCL is only supported for 3.6.1 and 3.8 anyway. +// Fix 3.5.2 build failures. Remove when dropping 3.5 support. +// OCL is only supported for 3.6.1 and 3.8 anyway. #if LDC_LLVM_VER >= 306 // opencl.ident? // spirv.Source // debug only @@ -70,7 +70,7 @@ public: llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(ctx), 2))}; llvm::NamedMDNode *SPIRVerMD = - _ir->module.getOrInsertNamedMetadata("opencl.spir.version"); + _ir->module.getOrInsertNamedMetadata("opencl.spir.version"); SPIRVerMD->addOperand(llvm::MDNode::get(ctx, SPIRVerElts)); // Add OpenCL version @@ -81,7 +81,7 @@ public: llvm::Type::getInt32Ty(ctx), (tversion % 100) / 10))}; llvm::NamedMDNode *OCLVerMD = _ir->module.getOrInsertNamedMetadata("opencl.ocl.version"); - + OCLVerMD->addOperand(llvm::MDNode::get(ctx, OCLVerElts)); #endif } @@ -93,20 +93,20 @@ public: KernArgMD_type_qual, KernArgMD_name, count_KernArgMD - }; void addKernelMetadata(FuncDeclaration *fd, llvm::Function *llf) override { - // By the time we get here the ABI should have rewritten the function - // type so that the magic types in ldc.dcompute are transformed into - // what the LLVM backend expects. +// By the time we get here the ABI should have rewritten the function +// type so that the magic types in ldc.dcompute are transformed into +// what the LLVM backend expects. - // Fix 3.5.2 build failures. Remove when dropping 3.5 support. +// Fix 3.5.2 build failures. Remove when dropping 3.5 support. #if LDC_LLVM_VER >= 306 unsigned i = 0; // TODO: Handle Function attibutes llvm::SmallVector kernelMDArgs; kernelMDArgs.push_back(llvm::ConstantAsMetadata::get(llf)); // MDNode for the kernel argument address space qualifiers. + std::array,count_KernArgMD> paramArgs; std::array args = { "kernel_arg_addr_space", @@ -125,17 +125,17 @@ public: VarDeclarations *vs = fd->parameters; for (i = 0; i < vs->dim; i++) { VarDeclaration *v = (*vs)[i]; - decodeTypes(paramArgs,v); + decodeTypes(paramArgs, v); } - - for (auto& md : paramArgs) + + for (auto &md : paramArgs) kernelMDArgs.push_back(llvm::MDNode::get(ctx, md)); ///------------------------------- /// TODO: Handle Function attibutes ///------------------------------- llvm::MDNode *kernelMDNode = llvm::MDNode::get(ctx, kernelMDArgs); llvm::NamedMDNode *OpenCLKernelMetadata = - _ir->module.getOrInsertNamedMetadata("opencl.kernels"); + _ir->module.getOrInsertNamedMetadata("opencl.kernels"); OpenCLKernelMetadata->addOperand(kernelMDNode); #endif } @@ -147,19 +147,23 @@ public: std::string basicTypeToString(Type *t) { std::stringstream ss; auto ty = t->ty; - if (ty == Tint8) ss << "char"; - else if (ty == Tuns8) ss << "uchar"; + if (ty == Tint8) + ss << "char"; + else if (ty == Tuns8) + ss << "uchar"; else if (ty == Tvector) { - TypeVector* vec = static_cast(t); + TypeVector *vec = static_cast(t); auto size = vec->size(Loc()); auto basety = vec->basetype->ty; - if (basety == Tint8) ss << "char"; - else if (basety == Tuns8) ss << "uchar"; - else ss << vec->basetype->toChars(); + if (basety == Tint8) + ss << "char"; + else if (basety == Tuns8) + ss << "uchar"; + else + ss << vec->basetype->toChars(); ss << (int)size; - } - else - ss << t->toChars(); + } else + ss << t->toChars(); return ss.str(); } #if LDC_LLVM_VER >= 306 @@ -173,17 +177,14 @@ public: std::string accessQual = "none"; int addrspace = 0; if (v->type->ty == Tstruct && - (ptr = toDcomputePointer(static_cast(v->type)->sym))) - { + (ptr = toDcomputePointer(static_cast(v->type)->sym))) { addrspace = ptr->addrspace; tyName = basicTypeToString(ptr->type) + "*"; baseTyName = tyName; // there is no volatile or restrict (yet) in D typeQuals = mod2str(ptr->type->mod); // TODO: Images and Pipes They are global pointers to opaques - } - else - { + } else { tyName = basicTypeToString(v->type); baseTyName = tyName; typeQuals = mod2str(v->type->mod); diff --git a/gen/declarations.cpp b/gen/declarations.cpp index 378e6a81ad..f7d6a0d9ef 100644 --- a/gen/declarations.cpp +++ b/gen/declarations.cpp @@ -67,6 +67,8 @@ public: decl->toPrettyChars()); LOG_SCOPE + assert(!irs->dcomputetarget); + if (decl->ir->isDefined()) { return; } @@ -125,14 +127,18 @@ public: m->accept(this); } - // Define the __initZ symbol. - IrAggr *ir = getIrAggr(decl); - llvm::GlobalVariable *initZ = ir->getInitSymbol(); - initZ->setInitializer(ir->getDefaultInit()); - setLinkage(decl, initZ); + // Skip __initZ and typeinfo for @compute device code. + // TODO: support global variables and thus __initZ + if (!irs->dcomputetarget) { + // Define the __initZ symbol. + IrAggr *ir = getIrAggr(decl); + llvm::GlobalVariable *initZ = ir->getInitSymbol(); + initZ->setInitializer(ir->getDefaultInit()); + setLinkage(decl, initZ); - // emit typeinfo - DtoTypeInfoOf(decl->type); + // emit typeinfo + DtoTypeInfoOf(decl->type); + } // Emit __xopEquals/__xopCmp/__xtoHash. if (decl->xeq && decl->xeq != decl->xerreq) { @@ -153,6 +159,8 @@ public: decl->toPrettyChars()); LOG_SCOPE + assert(!irs->dcomputetarget); + if (decl->ir->isDefined()) { return; } @@ -245,6 +253,7 @@ public: assert(!(decl->storage_class & STCmanifest) && "manifest constant being codegen'd!"); + assert(!irs->dcomputetarget); IrGlobal *irGlobal = getIrGlobal(decl); LLGlobalVariable *gvar = llvm::cast(irGlobal->value); @@ -412,7 +421,8 @@ public: void visit(PragmaDeclaration *decl) LLVM_OVERRIDE { if (decl->ident == Id::lib) { assert(decl->args && decl->args->dim == 1); - + assert(!irs->dcomputetarget); + Expression *e = static_cast(decl->args->data[0]); assert(e->op == TOKstring); @@ -473,7 +483,7 @@ public: ////////////////////////////////////////////////////////////////////////// void visit(TypeInfoDeclaration *decl) LLVM_OVERRIDE { - if (isSpeculativeType(decl->tinfo)) { + if (irs->dcomputetarget || isSpeculativeType(decl->tinfo)) { return; } TypeInfoDeclaration_codegen(decl, irs); @@ -482,7 +492,7 @@ public: ////////////////////////////////////////////////////////////////////////// void visit(TypeInfoClassDeclaration *decl) LLVM_OVERRIDE { - if (isSpeculativeType(decl->tinfo)) { + if (irs->dcomputetarget || isSpeculativeType(decl->tinfo)) { return; } TypeInfoClassDeclaration_codegen(decl, irs); diff --git a/gen/functions.cpp b/gen/functions.cpp index 60dd18d73a..8bd7152571 100644 --- a/gen/functions.cpp +++ b/gen/functions.cpp @@ -22,6 +22,7 @@ #include "gen/abi.h" #include "gen/arrays.h" #include "gen/classes.h" +#include "gen/dcompute/target.h" #include "gen/dvalue.h" #include "gen/funcgenstate.h" #include "gen/function-inlining.h" @@ -977,7 +978,7 @@ void DtoDefineFunction(FuncDeclaration *fd, bool linkageAvailableExternally) { #if LDC_LLVM_VER >= 500 0, // Address space #endif - "alloca point", beginbb); + "alloca_point", beginbb); funcGen.allocapoint = allocaPoint; // debug info - after all allocas, but before any llvm.dbg.declare etc @@ -1127,6 +1128,11 @@ void DtoDefineFunction(FuncDeclaration *fd, bool linkageAvailableExternally) { } gIR->scopes.pop_back(); + + if (gIR->dcomputetarget && hasKernelAttr(fd)) { + auto fn = gIR->module.getFunction(fd->mangleString); + gIR->dcomputetarget->addKernelMetadata(fd, fn); + } } //////////////////////////////////////////////////////////////////////////////// diff --git a/gen/optimizer.cpp b/gen/optimizer.cpp index 0296e360f7..a74d5d3fa7 100644 --- a/gen/optimizer.cpp +++ b/gen/optimizer.cpp @@ -12,6 +12,7 @@ #include "gen/cl_helpers.h" #include "gen/logger.h" #include "gen/passes/Passes.h" +#include "driver/targetmachine.h" #include "llvm/LinkAllPasses.h" #if LDC_LLVM_VER >= 307 #include "llvm/IR/LegacyPassManager.h" @@ -341,6 +342,16 @@ bool ldc_optimize_module(llvm::Module *M) { #endif PassManager mpm; + // Dont optimise spirv modules because turning GEPs into extracts triggers + // asserts in the IR -> SPIR-V translation pass. SPIRV doesn't have a target + // machine, so any optimisation passes that rely on it to provide analysis, + // like DCE can't be run. + // The optimisation is supposed to happen between the SPIRV -> native machine + // code pass of the consumer of the binary. + // TODO: run rudimentary optimisations to improve IR debuggability. + if (getComputeTargetType(M) == ComputeBackend::SPIRV) + return false; + #if LDC_LLVM_VER >= 307 // Add an appropriate TargetLibraryInfo pass for the module's triple. TargetLibraryInfoImpl *tlii = diff --git a/gen/statements.cpp b/gen/statements.cpp index e203f5bfa6..6d2d0fc8fd 100644 --- a/gen/statements.cpp +++ b/gen/statements.cpp @@ -16,6 +16,7 @@ #include "gen/arrays.h" #include "gen/classes.h" #include "gen/coverage.h" +#include "gen/dcompute/target.h" #include "gen/dvalue.h" #include "gen/funcgenstate.h" #include "gen/irstate.h" @@ -24,6 +25,7 @@ #include "gen/logger.h" #include "gen/runtime.h" #include "gen/tollvm.h" +#include "id.h" #include "ir/irfunction.h" #include "ir/irmodule.h" #include "llvm/IR/CFG.h" @@ -317,6 +319,18 @@ public: } ////////////////////////////////////////////////////////////////////////// + + bool dcomputeReflectMatches(CallExp *ce) { + auto arg1 = (DComputeTarget::ID)(*ce->arguments)[0]->toInteger(); + auto arg2 = (*ce->arguments)[1]->toInteger(); + auto dct = irs->dcomputetarget; + if (!dct) { + return arg1 == DComputeTarget::Host; + } + else { + return arg1 == dct->target && (!arg2 || arg2 == dct->tversion); + } + } void visit(IfStatement *stmt) LLVM_OVERRIDE { IF_LOG Logger::println("IfStatement::toIR(): %s", stmt->loc.toChars()); @@ -332,6 +346,22 @@ public: irs->DBuilder.EmitBlockStart(stmt->loc); emitCoverageLinecountInc(stmt->loc); + // This is a (dirty) hack to get codegen time conditional + // compilation, on account of the fact that we are trying + // to target multiple backends "simultaneously" with one + // pass through the front end, to have a single "static" + // context. + if (stmt->condition->op == TOKcall) { + auto ce = (CallExp *)stmt->condition; + if (ce->f && ce->f->ident == Id::dcReflect) { + if (dcomputeReflectMatches(ce)) + stmt->ifbody->accept(this); + else if (stmt->elsebody) + stmt->elsebody->accept(this); + return; + } + } + DValue *cond_e = toElemDtor(stmt->condition); LLValue *cond_val = DtoRVal(cond_e); @@ -745,10 +775,15 @@ public: irs->DBuilder.EmitBlockStart(stmt->finalbody->loc); stmt->finalbody->accept(this); irs->DBuilder.EmitBlockEnd(); + CleanupCursor cleanupBefore; - CleanupCursor cleanupBefore = irs->funcGen().scopes.currentCleanupScope(); - irs->funcGen().scopes.pushCleanup(finallybb, irs->scopebb()); - + // For @compute code, don't emit any exception handling as there are no + // exceptions anyway. + const bool computeCode = !!irs->dcomputetarget; + if (!computeCode) { + cleanupBefore = irs->funcGen().scopes.currentCleanupScope(); + irs->funcGen().scopes.pushCleanup(finallybb, irs->scopebb()); + } // Emit the try block. irs->scope() = IRScope(trybb); @@ -758,12 +793,14 @@ public: irs->DBuilder.EmitBlockEnd(); if (successbb) { - irs->funcGen().scopes.runCleanups(cleanupBefore, successbb); + if (!computeCode) + irs->funcGen().scopes.runCleanups(cleanupBefore, successbb); irs->scope() = IRScope(successbb); // PGO counter tracks the continuation of the try-finally statement PGO.emitCounterIncrement(stmt); } - irs->funcGen().scopes.popCleanups(cleanupBefore); + if (!computeCode) + irs->funcGen().scopes.popCleanups(cleanupBefore); } ////////////////////////////////////////////////////////////////////////// @@ -772,6 +809,7 @@ public: IF_LOG Logger::println("TryCatchStatement::toIR(): %s", stmt->loc.toChars()); LOG_SCOPE; + assert(!irs->dcomputetarget); auto &PGO = irs->funcGen().pgo; @@ -812,6 +850,7 @@ public: void visit(ThrowStatement *stmt) LLVM_OVERRIDE { IF_LOG Logger::println("ThrowStatement::toIR(): %s", stmt->loc.toChars()); LOG_SCOPE; + assert(!irs->dcomputetarget); auto &PGO = irs->funcGen().pgo; PGO.setCurrentStmt(stmt); @@ -871,6 +910,7 @@ public: const bool isStringSwitch = !stmt->condition->type->isintegral(); if (isStringSwitch) { Logger::println("is string switch"); + assert(!irs->dcomputetarget); // Sort the cases, taking care not to modify the original AST. cases = cases->copy(); @@ -1597,7 +1637,8 @@ public: IF_LOG Logger::println("SwitchErrorStatement::toIR(): %s", stmt->loc.toChars()); LOG_SCOPE; - + assert(!irs->dcomputetarget); + auto &PGO = irs->funcGen().pgo; PGO.setCurrentStmt(stmt); @@ -1616,11 +1657,15 @@ public: ////////////////////////////////////////////////////////////////////////// - void visit(AsmStatement *stmt) LLVM_OVERRIDE { AsmStatement_toIR(stmt, irs); } + void visit(AsmStatement *stmt) LLVM_OVERRIDE { + assert(!irs->dcomputetarget); + AsmStatement_toIR(stmt, irs); + } ////////////////////////////////////////////////////////////////////////// void visit(CompoundAsmStatement *stmt) LLVM_OVERRIDE { + assert(!irs->dcomputetarget); CompoundAsmStatement_toIR(stmt, irs); } diff --git a/gen/uda.cpp b/gen/uda.cpp index 6ec90c681c..4bbbad4284 100644 --- a/gen/uda.cpp +++ b/gen/uda.cpp @@ -418,15 +418,15 @@ bool hasWeakUDA(Dsymbol *sym) { /// Returns 0 if 'sym' does not have the @ldc.dcompute.compute() UDA applied. /// Returns 1 + n if 'sym' does and is @compute(n). -int hasComputeAttr(Dsymbol *sym) { +DComputeCompileFor hasComputeAttr(Dsymbol *sym) { auto sle = getMagicAttribute(sym, Id::udaCompute, Id::dcompute); if (!sle) - return 0; + return DComputeCompileFor::hostOnly; checkStructElems(sle, {Type::tint32}); - return 1 + (*sle->elements)[0]->toInteger(); + return static_cast(1 + (*sle->elements)[0]->toInteger()); } /// Checks whether 'sym' has the @ldc.dcompute._kernel() UDA applied. @@ -437,7 +437,8 @@ bool hasKernelAttr(Dsymbol *sym) { checkStructElems(sle, {}); - if (!sym->isFuncDeclaration() && !hasComputeAttr(sym->getModule())) + if (!sym->isFuncDeclaration() && + hasComputeAttr(sym->getModule()) != DComputeCompileFor::hostOnly) sym->error("@ldc.dcompute.kernel can only be applied to functions" " in modules marked @ldc.dcompute.compute"); diff --git a/gen/uda.h b/gen/uda.h index 6735b35d28..b47f24ff59 100644 --- a/gen/uda.h +++ b/gen/uda.h @@ -27,7 +27,13 @@ void applyFuncDeclUDAs(FuncDeclaration *decl, IrFunction *irFunc); void applyVarDeclUDAs(VarDeclaration *decl, llvm::GlobalVariable *gvar); bool hasWeakUDA(Dsymbol *sym); -int hasComputeAttr(Dsymbol *sym); bool hasKernelAttr(Dsymbol *sym); - +/// Must match ldc.attributes.Compilefor + 1 == DComputeCompileFor +enum class DComputeCompileFor +{ + hostOnly = 0, + deviceOnly = 1, + hostAndDevice = 2 +}; +DComputeCompileFor hasComputeAttr(Dsymbol *sym); #endif diff --git a/ir/irtypestruct.cpp b/ir/irtypestruct.cpp index 8fdab7df2a..90e148ed30 100644 --- a/ir/irtypestruct.cpp +++ b/ir/irtypestruct.cpp @@ -15,11 +15,14 @@ #include "declaration.h" #include "init.h" #include "mtype.h" +#include "template.h" #include "gen/irstate.h" #include "gen/tollvm.h" #include "gen/logger.h" #include "gen/llvmhelpers.h" +#include "gen/dcompute/target.h" +#include "gen/dcompute/druntime.h" ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// @@ -49,11 +52,29 @@ IrTypeStruct *IrTypeStruct::get(StructDeclaration *sd) { t->packed = isPacked(sd); } - AggrTypeBuilder builder(t->packed); - builder.addAggregate(sd); - builder.addTailPadding(sd->structsize); - isaStruct(t->type)->setBody(builder.defaultTypes(), t->packed); - t->varGEPIndices = builder.varGEPIndices(); + // For ldc.dcomptetypes.Pointer!(uint n,T), + // emit { T addrspace(gIR->dcomputetarget->mapping[n])* } + llvm::Optional p; + if (gIR->dcomputetarget && (p = toDcomputePointer(sd))) { + + // Translate the virtual dcompute address space into the real one for + // the target + int realAS = gIR->dcomputetarget->mapping[p->addrspace]; + + llvm::SmallVector body; + body.push_back(DtoMemType(p->type)->getPointerTo(realAS)); + + isaStruct(t->type)->setBody(body, t->packed); + VarGEPIndices v; + v[sd->fields[0]] = 0; + t->varGEPIndices = v; + } else { + AggrTypeBuilder builder(t->packed); + builder.addAggregate(sd); + builder.addTailPadding(sd->structsize); + isaStruct(t->type)->setBody(builder.defaultTypes(), t->packed); + t->varGEPIndices = builder.varGEPIndices(); + } IF_LOG Logger::cout() << "final struct type: " << *t->type << std::endl;