diff --git a/CMakeLists.txt b/CMakeLists.txt
index c9321b87fb..51bcc8cecd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,6 +25,11 @@ math(EXPR LDC_LLVM_VER ${LLVM_VERSION_MAJOR}*100+${LLVM_VERSION_MINOR})
 string(REGEX MATCH "^-.*LLVMTableGen[^;]*;|;-.*LLVMTableGen[^;]*" LLVM_TABLEGEN_LIBRARY "${LLVM_LIBRARIES}")
 string(REGEX REPLACE "^-.*LLVMTableGen[^;]*;|;-.*LLVMTableGen[^;]*" "" LLVM_LIBRARIES "${LLVM_LIBRARIES}")
 
+# Information about which targets LLVM was built to target
+foreach(LLVM_SUPPORTED_TARGET ${LLVM_TARGETS_TO_BUILD})
+    add_definitions("-DLDC_LLVM_SUPPORTED_TARGET_${LLVM_SUPPORTED_TARGET}=1")
+endforeach()
+
 #
 # Get info about used Linux distribution.
 #
@@ -330,6 +335,7 @@ set(DRV_SRC
     driver/cl_options.cpp
     driver/codegenerator.cpp
     driver/configfile.cpp
+    driver/dcomputecodegenerator.cpp
     driver/exe_path.cpp
     driver/targetmachine.cpp
     driver/toobj.cpp
@@ -345,6 +351,7 @@ set(DRV_HDR
     driver/cl_options.h
     driver/codegenerator.h
     driver/configfile.h
+    driver/dcomputecodegenerator.h
     driver/exe_path.h
     driver/ldc-version.h
     driver/archiver.h
diff --git a/driver/cl_options.cpp b/driver/cl_options.cpp
index eef3fbff56..313266a9e5 100644
--- a/driver/cl_options.cpp
+++ b/driver/cl_options.cpp
@@ -505,6 +505,15 @@ cl::opt<std::string>
                                     "of optimizations performed by LLVM"),
                            cl::ValueOptional);
 #endif
+    
+#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX
+cl::list<std::string>
+    dcomputeTargets("mdcompute-targets", cl::CommaSeparated,
+                    cl::desc("Generates code for the specified DCompute target"
+                             " list. Use 'ocl-xy0' for OpenCL x.y, and "
+                             "'cuda-xy0' for CUDA CC x.y"),
+                     cl::value_desc("targets"));
+#endif
 
 static cl::extrahelp footer(
     "\n"
diff --git a/driver/cl_options.h b/driver/cl_options.h
index b30cea8329..d154edc568 100644
--- a/driver/cl_options.h
+++ b/driver/cl_options.h
@@ -124,5 +124,8 @@ inline bool isUsingThinLTO() { return false; }
 #if LDC_LLVM_VER >= 400
 extern cl::opt<std::string> saveOptimizationRecord;
 #endif
+#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX
+extern cl::list<std::string> dcomputeTargets;
+#endif
 }
 #endif
diff --git a/driver/dcomputecodegenerator.cpp b/driver/dcomputecodegenerator.cpp
new file mode 100644
index 0000000000..381a4def83
--- /dev/null
+++ b/driver/dcomputecodegenerator.cpp
@@ -0,0 +1,74 @@
+//===-- driver/dcomputecodegenerator.cpp ----------------------------------===//
+//
+//                         LDC – the LLVM D compiler
+//
+// This file is distributed under the BSD-style LDC license. See the LICENSE
+// file for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "driver/dcomputecodegenerator.h"
+#include "driver/cl_options.h"
+#include "ddmd/errors.h"
+#include "gen/cl_helpers.h"
+#include "ir/irdsymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include <array>
+#include <string>
+#include <algorithm>
+
+DComputeTarget *
+DComputeCodeGenManager::createComputeTarget(const std::string &s) {
+  int v;
+#define OCL_VALID_VER_INIT 100, 110, 120, 200, 210, 220
+  const std::array<int, 6> valid_ocl_versions = {{OCL_VALID_VER_INIT}};
+#define CUDA_VALID_VER_INIT 100, 110, 120, 130, 200, 210, 300, 350, 370,\
+ 500, 520, 600, 610, 620
+  const std::array<int, 14> vaild_cuda_versions = {{CUDA_VALID_VER_INIT}};
+
+  if (s.substr(0, 4) == "ocl-") {
+    v = atoi(s.c_str() + 4);
+    if (std::find(valid_ocl_versions.begin(), valid_ocl_versions.end(), v) !=
+        valid_ocl_versions.end()) {
+      return createOCLTarget(ctx, v);
+    }
+  } else if (s.substr(0, 5) == "cuda-") {
+    v = atoi(s.c_str() + 5);
+
+    if (std::find(vaild_cuda_versions.begin(), vaild_cuda_versions.end(), v) !=
+        vaild_cuda_versions.end()) {
+      return createCUDATarget(ctx, v);
+    }
+  }
+#define XSTR(x) #x
+#define STR(x) XSTR((x))
+
+  error(Loc(),
+        "unrecognised or invalid DCompute targets: the format is ocl-xy0 "
+        "for OpenCl x.y and cuda-xy0 for CUDA CC x.y. Valid versions "
+        "for OpenCl are " STR(OCL_VALID_VER_INIT) ". Valid versions for CUDA "
+        "are " STR(CUDA_VALID_VER_INIT));
+  fatal();
+  return nullptr;
+}
+
+DComputeCodeGenManager::DComputeCodeGenManager(llvm::LLVMContext &c) : ctx(c) {
+#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX
+  for (auto &option : opts::dcomputeTargets) {
+    targets.push_back(createComputeTarget(option));
+  }
+#endif
+}
+
+void DComputeCodeGenManager::emit(Module *m) {
+  for (auto &target : targets) {
+    target->emit(m);
+    IrDsymbol::resetAll();
+  }
+}
+
+void DComputeCodeGenManager::writeModules() {
+  for (auto &target : targets) {
+    target->writeModule();
+  }
+}
diff --git a/driver/dcomputecodegenerator.h b/driver/dcomputecodegenerator.h
new file mode 100644
index 0000000000..0dd46163fd
--- /dev/null
+++ b/driver/dcomputecodegenerator.h
@@ -0,0 +1,31 @@
+//===-- driver/dcomputecodegenerator.h - LDC --------------------*- C++ -*-===//
+//
+//                         LDC – the LLVM D compiler
+//
+// This file is distributed under the BSD-style LDC license. See the LICENSE
+// file for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LDC_DRIVER_DCOMPUTECODEGENERATOR_H
+#define LDC_DRIVER_DCOMPUTECODEGENERATOR_H
+
+#include "gen/dcompute/target.h"
+#include "llvm/ADT/SmallVector.h"
+
+// gets run on modules marked @compute
+// All @compute D modules are emitted into one LLVM module once per target.
+class DComputeCodeGenManager {
+
+  llvm::LLVMContext &ctx;
+  llvm::SmallVector<DComputeTarget *, 2> targets;
+  DComputeTarget *createComputeTarget(const std::string &s);
+
+public:
+  void emit(Module *m);
+  void writeModules();
+
+  DComputeCodeGenManager(llvm::LLVMContext &c);
+};
+
+#endif
diff --git a/driver/main.cpp b/driver/main.cpp
index 4a13c40dd8..40a9b7527a 100644
--- a/driver/main.cpp
+++ b/driver/main.cpp
@@ -23,6 +23,7 @@
 #include "driver/cl_options.h"
 #include "driver/codegenerator.h"
 #include "driver/configfile.h"
+#include "driver/dcomputecodegenerator.h"
 #include "driver/exe_path.h"
 #include "driver/ldc-version.h"
 #include "driver/linker.h"
@@ -40,6 +41,7 @@
 #include "gen/optimizer.h"
 #include "gen/passes/Passes.h"
 #include "gen/runtime.h"
+#include "gen/uda.h"
 #include "gen/abi.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/LinkAllPasses.h"
@@ -879,6 +881,9 @@ void registerPredefinedVersions() {
   VersionCondition::addPredefinedGlobalIdent("LDC");
   VersionCondition::addPredefinedGlobalIdent("all");
   VersionCondition::addPredefinedGlobalIdent("D_Version2");
+#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX
+  VersionCondition::addPredefinedGlobalIdent("LDC_DCompute");
+#endif
 
   if (global.params.doDocComments) {
     VersionCondition::addPredefinedGlobalIdent("D_Ddoc");
@@ -1034,10 +1039,11 @@ void addDefaultVersionIdentifiers() {
 }
 
 void codegenModules(Modules &modules) {
-  // Generate one or more object/IR/bitcode files.
+  // Generate one or more object/IR/bitcode files/dcompute kernels.
   if (global.params.obj && !modules.empty()) {
     ldc::CodeGenerator cg(getGlobalContext(), global.params.oneobj);
-
+    DComputeCodeGenManager dccg(getGlobalContext());
+    std::vector<Module *> computeModules;
     // When inlining is enabled, we are calling semantic3 on function
     // declarations, which may _add_ members to the first module in the modules
     // array. These added functions must be codegenned, because these functions
@@ -1051,11 +1057,25 @@ void codegenModules(Modules &modules) {
       if (global.params.verbose)
         fprintf(global.stdmsg, "code      %s\n", m->toChars());
 
-      cg.emit(m);
+      const auto atCompute = hasComputeAttr(m);
+      if (atCompute == DComputeCompileFor::hostOnly ||
+           atCompute == DComputeCompileFor::hostAndDevice)
+      {
+        cg.emit(m);
+      }
+      if (atCompute != DComputeCompileFor::hostOnly)
+        computeModules.push_back(m);
 
       if (global.errors)
         fatal();
     }
+
+    if (!computeModules.empty()) {
+      for (auto& mod : computeModules)
+        dccg.emit(mod);
+
+      dccg.writeModules();
+    }
   }
 
   cache::pruneCache();
diff --git a/driver/targetmachine.cpp b/driver/targetmachine.cpp
index bfffaffb94..e1db02006d 100644
--- a/driver/targetmachine.cpp
+++ b/driver/targetmachine.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/Module.h"
 #include "mars.h"
 #include "gen/logger.h"
 
@@ -632,3 +633,13 @@ createTargetMachine(std::string targetTriple, std::string arch, std::string cpu,
                                      targetOptions, relocModel, codeModel,
                                      codeGenOptLevel);
 }
+    
+ComputeBackend::Type getComputeTargetType(llvm::Module* m) {
+  llvm::Triple::ArchType a = llvm::Triple(m->getTargetTriple()).getArch();
+  if (a == llvm::Triple::spir || a == llvm::Triple::spir64)
+    return ComputeBackend::SPIRV;
+  else if (a == llvm::Triple::nvptx || a == llvm::Triple::nvptx64)
+    return ComputeBackend::NVPTX;
+  else
+    return ComputeBackend::None;
+}
diff --git a/driver/targetmachine.h b/driver/targetmachine.h
index f93a6736ba..91af541244 100644
--- a/driver/targetmachine.h
+++ b/driver/targetmachine.h
@@ -38,8 +38,15 @@ namespace llvm {
 class Triple;
 class Target;
 class TargetMachine;
+class Module;
 }
 
+namespace ComputeBackend {
+enum Type { None, SPIRV, NVPTX };
+}
+
+ComputeBackend::Type getComputeTargetType(llvm::Module*);
+
 /**
  * Creates an LLVM TargetMachine suitable for the given (usually command-line)
  * parameters and the host platform defaults.
diff --git a/driver/toobj.cpp b/driver/toobj.cpp
index 734d06648f..ff09cb6431 100644
--- a/driver/toobj.cpp
+++ b/driver/toobj.cpp
@@ -39,6 +39,9 @@
 #if LDC_LLVM_VER >= 307
 #include "llvm/Support/Path.h"
 #endif
+#ifdef LDC_LLVM_SUPPORTED_TARGET_SPIRV
+#include "llvm/Support/SPIRV.h"
+#endif
 #include "llvm/Target/TargetMachine.h"
 #if LDC_LLVM_VER >= 307
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -75,7 +78,19 @@ static void codegenModule(llvm::TargetMachine &Target, llvm::Module &m,
   legacy::
 #endif
       PassManager Passes;
+  ComputeBackend::Type cb = getComputeTargetType(&m);
 
+  if (cb == ComputeBackend::SPIRV) {
+#ifdef LDC_LLVM_SUPPORTED_TARGET_SPIRV
+    IF_LOG Logger::println("running createSPIRVWriterPass()");
+    llvm::createSPIRVWriterPass(out)->runOnModule(m);
+    IF_LOG Logger::println("Success.");
+#else
+    error(Loc(), "Trying to target SPIRV, but LDC is not built to do so!");
+#endif
+
+    return;
+  }
 #if LDC_LLVM_VER >= 307
 // The DataLayout is already set at the module (in module.cpp,
 // method Module::genLLVMModule())
@@ -107,7 +122,11 @@ static void codegenModule(llvm::TargetMachine &Target, llvm::Module &m,
 #else
                                  fout,
 #endif
-                                 fileType, codeGenOptLevel())) {
+        // Always generate assembly for ptx as it is an assembly format
+        // The PTX backend fails if we pass anything else.
+        (cb == ComputeBackend::NVPTX) ? llvm::TargetMachine::CGFT_AssemblyFile
+                                      : fileType,
+          codeGenOptLevel())) {
     llvm_unreachable("no support for asm output");
   }
 
diff --git a/gen/dcompute/abi-rewrites.h b/gen/dcompute/abi-rewrites.h
index ce63db5467..2ecd4e0562 100644
--- a/gen/dcompute/abi-rewrites.h
+++ b/gen/dcompute/abi-rewrites.h
@@ -23,13 +23,13 @@
 #include "gen/dcompute/druntime.h"
 
 struct DComputePointerRewrite : ABIRewrite {
-  LLType* type(Type* t) override {
-    auto ptr = toDcomputePointer(static_cast<TypeStruct*>(t)->sym);
-    return ptr->toLLVMType();
+  LLType *type(Type *t) override {
+    auto ptr = toDcomputePointer(static_cast<TypeStruct *>(t)->sym);
+    return ptr->toLLVMType(true);
   }
   LLValue *getLVal(Type *dty, LLValue *v) override {
     // TODO: Is this correct?
-    return DtoBitCast(v,this->type(dty));
+    return DtoBitCast(v, this->type(dty));
   }
   LLValue *put(DValue *dv) override {
     LLValue *address = getAddressOf(dv);
diff --git a/gen/dcompute/druntime.cpp b/gen/dcompute/druntime.cpp
index 33e5abf5d1..33ebee6131 100644
--- a/gen/dcompute/druntime.cpp
+++ b/gen/dcompute/druntime.cpp
@@ -32,16 +32,14 @@ bool isFromLDC_DCompute(Dsymbol *sym) {
     return false;
 
   return moduleDecl->id == Id::dcompute;
-
 }
 
-llvm::Optional<DcomputePointer> toDcomputePointer(StructDeclaration *sd)
-{
+llvm::Optional<DcomputePointer> toDcomputePointer(StructDeclaration *sd) {
   if (sd->ident != Id::dcPointer || !isFromLDC_DCompute(sd))
-      return llvm::Optional<DcomputePointer>(llvm::None);
+    return llvm::Optional<DcomputePointer>(llvm::None);
 
   TemplateInstance *ti = sd->isInstantiated();
   int addrspace = isExpression((*ti->tiargs)[0])->toInteger();
-  Type* type = isType((*ti->tiargs)[1]);
-  return llvm::Optional<DcomputePointer>(DcomputePointer(addrspace,type));
+  Type *type = isType((*ti->tiargs)[1]);
+  return llvm::Optional<DcomputePointer>(DcomputePointer(addrspace, type));
 }
diff --git a/gen/dcompute/druntime.h b/gen/dcompute/druntime.h
index d75aebda4e..b4f9559745 100644
--- a/gen/dcompute/druntime.h
+++ b/gen/dcompute/druntime.h
@@ -15,22 +15,26 @@
 #include "ddmd/aggregate.h"
 #include "ddmd/mtype.h"
 #include "llvm/ADT/Optional.h"
+#include "gen/dcompute/target.h"
+#include "gen/irstate.h"
 #include "gen/llvm.h"
 #include "gen/tollvm.h"
 
 class Dsymbol;
 class Type;
 
-
 bool isFromLDC_DCompute(Dsymbol *sym);
 
 struct DcomputePointer {
   int addrspace;
-  Type* type;
-  DcomputePointer(int as,Type* ty) : addrspace(as),type(ty) {}
-  LLType *toLLVMType() {
+  Type *type;
+  DcomputePointer(int as, Type *ty) : addrspace(as), type(ty) {}
+  LLType *toLLVMType(bool translate) {
     auto llType = DtoMemType(type);
-    return llType->getPointerElementType()->getPointerTo(addrspace);
+    int as = addrspace;
+    if (translate)
+      as = gIR->dcomputetarget->mapping[as];
+    return llType->getPointerElementType()->getPointerTo(as);
   }
 };
 llvm::Optional<DcomputePointer> toDcomputePointer(StructDeclaration *sd);
diff --git a/gen/dcompute/target.cpp b/gen/dcompute/target.cpp
index 235f5bec1e..5fcf90b6e0 100644
--- a/gen/dcompute/target.cpp
+++ b/gen/dcompute/target.cpp
@@ -30,7 +30,6 @@ void DComputeTarget::doCodeGen(Module *m) {
 
   if (global.errors)
     fatal();
-
 }
 
 void DComputeTarget::emit(Module *m) {
diff --git a/gen/dcompute/target.h b/gen/dcompute/target.h
index 60bfb8d7de..ecdc41c06f 100644
--- a/gen/dcompute/target.h
+++ b/gen/dcompute/target.h
@@ -38,8 +38,8 @@ public:
 
   DComputeTarget(llvm::LLVMContext &c, int v, ID id, const char *_short_name,
                  const char *suffix, TargetABI *a, std::array<int, 5> map)
-    : ctx(c), tversion(v), target(id), short_name(_short_name),
-      binSuffix(suffix), abi(a), mapping(map), _ir(nullptr) { }
+      : ctx(c), tversion(v), target(id), short_name(_short_name),
+        binSuffix(suffix), abi(a), mapping(map), _ir(nullptr) {}
 
   void emit(Module *m);
   void doCodeGen(Module *m);
diff --git a/gen/dcompute/targetCUDA.cpp b/gen/dcompute/targetCUDA.cpp
index a1a77949cc..b3f5917bdf 100644
--- a/gen/dcompute/targetCUDA.cpp
+++ b/gen/dcompute/targetCUDA.cpp
@@ -21,17 +21,20 @@ namespace {
 class TargetCUDA : public DComputeTarget {
 public:
   TargetCUDA(llvm::LLVMContext &c, int sm)
-     : DComputeTarget(
+      : DComputeTarget(
             c, sm, CUDA, "cuda", "ptx", createNVPTXABI(),
 
             // Map from nominal DCompute address space to NVPTX address space.
             // see $LLVM_ROOT/docs/docs/NVPTXUsage.rst section Address Spaces
             {{5, 1, 3, 4, 0}}) {
-    std::string dl = global.params.is64bit ?
-        "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-"
-        "f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" :
-        "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-"
-        "f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
+    std::string dl =
+        global.params.is64bit
+            ? "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:"
+              "32-"
+              "f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+            : "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:"
+              "32-"
+              "f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
 
     _ir = new IRState("dcomputeTargetCUDA", ctx);
     _ir->module.setTargetTriple(global.params.is64bit ? "nvptx64-nvidia-cuda"
@@ -40,20 +43,20 @@ public:
     _ir->dcomputetarget = this;
   }
 
- void addMetadata() override {
+  void addMetadata() override {
     // sm version?
   }
- void setGTargetMachine() override {
-   char buf[8];
-   bool is64 = global.params.is64bit;
-   snprintf(buf, sizeof(buf), "sm_%d", tversion / 10);
-   gTargetMachine = createTargetMachine(
+  void setGTargetMachine() override {
+    char buf[8];
+    bool is64 = global.params.is64bit;
+    snprintf(buf, sizeof(buf), "sm_%d", tversion / 10);
+    gTargetMachine = createTargetMachine(
         is64 ? "nvptx64-nvidia-cuda" : "nvptx-nvidia-cuda",
         is64 ? "nvptx64" : "nvptx", buf, {},
         is64 ? ExplicitBitness::M64 : ExplicitBitness::M32, ::FloatABI::Hard,
         llvm::Reloc::Static, llvm::CodeModel::Medium, codeGenOptLevel(), false,
         false);
- }
+  }
 
   void addKernelMetadata(FuncDeclaration *df, llvm::Function *llf) override {
 // Fix 3.5.2 build failures. Remove when dropping 3.5 support.
diff --git a/gen/dcompute/targetOCL.cpp b/gen/dcompute/targetOCL.cpp
index 1a45cb0fa5..f1f2903cda 100644
--- a/gen/dcompute/targetOCL.cpp
+++ b/gen/dcompute/targetOCL.cpp
@@ -52,10 +52,10 @@ public:
   }
   void setGTargetMachine() override { gTargetMachine = nullptr; }
 
-    // Adapted from clang
+  // Adapted from clang
   void addMetadata() override {
-    // Fix 3.5.2 build failures. Remove when dropping 3.5 support.
-    // OCL is only supported for 3.6.1 and 3.8 anyway.
+// Fix 3.5.2 build failures. Remove when dropping 3.5 support.
+// OCL is only supported for 3.6.1 and 3.8 anyway.
 #if LDC_LLVM_VER >= 306
     // opencl.ident?
     // spirv.Source // debug only
@@ -70,7 +70,7 @@ public:
         llvm::ConstantAsMetadata::get(
             llvm::ConstantInt::get(llvm::Type::getInt32Ty(ctx), 2))};
     llvm::NamedMDNode *SPIRVerMD =
-      _ir->module.getOrInsertNamedMetadata("opencl.spir.version");
+        _ir->module.getOrInsertNamedMetadata("opencl.spir.version");
     SPIRVerMD->addOperand(llvm::MDNode::get(ctx, SPIRVerElts));
 
     // Add OpenCL version
@@ -81,7 +81,7 @@ public:
             llvm::Type::getInt32Ty(ctx), (tversion % 100) / 10))};
     llvm::NamedMDNode *OCLVerMD =
         _ir->module.getOrInsertNamedMetadata("opencl.ocl.version");
-      
+
     OCLVerMD->addOperand(llvm::MDNode::get(ctx, OCLVerElts));
 #endif
   }
@@ -93,20 +93,20 @@ public:
       KernArgMD_type_qual,
       KernArgMD_name,
       count_KernArgMD
-      
   };
   void addKernelMetadata(FuncDeclaration *fd, llvm::Function *llf) override {
-    // By the time we get here the ABI should have rewritten the function
-    // type so that the magic types in ldc.dcompute are transformed into
-    // what the LLVM backend expects.
+// By the time we get here the ABI should have rewritten the function
+// type so that the magic types in ldc.dcompute are transformed into
+// what the LLVM backend expects.
 
-    // Fix 3.5.2 build failures. Remove when dropping 3.5 support.
+// Fix 3.5.2 build failures. Remove when dropping 3.5 support.
 #if LDC_LLVM_VER >= 306
     unsigned i = 0;
     // TODO: Handle Function attibutes
     llvm::SmallVector<llvm::Metadata *, 8> kernelMDArgs;
     kernelMDArgs.push_back(llvm::ConstantAsMetadata::get(llf));
     // MDNode for the kernel argument address space qualifiers.
+
     std::array<llvm::SmallVector<llvm::Metadata *, 8>,count_KernArgMD> paramArgs;
     std::array<const char*,count_KernArgMD> args = {
       "kernel_arg_addr_space",
@@ -125,17 +125,17 @@ public:
     VarDeclarations *vs = fd->parameters;
     for (i = 0; i < vs->dim; i++) {
       VarDeclaration *v = (*vs)[i];
-      decodeTypes(paramArgs,v);
+      decodeTypes(paramArgs, v);
     }
-  
-    for (auto& md : paramArgs)
+
+    for (auto &md : paramArgs)
       kernelMDArgs.push_back(llvm::MDNode::get(ctx, md));
     ///-------------------------------
     /// TODO: Handle Function attibutes
     ///-------------------------------
     llvm::MDNode *kernelMDNode = llvm::MDNode::get(ctx, kernelMDArgs);
     llvm::NamedMDNode *OpenCLKernelMetadata =
-      _ir->module.getOrInsertNamedMetadata("opencl.kernels");
+        _ir->module.getOrInsertNamedMetadata("opencl.kernels");
     OpenCLKernelMetadata->addOperand(kernelMDNode);
 #endif
   }
@@ -147,19 +147,23 @@ public:
   std::string basicTypeToString(Type *t) {
     std::stringstream ss;
     auto ty = t->ty;
-    if      (ty == Tint8)  ss << "char";
-    else if (ty == Tuns8) ss << "uchar";
+    if (ty == Tint8)
+      ss << "char";
+    else if (ty == Tuns8)
+      ss << "uchar";
     else if (ty == Tvector) {
-      TypeVector* vec = static_cast<TypeVector*>(t);
+      TypeVector *vec = static_cast<TypeVector *>(t);
       auto size = vec->size(Loc());
       auto basety = vec->basetype->ty;
-      if      (basety == Tint8)  ss << "char";
-      else if (basety == Tuns8) ss << "uchar";
-      else ss << vec->basetype->toChars();
+      if (basety == Tint8)
+        ss << "char";
+      else if (basety == Tuns8)
+        ss << "uchar";
+      else
+        ss << vec->basetype->toChars();
       ss << (int)size;
-    }
-    else
-        ss << t->toChars();
+    } else
+      ss << t->toChars();
     return ss.str();
   }
 #if LDC_LLVM_VER >= 306
@@ -173,17 +177,14 @@ public:
     std::string accessQual = "none";
     int addrspace = 0;
     if (v->type->ty == Tstruct &&
-        (ptr = toDcomputePointer(static_cast<TypeStruct*>(v->type)->sym)))
-    {
+        (ptr = toDcomputePointer(static_cast<TypeStruct *>(v->type)->sym))) {
       addrspace = ptr->addrspace;
       tyName = basicTypeToString(ptr->type) + "*";
       baseTyName = tyName;
       // there is no volatile or restrict (yet) in D
       typeQuals = mod2str(ptr->type->mod);
       // TODO: Images and Pipes They are global pointers to opaques
-    }
-    else
-    {
+    } else {
       tyName = basicTypeToString(v->type);
       baseTyName = tyName;
       typeQuals = mod2str(v->type->mod);
diff --git a/gen/declarations.cpp b/gen/declarations.cpp
index 378e6a81ad..f7d6a0d9ef 100644
--- a/gen/declarations.cpp
+++ b/gen/declarations.cpp
@@ -67,6 +67,8 @@ public:
                            decl->toPrettyChars());
     LOG_SCOPE
 
+    assert(!irs->dcomputetarget);
+
     if (decl->ir->isDefined()) {
       return;
     }
@@ -125,14 +127,18 @@ public:
       m->accept(this);
     }
 
-    // Define the __initZ symbol.
-    IrAggr *ir = getIrAggr(decl);
-    llvm::GlobalVariable *initZ = ir->getInitSymbol();
-    initZ->setInitializer(ir->getDefaultInit());
-    setLinkage(decl, initZ);
+    // Skip __initZ and typeinfo for @compute device code.
+    // TODO: support global variables and thus __initZ
+    if (!irs->dcomputetarget) {
+      // Define the __initZ symbol.
+      IrAggr *ir = getIrAggr(decl);
+      llvm::GlobalVariable *initZ = ir->getInitSymbol();
+      initZ->setInitializer(ir->getDefaultInit());
+      setLinkage(decl, initZ);
 
-    // emit typeinfo
-    DtoTypeInfoOf(decl->type);
+      // emit typeinfo
+      DtoTypeInfoOf(decl->type);
+    }
 
     // Emit __xopEquals/__xopCmp/__xtoHash.
     if (decl->xeq && decl->xeq != decl->xerreq) {
@@ -153,6 +159,8 @@ public:
                            decl->toPrettyChars());
     LOG_SCOPE
 
+    assert(!irs->dcomputetarget);
+
     if (decl->ir->isDefined()) {
       return;
     }
@@ -245,6 +253,7 @@ public:
 
       assert(!(decl->storage_class & STCmanifest) &&
              "manifest constant being codegen'd!");
+      assert(!irs->dcomputetarget);
 
       IrGlobal *irGlobal = getIrGlobal(decl);
       LLGlobalVariable *gvar = llvm::cast<LLGlobalVariable>(irGlobal->value);
@@ -412,7 +421,8 @@ public:
   void visit(PragmaDeclaration *decl) LLVM_OVERRIDE {
     if (decl->ident == Id::lib) {
       assert(decl->args && decl->args->dim == 1);
-
+      assert(!irs->dcomputetarget);
+        
       Expression *e = static_cast<Expression *>(decl->args->data[0]);
 
       assert(e->op == TOKstring);
@@ -473,7 +483,7 @@ public:
   //////////////////////////////////////////////////////////////////////////
 
   void visit(TypeInfoDeclaration *decl) LLVM_OVERRIDE {
-    if (isSpeculativeType(decl->tinfo)) {
+    if (irs->dcomputetarget || isSpeculativeType(decl->tinfo)) {
       return;
     }
     TypeInfoDeclaration_codegen(decl, irs);
@@ -482,7 +492,7 @@ public:
   //////////////////////////////////////////////////////////////////////////
 
   void visit(TypeInfoClassDeclaration *decl) LLVM_OVERRIDE {
-    if (isSpeculativeType(decl->tinfo)) {
+    if (irs->dcomputetarget || isSpeculativeType(decl->tinfo)) {
       return;
     }
     TypeInfoClassDeclaration_codegen(decl, irs);
diff --git a/gen/functions.cpp b/gen/functions.cpp
index 60dd18d73a..8bd7152571 100644
--- a/gen/functions.cpp
+++ b/gen/functions.cpp
@@ -22,6 +22,7 @@
 #include "gen/abi.h"
 #include "gen/arrays.h"
 #include "gen/classes.h"
+#include "gen/dcompute/target.h"
 #include "gen/dvalue.h"
 #include "gen/funcgenstate.h"
 #include "gen/function-inlining.h"
@@ -977,7 +978,7 @@ void DtoDefineFunction(FuncDeclaration *fd, bool linkageAvailableExternally) {
 #if LDC_LLVM_VER >= 500
                            0, // Address space
 #endif
-                           "alloca point", beginbb);
+                           "alloca_point", beginbb);
   funcGen.allocapoint = allocaPoint;
 
   // debug info - after all allocas, but before any llvm.dbg.declare etc
@@ -1127,6 +1128,11 @@ void DtoDefineFunction(FuncDeclaration *fd, bool linkageAvailableExternally) {
   }
 
   gIR->scopes.pop_back();
+
+  if (gIR->dcomputetarget && hasKernelAttr(fd)) {
+    auto fn = gIR->module.getFunction(fd->mangleString);
+    gIR->dcomputetarget->addKernelMetadata(fd, fn);
+  }
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/gen/optimizer.cpp b/gen/optimizer.cpp
index 0296e360f7..a74d5d3fa7 100644
--- a/gen/optimizer.cpp
+++ b/gen/optimizer.cpp
@@ -12,6 +12,7 @@
 #include "gen/cl_helpers.h"
 #include "gen/logger.h"
 #include "gen/passes/Passes.h"
+#include "driver/targetmachine.h"
 #include "llvm/LinkAllPasses.h"
 #if LDC_LLVM_VER >= 307
 #include "llvm/IR/LegacyPassManager.h"
@@ -341,6 +342,16 @@ bool ldc_optimize_module(llvm::Module *M) {
 #endif
       PassManager mpm;
 
+  // Dont optimise spirv modules because turning GEPs into extracts triggers
+  // asserts in the IR -> SPIR-V translation pass. SPIRV doesn't have a target
+  // machine, so any optimisation passes that rely on it to provide analysis,
+  // like DCE can't be run.
+  // The optimisation is supposed to happen between the SPIRV -> native machine
+  // code pass of the consumer of the binary.
+  // TODO: run rudimentary optimisations to improve IR debuggability.
+  if (getComputeTargetType(M) == ComputeBackend::SPIRV)
+    return false;
+
 #if LDC_LLVM_VER >= 307
   // Add an appropriate TargetLibraryInfo pass for the module's triple.
   TargetLibraryInfoImpl *tlii =
diff --git a/gen/statements.cpp b/gen/statements.cpp
index e203f5bfa6..6d2d0fc8fd 100644
--- a/gen/statements.cpp
+++ b/gen/statements.cpp
@@ -16,6 +16,7 @@
 #include "gen/arrays.h"
 #include "gen/classes.h"
 #include "gen/coverage.h"
+#include "gen/dcompute/target.h"
 #include "gen/dvalue.h"
 #include "gen/funcgenstate.h"
 #include "gen/irstate.h"
@@ -24,6 +25,7 @@
 #include "gen/logger.h"
 #include "gen/runtime.h"
 #include "gen/tollvm.h"
+#include "id.h"
 #include "ir/irfunction.h"
 #include "ir/irmodule.h"
 #include "llvm/IR/CFG.h"
@@ -317,6 +319,18 @@ public:
   }
 
   //////////////////////////////////////////////////////////////////////////
+  
+  bool dcomputeReflectMatches(CallExp *ce) {
+    auto arg1 = (DComputeTarget::ID)(*ce->arguments)[0]->toInteger();
+    auto arg2 = (*ce->arguments)[1]->toInteger();
+    auto dct = irs->dcomputetarget;
+    if (!dct) {
+      return arg1 == DComputeTarget::Host;
+    }
+    else {
+      return arg1 == dct->target && (!arg2 || arg2 == dct->tversion);
+    }
+  }
 
   void visit(IfStatement *stmt) LLVM_OVERRIDE {
     IF_LOG Logger::println("IfStatement::toIR(): %s", stmt->loc.toChars());
@@ -332,6 +346,22 @@ public:
     irs->DBuilder.EmitBlockStart(stmt->loc);
     emitCoverageLinecountInc(stmt->loc);
 
+    // This is a (dirty) hack to get codegen time conditional
+    // compilation, on account of the fact that we are trying
+    // to target multiple backends "simultaneously" with one
+    // pass through the front end, to have a single "static"
+    // context.
+    if (stmt->condition->op == TOKcall) {
+      auto ce = (CallExp *)stmt->condition;
+      if (ce->f && ce->f->ident == Id::dcReflect) {
+        if (dcomputeReflectMatches(ce))
+          stmt->ifbody->accept(this);
+        else if (stmt->elsebody)
+          stmt->elsebody->accept(this);
+        return;
+      }
+    }
+
     DValue *cond_e = toElemDtor(stmt->condition);
     LLValue *cond_val = DtoRVal(cond_e);
 
@@ -745,10 +775,15 @@ public:
     irs->DBuilder.EmitBlockStart(stmt->finalbody->loc);
     stmt->finalbody->accept(this);
     irs->DBuilder.EmitBlockEnd();
+    CleanupCursor cleanupBefore;
 
-    CleanupCursor cleanupBefore = irs->funcGen().scopes.currentCleanupScope();
-    irs->funcGen().scopes.pushCleanup(finallybb, irs->scopebb());
-
+    // For @compute code, don't emit any exception handling as there are no
+    // exceptions anyway.
+    const bool computeCode = !!irs->dcomputetarget;
+    if (!computeCode) {
+      cleanupBefore = irs->funcGen().scopes.currentCleanupScope();
+      irs->funcGen().scopes.pushCleanup(finallybb, irs->scopebb());
+    }
     // Emit the try block.
     irs->scope() = IRScope(trybb);
 
@@ -758,12 +793,14 @@ public:
     irs->DBuilder.EmitBlockEnd();
 
     if (successbb) {
-      irs->funcGen().scopes.runCleanups(cleanupBefore, successbb);
+      if (!computeCode)
+        irs->funcGen().scopes.runCleanups(cleanupBefore, successbb);
       irs->scope() = IRScope(successbb);
       // PGO counter tracks the continuation of the try-finally statement
       PGO.emitCounterIncrement(stmt);
     }
-    irs->funcGen().scopes.popCleanups(cleanupBefore);
+    if (!computeCode)
+      irs->funcGen().scopes.popCleanups(cleanupBefore);
   }
 
   //////////////////////////////////////////////////////////////////////////
@@ -772,6 +809,7 @@ public:
     IF_LOG Logger::println("TryCatchStatement::toIR(): %s",
                            stmt->loc.toChars());
     LOG_SCOPE;
+    assert(!irs->dcomputetarget);
 
     auto &PGO = irs->funcGen().pgo;
 
@@ -812,6 +850,7 @@ public:
   void visit(ThrowStatement *stmt) LLVM_OVERRIDE {
     IF_LOG Logger::println("ThrowStatement::toIR(): %s", stmt->loc.toChars());
     LOG_SCOPE;
+    assert(!irs->dcomputetarget);
 
     auto &PGO = irs->funcGen().pgo;
     PGO.setCurrentStmt(stmt);
@@ -871,6 +910,7 @@ public:
     const bool isStringSwitch = !stmt->condition->type->isintegral();
     if (isStringSwitch) {
       Logger::println("is string switch");
+      assert(!irs->dcomputetarget);
 
       // Sort the cases, taking care not to modify the original AST.
       cases = cases->copy();
@@ -1597,7 +1637,8 @@ public:
     IF_LOG Logger::println("SwitchErrorStatement::toIR(): %s",
                            stmt->loc.toChars());
     LOG_SCOPE;
-
+    assert(!irs->dcomputetarget);
+      
     auto &PGO = irs->funcGen().pgo;
     PGO.setCurrentStmt(stmt);
 
@@ -1616,11 +1657,15 @@ public:
 
   //////////////////////////////////////////////////////////////////////////
 
-  void visit(AsmStatement *stmt) LLVM_OVERRIDE { AsmStatement_toIR(stmt, irs); }
+  void visit(AsmStatement *stmt) LLVM_OVERRIDE {
+    assert(!irs->dcomputetarget);
+    AsmStatement_toIR(stmt, irs);
+  }
 
   //////////////////////////////////////////////////////////////////////////
 
   void visit(CompoundAsmStatement *stmt) LLVM_OVERRIDE {
+    assert(!irs->dcomputetarget);
     CompoundAsmStatement_toIR(stmt, irs);
   }
 
diff --git a/gen/uda.cpp b/gen/uda.cpp
index 6ec90c681c..4bbbad4284 100644
--- a/gen/uda.cpp
+++ b/gen/uda.cpp
@@ -418,15 +418,15 @@ bool hasWeakUDA(Dsymbol *sym) {
 
 /// Returns 0 if 'sym' does not have the @ldc.dcompute.compute() UDA applied.
 /// Returns 1 + n if 'sym' does and is @compute(n).
-int hasComputeAttr(Dsymbol *sym) {
+DComputeCompileFor hasComputeAttr(Dsymbol *sym) {
 
   auto sle = getMagicAttribute(sym, Id::udaCompute, Id::dcompute);
   if (!sle)
-    return 0;
+    return DComputeCompileFor::hostOnly;
 
   checkStructElems(sle, {Type::tint32});
 
-  return 1 + (*sle->elements)[0]->toInteger();
+  return static_cast<DComputeCompileFor>(1 + (*sle->elements)[0]->toInteger());
 }
 
 /// Checks whether 'sym' has the @ldc.dcompute._kernel() UDA applied.
@@ -437,7 +437,8 @@ bool hasKernelAttr(Dsymbol *sym) {
 
   checkStructElems(sle, {});
 
-  if (!sym->isFuncDeclaration() && !hasComputeAttr(sym->getModule()))
+  if (!sym->isFuncDeclaration() &&
+      hasComputeAttr(sym->getModule()) != DComputeCompileFor::hostOnly)
     sym->error("@ldc.dcompute.kernel can only be applied to functions"
                " in modules marked @ldc.dcompute.compute");
 
diff --git a/gen/uda.h b/gen/uda.h
index 6735b35d28..b47f24ff59 100644
--- a/gen/uda.h
+++ b/gen/uda.h
@@ -27,7 +27,13 @@ void applyFuncDeclUDAs(FuncDeclaration *decl, IrFunction *irFunc);
 void applyVarDeclUDAs(VarDeclaration *decl, llvm::GlobalVariable *gvar);
 
 bool hasWeakUDA(Dsymbol *sym);
-int hasComputeAttr(Dsymbol *sym);
 bool hasKernelAttr(Dsymbol *sym);
-
+/// Must match ldc.attributes.Compilefor + 1 == DComputeCompileFor
+enum class DComputeCompileFor
+{
+  hostOnly = 0,
+  deviceOnly = 1,
+  hostAndDevice = 2
+};
+DComputeCompileFor hasComputeAttr(Dsymbol *sym);
 #endif
diff --git a/ir/irtypestruct.cpp b/ir/irtypestruct.cpp
index 8fdab7df2a..90e148ed30 100644
--- a/ir/irtypestruct.cpp
+++ b/ir/irtypestruct.cpp
@@ -15,11 +15,14 @@
 #include "declaration.h"
 #include "init.h"
 #include "mtype.h"
+#include "template.h"
 
 #include "gen/irstate.h"
 #include "gen/tollvm.h"
 #include "gen/logger.h"
 #include "gen/llvmhelpers.h"
+#include "gen/dcompute/target.h"
+#include "gen/dcompute/druntime.h"
 
 //////////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////////
@@ -49,11 +52,29 @@ IrTypeStruct *IrTypeStruct::get(StructDeclaration *sd) {
     t->packed = isPacked(sd);
   }
 
-  AggrTypeBuilder builder(t->packed);
-  builder.addAggregate(sd);
-  builder.addTailPadding(sd->structsize);
-  isaStruct(t->type)->setBody(builder.defaultTypes(), t->packed);
-  t->varGEPIndices = builder.varGEPIndices();
+  // For ldc.dcomptetypes.Pointer!(uint n,T),
+  // emit { T addrspace(gIR->dcomputetarget->mapping[n])* }
+    llvm::Optional<DcomputePointer> p;
+  if (gIR->dcomputetarget && (p = toDcomputePointer(sd))) {
+   
+    // Translate the virtual dcompute address space into the real one for
+    // the target
+    int realAS = gIR->dcomputetarget->mapping[p->addrspace];
+
+    llvm::SmallVector<LLType *, 1> body;
+    body.push_back(DtoMemType(p->type)->getPointerTo(realAS));
+
+    isaStruct(t->type)->setBody(body, t->packed);
+    VarGEPIndices v;
+    v[sd->fields[0]] = 0;
+    t->varGEPIndices = v;
+  } else {
+    AggrTypeBuilder builder(t->packed);
+    builder.addAggregate(sd);
+    builder.addTailPadding(sd->structsize);
+    isaStruct(t->type)->setBody(builder.defaultTypes(), t->packed);
+    t->varGEPIndices = builder.varGEPIndices();
+  }
 
   IF_LOG Logger::cout() << "final struct type: " << *t->type << std::endl;