From 7816e7730adb0f3fecf6fffdf6bb78e1be083f5f Mon Sep 17 00:00:00 2001 From: Martin Kinkelin Date: Thu, 31 May 2018 01:44:46 +0200 Subject: [PATCH] Win64: Use LLVM's vector calling convention for extern(D) (#2714) I.e., pass and return vectors in registers. With the default C calling convention (and the Win64 TargetABI not touching any vectors), vectors are returned in XMM0, but inefficiently passed as ref to hidden copy (automatically by LLVM). Microsoft's vector calling convention, introduced opt-in with VS 2013, additionally puts HFAs and HVAs (Homogeneous Float/Vector Aggregates) into registers, see https://blogs.msdn.microsoft.com/vcblog/2013/07/11/introducing-vector-calling-convention/. --- gen/abi-win64.cpp | 20 ++++++++++++++++++++ runtime/jit-rt/cpp-so/compile.cpp | 11 +++++++---- tests/codegen/hashed_mangling.d | 10 +++++----- tests/codegen/vector_abi_x86.d | 19 +++++++++++++++++++ 4 files changed, 51 insertions(+), 9 deletions(-) create mode 100644 tests/codegen/vector_abi_x86.d diff --git a/gen/abi-win64.cpp b/gen/abi-win64.cpp index ee28a1684e..248d47835d 100644 --- a/gen/abi-win64.cpp +++ b/gen/abi-win64.cpp @@ -88,6 +88,26 @@ public: Win64TargetABI() : isMSVC(global.params.targetTriple->isWindowsMSVCEnvironment()) {} + llvm::CallingConv::ID callingConv(LINK l, TypeFunction *tf = nullptr, + FuncDeclaration *fd = nullptr) override { + // Use the vector calling convention for extern(D) (except for variadics) + // => let LLVM pass vectors in registers instead of passing a ref to a + // hidden copy (both cases handled by LLVM automatically for LL vectors + // which we don't rewrite). + return l == LINKd && !(tf && tf->varargs == 1) + ? 
llvm::CallingConv::X86_VectorCall + : llvm::CallingConv::C; + } + + std::string mangleFunctionForLLVM(std::string name, LINK l) override { + if (l == LINKd) { + // Prepend a 0x1 byte to prevent LLVM from applying vectorcall/stdcall + // mangling: _D… => _D…@<paramssize> + name.insert(name.begin(), '\1'); + } + return name; + } + bool returnInArg(TypeFunction *tf) override { if (tf->isref) return false; diff --git a/runtime/jit-rt/cpp-so/compile.cpp b/runtime/jit-rt/cpp-so/compile.cpp index 6cb6a2a8c2..3d944c9fd3 100644 --- a/runtime/jit-rt/cpp-so/compile.cpp +++ b/runtime/jit-rt/cpp-so/compile.cpp @@ -104,17 +104,20 @@ struct llvm_init_obj { }; std::string decorate(const std::string &name) { -#if defined(__APPLE__) +#if __APPLE__ return "_" + name; -#elif defined(_WIN32) && defined(_M_IX86) +#elif _WIN32 assert(!name.empty()); - if (0x1 == name[0]) { + if (name[0] == 0x1) return name.substr(1); - } +#if _M_IX86 return "_" + name; #else return name; #endif +#else + return name; +#endif } auto getSymbolInProcess(const std::string &name) diff --git a/tests/codegen/hashed_mangling.d b/tests/codegen/hashed_mangling.d index e10bbe119a..68be736000 100644 --- a/tests/codegen/hashed_mangling.d +++ b/tests/codegen/hashed_mangling.d @@ -15,9 +15,9 @@ extern (C) int externCfunctions_are_not_hashed_externCfunctions_are_not_hashed_e auto s(T)(T t) { - // HASH90-DAG: define{{.*}} @{{(\"\\01_)?}}_D3one3two5three__T1sTiZQfFNaNbNiNfiZSQBkQBjQBi__TQBfTiZQBlFiZ__T6ResultTiZQk - // HASH90-DAG: define{{.*}} @{{(\"\\01_)?}}_D3one3two5three3L1633_182fab6f09ff014d9f4a578edf9609981sZ - // HASH90-DAG: define{{.*}} @{{(\"\\01_)?}}_D3one3two5three3L2333_9b5306e5c42722cd2cb93ae6beb422346Result3fooZ + // HASH90-DAG: define{{.*}} @{{(\"\\01_?)?}}_D3one3two5three__T1sTiZQfFNaNbNiNfiZSQBkQBjQBi__TQBfTiZQBlFiZ__T6ResultTiZQk + // HASH90-DAG: define{{.*}} @{{(\"\\01_?)?}}_D3one3two5three3L1633_182fab6f09ff014d9f4a578edf9609981sZ + // HASH90-DAG: define{{.*}} 
@{{(\"\\01_?)?}}_D3one3two5three3L2333_9b5306e5c42722cd2cb93ae6beb422346Result3fooZ struct Result(T) { void foo(){} @@ -29,8 +29,8 @@ auto klass(T)(T t) { class Result(T) { - // HASH90-DAG: define{{.*}} @{{(\"\\01_)?}}_D3one3two5three__T5klassTiZQjFiZ__T6ResultTiZQk3fooMFZv - // HASH90-DAG: define{{.*}} @{{(\"\\01_)?}}_D3one3two5three3L3433_de737f3d65ae58efa925cffda52cd8da6Result3fooZ + // HASH90-DAG: define{{.*}} @{{(\"\\01_?)?}}_D3one3two5three__T5klassTiZQjFiZ__T6ResultTiZQk3fooMFZv + // HASH90-DAG: define{{.*}} @{{(\"\\01_?)?}}_D3one3two5three3L3433_de737f3d65ae58efa925cffda52cd8da6Result3fooZ void foo(){} } return new Result!int(); diff --git a/tests/codegen/vector_abi_x86.d b/tests/codegen/vector_abi_x86.d new file mode 100644 index 0000000000..e8de6c7340 --- /dev/null +++ b/tests/codegen/vector_abi_x86.d @@ -0,0 +1,19 @@ +// Makes sure an optimized trivial function taking and returning a vector +// takes and returns it directly in XMM0, with no memory indirections. + +// REQUIRES: host_X86 + +// RUN: %ldc -O -output-s -of=%t.s %s && FileCheck %s < %t.s + +import core.simd; + +// CHECK: _D14vector_abi_x863foo +int4 foo(int4 param) +{ + // CHECK-NOT: mov + // CHECK: paddd + // CHECK-SAME: %xmm0 + return param + 3; + // CHECK-NOT: mov + // CHECK: ret +}