From 805ad589a0204a7e30df4ca162ea2904e079bd0d Mon Sep 17 00:00:00 2001 From: Asadbek Date: Wed, 15 Apr 2026 15:36:06 +0200 Subject: [PATCH 01/20] Implement initial support for Metal GPU backend --- driver/dcomputecodegenerator.cpp | 11 ++ gen/abi/abi.cpp | 13 ++ gen/abi/metal.cpp | 52 +++++++ gen/abi/targets.h | 2 + gen/dcompute/druntime.h | 2 +- gen/dcompute/target.cpp | 2 + gen/dcompute/target.h | 6 +- gen/dcompute/targetMetal.cpp | 215 ++++++++++++++++++++++++++++ runtime/druntime/src/ldc/dcompute.d | 3 +- 9 files changed, 303 insertions(+), 3 deletions(-) create mode 100644 gen/abi/metal.cpp create mode 100644 gen/dcompute/targetMetal.cpp diff --git a/driver/dcomputecodegenerator.cpp b/driver/dcomputecodegenerator.cpp index 534757d4533..f1767228908 100644 --- a/driver/dcomputecodegenerator.cpp +++ b/driver/dcomputecodegenerator.cpp @@ -11,6 +11,7 @@ #include "driver/cl_options.h" #include "driver/cl_helpers.h" #include "dmd/errors.h" +#include "globals.h" #include "ir/irdsymbol.h" #include "llvm/Support/CommandLine.h" #include @@ -43,6 +44,15 @@ DComputeCodeGenManager::createComputeTarget(const std::string &s) { #endif } + if (s.substr(0, 6) == "air64-") { +#if LDC_LLVM_SUPPORTED_TARGET_AArch64 //&& LDC_LLVM_VER >= 2100 + return createMetalTarget(ctx, 64); +#else + error(Loc(), "LDC was not built with Apple Metal Dcompute support!"); +#endif + } + + if (s.substr(0, 5) == "cuda-") { #if LDC_LLVM_SUPPORTED_TARGET_NVPTX #define CUDA_VALID_VER_INIT 100, 110, 120, 130, 200, 210, 300, 350, 370,\ @@ -59,6 +69,7 @@ DComputeCodeGenManager::createComputeTarget(const std::string &s) { #endif } + #define STR(...) #__VA_ARGS__ #define XSTR(x) STR(x) diff --git a/gen/abi/abi.cpp b/gen/abi/abi.cpp index 5e5a1baa203..4ce6b9d0023 100644 --- a/gen/abi/abi.cpp +++ b/gen/abi/abi.cpp @@ -22,8 +22,10 @@ #include "gen/llvmhelpers.h" #include "gen/logger.h" #include "gen/tollvm.h" +#include "globals.h" #include "ir/irfunction.h" #include "ir/irfuncty.h" +#include "llvm/TargetParser/Triple.h" #include using namespace dmd; @@ -286,6 +288,17 @@ TargetABI *TargetABI::getTarget() { case llvm::Triple::wasm32: case llvm::Triple::wasm64: return getWasmTargetABI(); + + case llvm::Triple::UnknownArch: + if (global.params.targetTriple->getArchName() == "air64") { + return createMetalABI(); + } + + warning(Loc(), + "unknown target ABI, falling back to generic implementation. C/C++ " + "interop will almost certainly NOT work."); + return new TargetABI; + default: warning(Loc(), "unknown target ABI, falling back to generic implementation. C/C++ " diff --git a/gen/abi/metal.cpp b/gen/abi/metal.cpp new file mode 100644 index 00000000000..9455a4064b1 --- /dev/null +++ b/gen/abi/metal.cpp @@ -0,0 +1,52 @@ +//===-- gen/abi-metal.cpp ---------------------------------------*- C++ -*-===// +// +// LDC – the LLVM D compiler +// +// This file is distributed under the BSD-style LDC license. See the LICENSE +// file for details. +// +//===----------------------------------------------------------------------===// + +#include "dmd/identifier.h" +#include "dmd/nspace.h" +#include "gen/abi/abi.h" +#include "gen/abi/generic.h" +#include "gen/dcompute/druntime.h" +#include "ir/irfuncty.h" +#include "gen/dcompute/abi-rewrites.h" +#include "mtype.h" +#include "llvm/Support/raw_ostream.h" +#include + + +using namespace dmd; + +struct MetalABI : TargetABI { + DComputePointerRewrite pointerRewite; + + auto returnInArg(TypeFunction *tf, bool needsThis) -> bool override { + return false; + } + + auto passByVal(TypeFunction *tf, Type*t) -> bool override { + return false; + } + + void rewriteArgument(IrFuncTy &fty, IrFuncTyArg &arg) override { + TargetABI::rewriteArgument(fty, arg); + + if (arg.rewrite) { + return; + } + + Type *ty = arg.type->toBasetype(); + std::optional ptr; + + if (ty->ty == TY::Tstruct && + (ptr = toDcomputePointer(static_cast(ty)->sym))) { + pointerRewite.applyTo(arg); + } + } +}; + +auto createMetalABI() -> TargetABI* { return new MetalABI(); } diff --git a/gen/abi/targets.h b/gen/abi/targets.h index 49098fe2579..ecede93331d 100644 --- a/gen/abi/targets.h +++ b/gen/abi/targets.h @@ -40,3 +40,5 @@ TargetABI *getX86TargetABI(); TargetABI *getLoongArch64TargetABI(); TargetABI *getWasmTargetABI(); + +auto createMetalABI() -> TargetABI*; \ No newline at end of file diff --git a/gen/dcompute/druntime.h b/gen/dcompute/druntime.h index 673eb24951f..5f41d101255 100644 --- a/gen/dcompute/druntime.h +++ b/gen/dcompute/druntime.h @@ -38,4 +38,4 @@ struct DcomputePointer { return LLPointerType::get(getGlobalContext(), as); } }; -std::optional toDcomputePointer(StructDeclaration *sd); +auto toDcomputePointer(StructDeclaration *sd) -> std::optional; diff --git a/gen/dcompute/target.cpp b/gen/dcompute/target.cpp index 65c6f027c2e..c3cd9e0e10c 100644 --- a/gen/dcompute/target.cpp +++ b/gen/dcompute/target.cpp @@ -7,6 +7,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/Support/raw_ostream.h" #if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX #include "dmd/dsymbol.h" diff --git a/gen/dcompute/target.h b/gen/dcompute/target.h index 6ffdbea7678..d78496459bf 100644 --- a/gen/dcompute/target.h +++ b/gen/dcompute/target.h @@ -27,7 +27,7 @@ class DComputeTarget { public: llvm::LLVMContext &ctx; int tversion; // OpenCL or CUDA CC version:major*100 + minor*10 - enum class ID { Host = 0, OpenCL = 1, CUDA = 2 }; + enum class ID { Host = 0, OpenCL = 1, CUDA = 2, Metal = 3 }; ID target; // ID for codegen time conditional compilation. const char *short_name; const char *binSuffix; @@ -58,6 +58,10 @@ class DComputeTarget { DComputeTarget *createCUDATarget(llvm::LLVMContext &c, int sm); #endif +#if LDC_LLVM_SUPPORTED_TARGET_AArch64 +auto createMetalTarget(llvm::LLVMContext &c, int version) -> DComputeTarget *; +#endif + #if LDC_LLVM_SUPPORTED_TARGET_SPIRV DComputeTarget *createOCLTarget(llvm::LLVMContext &c, int oclver); #endif diff --git a/gen/dcompute/targetMetal.cpp b/gen/dcompute/targetMetal.cpp new file mode 100644 index 00000000000..fad5f34373a --- /dev/null +++ b/gen/dcompute/targetMetal.cpp @@ -0,0 +1,215 @@ +//===-- gen/dcompute/targetCUDA.cpp ---------------------------------------===// +// +// LDC – the LLVM D compiler +// +// This file is distributed under the BSD-style LDC license. See the LICENSE +// file for details. +// +//===----------------------------------------------------------------------===// + +#include "declaration.h" +#include "gen/dcompute/druntime.h" +#include "mtype.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include "dmd/identifier.h" + +#if LDC_LLVM_SUPPORTED_TARGET_AArch64 + +#include "gen/dcompute/target.h" +#include "gen/abi/targets.h" +#include "gen/logger.h" +#include "gen/optimizer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Scalar.h" +#include "driver/targetmachine.h" +#include + +namespace { +class TargetMetal : public DComputeTarget { +public: + TargetMetal(llvm::LLVMContext &c, int version) + : DComputeTarget( + c, version, ID::Metal, "metal", "air", createMetalABI(), + + // DCompute Order: [Private, Global, Shared, Constant, Generic] + // AIR equivalents: Private=0, Device/Global=1, Threadgroup/Shared=3, Constant=2 + {{0, 1, 3, 2, 0}}) { + const bool is64 = global.params.targetTriple->isArch64Bit(); + + _ir = new IRState("dcomputeTargetMetal", ctx); + // TODO: need to find 32-bit triple + auto tripleString = "air64_v28-apple-macosx26.0.0"; + + // std::string targTripleStr = is64 ? SPIR_TARGETTRIPLE64 + // : SPIR_TARGETTRIPLE32; + #if LDC_LLVM_VER >= 2100 + llvm::Triple targTriple = llvm::Triple(tripleString); + #else + std::string targTriple = tripleString; + #endif + _ir->module.setTargetTriple(targTriple); + + llvm::StringRef dataLayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64" + "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-" + "v512:512:512-v1024:1024:1024-n8:16:32"; + + auto floatABI = ::FloatABI::Hard; + // targetMachine = createTargetMachine( + // targTriple, + // is64 ? "" : "", + // "", {}, + // is64 ? ExplicitBitness::M64 : ExplicitBitness::M32, floatABI, + // llvm::Reloc::Static, llvm::CodeModel::Medium, codeGenOptLevel(), false); + _ir->module.setDataLayout(is64 ? dataLayout: /* TODO: need to find 32-bit data layout */dataLayout); + _ir->dcomputetarget = this; + } + + void addMetadata() override { + llvm::NamedMDNode *airVersion = _ir->module.getOrInsertNamedMetadata("air.version"); + llvm::Metadata *major = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), 2)); + llvm::Metadata *minor = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), 8)); + llvm::Metadata *patch = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), 0)); + std::array arr = {major, minor, patch}; + airVersion->addOperand(llvm::MDTuple::get(ctx, arr)); + + llvm::NamedMDNode *airLangVersion = _ir->module.getOrInsertNamedMetadata("air.language_version"); + std::array langArr = { + llvm::MDString::get(ctx, "Metal"), + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), 4)), + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), 0)), + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), 0)) + }; + airLangVersion->addOperand(llvm::MDTuple::get(ctx, langArr)); + } + + void addKernelMetadata(FuncDeclaration *df, llvm::Function *llf, StructLiteralExp *_unused_) override { + llvm::errs() << "\n\nAdding kernel metadata...............\n\n"; + llvm::NamedMDNode *kernels = _ir->module.getOrInsertNamedMetadata("air.kernel"); + + std::vector kernelMetadataArguments; + kernelMetadataArguments.push_back(llvm::ConstantAsMetadata::get(llf)); + + // XXX: unknown, not sure why we need this, Metal backend expects it + kernelMetadataArguments.push_back( + llvm::MDNode::get(ctx, {}) + ); + + std::vector argumentMetadata = addArgumentMetadata(df, llf); + + kernelMetadataArguments.push_back( + llvm::MDNode::get(ctx, argumentMetadata) + ); + + llvm::MDTuple *kernelTuple = llvm::MDTuple::get(ctx, kernelMetadataArguments); + + kernels->addOperand(kernelTuple); + } + + auto addArgumentMetadata(FuncDeclaration *df, llvm::Function *llf) -> std::vector { + std::vector kernelMetadataArguments; + int locationIndex = 0; + + for(auto &arg: llf->args()) { + std::vector argumentMetadata; + + argumentMetadata.push_back( + llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get( + llvm::IntegerType::get(ctx, 32), locationIndex))); + + argumentMetadata.push_back(llvm::MDString::get(ctx, "air.buffer")); + argumentMetadata.push_back(llvm::MDString::get(ctx, "air.location_index")); + argumentMetadata.push_back(llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), locationIndex) + )); + + // XXX: unknown, not sure why we need this, Metal backend expects it + argumentMetadata.push_back(llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), 1) + )); + + argumentMetadata.push_back(llvm::MDString::get(ctx, "air.read_write")); + + argumentMetadata.push_back(llvm::MDString::get(ctx, "air.address_space")); + + if (arg.getType()->isPointerTy()){ + unsigned addressSpace = arg.getType()->getPointerAddressSpace(); + argumentMetadata.push_back(llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), addressSpace) + )); + } else { + argumentMetadata.push_back(llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), 0) + )); + } + + VarDeclaration *vd = (*df->parameters)[locationIndex]; + addArgumentTypeInformation(vd, argumentMetadata); + + if (!argumentMetadata.empty()) { + kernelMetadataArguments.push_back(llvm::MDTuple::get(ctx, argumentMetadata)); + } + + locationIndex++; + } + + return kernelMetadataArguments; + } + + void addArgumentTypeInformation(VarDeclaration *vd, std::vector &argumentMetadata) { + Type *type = nullptr; + std::optional ptr; + if (vd->type->ty == TY::Tstruct && (ptr = toDcomputePointer(static_cast(vd->type)->sym))){ + type = ptr->type; + } else { + type = vd->type; + } + + argumentMetadata.push_back(llvm::MDString::get(ctx, "air.arg_type_size")); + argumentMetadata.push_back(llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), dmd::size(type, vd->loc)) + )); + + argumentMetadata.push_back(llvm::MDString::get(ctx, "air.arg_type_align_size")); + argumentMetadata.push_back(llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), type->alignsize()) + )); + + argumentMetadata.push_back(llvm::MDString::get(ctx, "air.arg_type_name")); + // TODO: check if using char needed instead of int8 as in ocl target implementation + argumentMetadata.push_back(llvm::MDString::get(ctx, basicTypeToString(type))); + + argumentMetadata.push_back(llvm::MDString::get(ctx, "air.arg_name")); + argumentMetadata.push_back(llvm::MDString::get(ctx, vd->ident->toChars())); + } + + auto basicTypeToString(Type *t) -> std::string { + std::stringstream ss; + auto ty = t->ty; + if (ty == TY::Tint8) { + ss << "char"; + } else if (ty == TY::Tuns8) { + ss << "uchar"; + } else { + ss << t->toChars(); + } + + return ss.str(); + } + +}; +} // anonymous namespace. + +auto createMetalTarget(llvm::LLVMContext &c, int version) -> DComputeTarget * { + return new TargetMetal(c, version); +}; + +#endif // LDC_LLVM_SUPPORTED_TARGET_AArch64 diff --git a/runtime/druntime/src/ldc/dcompute.d b/runtime/druntime/src/ldc/dcompute.d index 04002a43d91..1ed33e5dc96 100644 --- a/runtime/druntime/src/ldc/dcompute.d +++ b/runtime/druntime/src/ldc/dcompute.d @@ -14,6 +14,7 @@ enum ReflectTarget : uint Host = 0, OpenCL = 1, CUDA = 2, + Metal = 3, } /** * The pseudo conditional compilation function. @@ -24,7 +25,7 @@ enum ReflectTarget : uint * valid values of _version are for OpenCL 100 110 120 200 210 * and for CUDA are x*100 + y*10 for x any valid values of sm x.y * use 0 as a wildcard to match any version. - + * This is mostly used for selecting the correct intrinsic for the * given target and version, but could also be used to tailor for * performance characteristics. See dcompute.std.index for an example From 3420ee892e0b7c7484a00fab16f53e508460849e Mon Sep 17 00:00:00 2001 From: Asadbek Date: Thu, 23 Apr 2026 21:15:28 +0200 Subject: [PATCH 02/20] Implement support for simple saxpy kernel. Convert scalars into pointer and load them in the function body --- gen/abi/abi.cpp | 6 +++--- gen/abi/metal.cpp | 15 +++++++++++++++ gen/dcompute/abi-rewrites.h | 25 +++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/gen/abi/abi.cpp b/gen/abi/abi.cpp index 4ce6b9d0023..e6ee7755ff1 100644 --- a/gen/abi/abi.cpp +++ b/gen/abi/abi.cpp @@ -288,17 +288,17 @@ TargetABI *TargetABI::getTarget() { case llvm::Triple::wasm32: case llvm::Triple::wasm64: return getWasmTargetABI(); - + case llvm::Triple::UnknownArch: if (global.params.targetTriple->getArchName() == "air64") { return createMetalABI(); } - + warning(Loc(), "unknown target ABI, falling back to generic implementation. C/C++ " "interop will almost certainly NOT work."); return new TargetABI; - + default: warning(Loc(), "unknown target ABI, falling back to generic implementation. C/C++ " diff --git a/gen/abi/metal.cpp b/gen/abi/metal.cpp index 9455a4064b1..081b7cdf4cc 100644 --- a/gen/abi/metal.cpp +++ b/gen/abi/metal.cpp @@ -12,6 +12,7 @@ #include "gen/abi/abi.h" #include "gen/abi/generic.h" #include "gen/dcompute/druntime.h" +#include "gen/uda.h" #include "ir/irfuncty.h" #include "gen/dcompute/abi-rewrites.h" #include "mtype.h" @@ -23,6 +24,7 @@ using namespace dmd; struct MetalABI : TargetABI { DComputePointerRewrite pointerRewite; + DcomputeMetalScalarRewrite metalScalarRewrite; auto returnInArg(TypeFunction *tf, bool needsThis) -> bool override { return false; @@ -32,6 +34,14 @@ struct MetalABI : TargetABI { return false; } + void rewriteFunctionType(IrFuncTy &fty) override { + for (auto arg : fty.args) { + if (!arg->byref) { + rewriteArgument(fty, *arg); + } + } + } + void rewriteArgument(IrFuncTy &fty, IrFuncTyArg &arg) override { TargetABI::rewriteArgument(fty, arg); @@ -46,6 +56,11 @@ struct MetalABI : TargetABI { (ptr = toDcomputePointer(static_cast(ty)->sym))) { pointerRewite.applyTo(arg); } + + if (ty->isScalar()) { + llvm::errs() << "Applying Metal Scalar Rewrite to: " << ty->toChars() << "\n"; + metalScalarRewrite.applyTo(arg); + } } }; diff --git a/gen/dcompute/abi-rewrites.h b/gen/dcompute/abi-rewrites.h index 1821df81b0c..bcfde8b9e13 100644 --- a/gen/dcompute/abi-rewrites.h +++ b/gen/dcompute/abi-rewrites.h @@ -13,7 +13,16 @@ #pragma once +#include "gen/abi/abi.h" #include "gen/abi/generic.h" +#include "gen/dcompute/druntime.h" +#include "gen/dvalue.h" +#include "gen/irstate.h" +#include "gen/llvm.h" +#include "gen/tollvm.h" +#include "llvm/IR/AssemblyAnnotationWriter.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/Support/raw_ostream.h" struct DComputePointerRewrite : ABIRewrite { LLValue *put(DValue *v, bool isLValueExp, bool) override { @@ -32,3 +41,19 @@ struct DComputePointerRewrite : ABIRewrite { return ptr->toLLVMType(true); } }; + +struct DcomputeMetalScalarRewrite : ABIRewrite { + LLType *type(Type* t) override { + // XXXX: Scalar variables are stored in the constant memory space for Metal GPU + return llvm::PointerType::get(gIR->context(), 2/*Constant Memory space*/); + } + + LLValue *getLVal(Type *dty, LLValue *v) override { + return v; + } + + LLValue *put(DValue *v, bool isLValueExp, bool) override { + auto value = DtoRVal(v); + return value; + } +}; From eb961247243852bb47f50c3d61a1286426ee313e Mon Sep 17 00:00:00 2001 From: Asadbek Date: Sat, 25 Apr 2026 10:01:49 +0200 Subject: [PATCH 03/20] Resolve comments on incorrect logging & small cleanup --- driver/dcomputecodegenerator.cpp | 8 ++++---- gen/abi/abi.cpp | 2 +- gen/abi/metal.cpp | 2 +- gen/dcompute/target.cpp | 2 -- gen/dcompute/targetMetal.cpp | 3 +-- runtime/druntime/src/ldc/dcompute.d | 31 +++++++++++++++-------------- 6 files changed, 23 insertions(+), 25 deletions(-) diff --git a/driver/dcomputecodegenerator.cpp b/driver/dcomputecodegenerator.cpp index f1767228908..ff25fc64a57 100644 --- a/driver/dcomputecodegenerator.cpp +++ b/driver/dcomputecodegenerator.cpp @@ -11,7 +11,7 @@ #include "driver/cl_options.h" #include "driver/cl_helpers.h" #include "dmd/errors.h" -#include "globals.h" +#include "dmd/globals.h" #include "ir/irdsymbol.h" #include "llvm/Support/CommandLine.h" #include @@ -45,8 +45,8 @@ DComputeCodeGenManager::createComputeTarget(const std::string &s) { } if (s.substr(0, 6) == "air64-") { -#if LDC_LLVM_SUPPORTED_TARGET_AArch64 //&& LDC_LLVM_VER >= 2100 - return createMetalTarget(ctx, 64); +#if LDC_LLVM_SUPPORTED_TARGET_AArch64 //&& LDC_LLVM_VER >= 2100 + return createMetalTarget(ctx, 64); #else error(Loc(), "LDC was not built with Apple Metal Dcompute support!"); #endif @@ -69,7 +69,7 @@ DComputeCodeGenManager::createComputeTarget(const std::string &s) { #endif } - + #define STR(...) #__VA_ARGS__ #define XSTR(x) STR(x) diff --git a/gen/abi/abi.cpp b/gen/abi/abi.cpp index e6ee7755ff1..ac1d001e4f1 100644 --- a/gen/abi/abi.cpp +++ b/gen/abi/abi.cpp @@ -22,7 +22,7 @@ #include "gen/llvmhelpers.h" #include "gen/logger.h" #include "gen/tollvm.h" -#include "globals.h" +#include "dmd/globals.h" #include "ir/irfunction.h" #include "ir/irfuncty.h" #include "llvm/TargetParser/Triple.h" diff --git a/gen/abi/metal.cpp b/gen/abi/metal.cpp index 081b7cdf4cc..1e10b9b72e2 100644 --- a/gen/abi/metal.cpp +++ b/gen/abi/metal.cpp @@ -12,6 +12,7 @@ #include "gen/abi/abi.h" #include "gen/abi/generic.h" #include "gen/dcompute/druntime.h" +#include "gen/logger.h" #include "gen/uda.h" #include "ir/irfuncty.h" #include "gen/dcompute/abi-rewrites.h" @@ -58,7 +59,6 @@ struct MetalABI : TargetABI { } if (ty->isScalar()) { - llvm::errs() << "Applying Metal Scalar Rewrite to: " << ty->toChars() << "\n"; metalScalarRewrite.applyTo(arg); } } diff --git a/gen/dcompute/target.cpp b/gen/dcompute/target.cpp index c3cd9e0e10c..65c6f027c2e 100644 --- a/gen/dcompute/target.cpp +++ b/gen/dcompute/target.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/Support/raw_ostream.h" #if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX #include "dmd/dsymbol.h" diff --git a/gen/dcompute/targetMetal.cpp b/gen/dcompute/targetMetal.cpp index fad5f34373a..02c9c276c89 100644 --- a/gen/dcompute/targetMetal.cpp +++ b/gen/dcompute/targetMetal.cpp @@ -61,7 +61,7 @@ class TargetMetal : public DComputeTarget { "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-" "v512:512:512-v1024:1024:1024-n8:16:32"; - auto floatABI = ::FloatABI::Hard; + // auto floatABI = ::FloatABI::Hard; // targetMachine = createTargetMachine( // targTriple, // is64 ? "" : "", @@ -91,7 +91,6 @@ class TargetMetal : public DComputeTarget { } void addKernelMetadata(FuncDeclaration *df, llvm::Function *llf, StructLiteralExp *_unused_) override { - llvm::errs() << "\n\nAdding kernel metadata...............\n\n"; llvm::NamedMDNode *kernels = _ir->module.getOrInsertNamedMetadata("air.kernel"); std::vector kernelMetadataArguments; diff --git a/runtime/druntime/src/ldc/dcompute.d b/runtime/druntime/src/ldc/dcompute.d index 1ed33e5dc96..6ddc38f43be 100644 --- a/runtime/druntime/src/ldc/dcompute.d +++ b/runtime/druntime/src/ldc/dcompute.d @@ -25,14 +25,13 @@ enum ReflectTarget : uint * valid values of _version are for OpenCL 100 110 120 200 210 * and for CUDA are x*100 + y*10 for x any valid values of sm x.y * use 0 as a wildcard to match any version. - * This is mostly used for selecting the correct intrinsic for the * given target and version, but could also be used to tailor for * performance characteristics. See dcompute.std.index for an example * of how to select intrinsics. */ pure nothrow @nogc -extern(C) bool __dcompute_reflect(ReflectTarget t, uint _version = 0); +extern (C) bool __dcompute_reflect(ReflectTarget t, uint _version = 0); ///Readability aliases for compute enum CompileFor : int @@ -72,10 +71,12 @@ struct compute + } + --- +/ -private struct _kernel { +private struct _kernel +{ size_t[3] bounds; } -_kernel kernel(size_t[3] a = [1,1,1]) => _kernel(a); + +_kernel kernel(size_t[3] a = [1, 1, 1]) => _kernel(a); /++ + DCompute has the notion of adress spaces, provide by the magic structs below. @@ -104,18 +105,18 @@ struct Variable(AddrSpace as, T) enum AddrSpace : uint { - Private = 0, - Global = 1, - Shared = 2, + Private = 0, + Global = 1, + Shared = 2, Constant = 3, - Generic = 4, + Generic = 4, } -alias PrivatePointer(T) = Pointer!(AddrSpace.Private, T); -alias GlobalPointer(T) = Pointer!(AddrSpace.Global, T); -alias SharedPointer(T) = Pointer!(AddrSpace.Shared, T); +alias PrivatePointer(T) = Pointer!(AddrSpace.Private, T); +alias GlobalPointer(T) = Pointer!(AddrSpace.Global, T); +alias SharedPointer(T) = Pointer!(AddrSpace.Shared, T); alias ConstantPointer(T) = Pointer!(AddrSpace.Constant, immutable(T)); -alias GenericPointer(T) = Pointer!(AddrSpace.Generic, T); +alias GenericPointer(T) = Pointer!(AddrSpace.Generic, T); // N.B private variables are declared on the stack and so cannot be declared // at module scope. @@ -124,6 +125,6 @@ alias GenericPointer(T) = Pointer!(AddrSpace.Generic, T); // // The __gshared below does not work. It is kludged into place in `DtoResolveVariable` -alias Global(T) = /*__gshared*/ Variable!(AddrSpace.Global, T); -alias Shared(T) = shared Variable!(AddrSpace.Shared, T); -alias Constant(T) = immutable Variable!(AddrSpace.Constant, T); +alias Global(T) = /*__gshared*/ Variable!(AddrSpace.Global, T); +alias Shared(T) = shared Variable!(AddrSpace.Shared, T); +alias Constant(T) = immutable Variable!(AddrSpace.Constant, T); From bf0016e212f2270c85b3f63814234c60ce3644ca Mon Sep 17 00:00:00 2001 From: Asadbek Date: Sat, 25 Apr 2026 10:22:46 +0200 Subject: [PATCH 04/20] Fix build error on CI --- gen/dcompute/targetMetal.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gen/dcompute/targetMetal.cpp b/gen/dcompute/targetMetal.cpp index 02c9c276c89..7b4129d8579 100644 --- a/gen/dcompute/targetMetal.cpp +++ b/gen/dcompute/targetMetal.cpp @@ -51,11 +51,10 @@ class TargetMetal : public DComputeTarget { // std::string targTripleStr = is64 ? SPIR_TARGETTRIPLE64 // : SPIR_TARGETTRIPLE32; #if LDC_LLVM_VER >= 2100 - llvm::Triple targTriple = llvm::Triple(tripleString); + _ir->module.setTargetTriple(llvm::Triple(tripleString)); #else - std::string targTriple = tripleString; + _ir->module.setTargetTriple(tripleString); #endif - _ir->module.setTargetTriple(targTriple); llvm::StringRef dataLayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64" "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-" From 182df17709909d3069519ef2edcacd04f80cb32f Mon Sep 17 00:00:00 2001 From: Asadbek Date: Sat, 25 Apr 2026 11:16:47 +0200 Subject: [PATCH 05/20] Fix CI build error --- gen/dcompute/targetMetal.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gen/dcompute/targetMetal.cpp b/gen/dcompute/targetMetal.cpp index 7b4129d8579..d5e34653d3c 100644 --- a/gen/dcompute/targetMetal.cpp +++ b/gen/dcompute/targetMetal.cpp @@ -51,11 +51,11 @@ class TargetMetal : public DComputeTarget { // std::string targTripleStr = is64 ? SPIR_TARGETTRIPLE64 // : SPIR_TARGETTRIPLE32; #if LDC_LLVM_VER >= 2100 - _ir->module.setTargetTriple(llvm::Triple(tripleString)); + llvm::Triple targTriple = llvm::Triple(tripleString); #else - _ir->module.setTargetTriple(tripleString); + std::string targTriple = tripleString; #endif - + _ir->module.setTargetTriple(tripleString); llvm::StringRef dataLayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64" "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-" "v512:512:512-v1024:1024:1024-n8:16:32"; From eae34b9a67a7dd2b8eb357fe07b1806534519e8e Mon Sep 17 00:00:00 2001 From: Asadbek Date: Sat, 25 Apr 2026 11:23:47 +0200 Subject: [PATCH 06/20] Fix CI build error --- gen/dcompute/targetMetal.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/gen/dcompute/targetMetal.cpp b/gen/dcompute/targetMetal.cpp index d5e34653d3c..873cf22419d 100644 --- a/gen/dcompute/targetMetal.cpp +++ b/gen/dcompute/targetMetal.cpp @@ -48,14 +48,12 @@ class TargetMetal : public DComputeTarget { // TODO: need to find 32-bit triple auto tripleString = "air64_v28-apple-macosx26.0.0"; - // std::string targTripleStr = is64 ? SPIR_TARGETTRIPLE64 - // : SPIR_TARGETTRIPLE32; #if LDC_LLVM_VER >= 2100 llvm::Triple targTriple = llvm::Triple(tripleString); #else std::string targTriple = tripleString; #endif - _ir->module.setTargetTriple(tripleString); + _ir->module.setTargetTriple(targTriple); llvm::StringRef dataLayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64" "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-" "v512:512:512-v1024:1024:1024-n8:16:32"; From 05c37fc8b8b374623b7589f63575c3f7f7259e6f Mon Sep 17 00:00:00 2001 From: Asadbek Date: Sat, 25 Apr 2026 11:47:55 +0200 Subject: [PATCH 07/20] Fix CI build error --- gen/dcompute/targetMetal.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gen/dcompute/targetMetal.cpp b/gen/dcompute/targetMetal.cpp index 873cf22419d..670b2fdc7ba 100644 --- a/gen/dcompute/targetMetal.cpp +++ b/gen/dcompute/targetMetal.cpp @@ -18,6 +18,7 @@ #include "llvm/Support/raw_ostream.h" #include #include +#include #include #include "dmd/identifier.h" @@ -46,14 +47,13 @@ class TargetMetal : public DComputeTarget { _ir = new IRState("dcomputeTargetMetal", ctx); // TODO: need to find 32-bit triple - auto tripleString = "air64_v28-apple-macosx26.0.0"; + llvm::StringRef tripleString = "air64_v28-apple-macosx26.0.0"; #if LDC_LLVM_VER >= 2100 - llvm::Triple targTriple = llvm::Triple(tripleString); + _ir->module.setTargetTriple(llvm::Triple(tripleString)); #else - std::string targTriple = tripleString; + _ir->module.setTargetTriple(tripleString); #endif - _ir->module.setTargetTriple(targTriple); llvm::StringRef dataLayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64" "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-" "v512:512:512-v1024:1024:1024-n8:16:32"; From 7fe33964db29220799c4fdf15c23834b2d7cb64c Mon Sep 17 00:00:00 2001 From: Asadbek Date: Sat, 25 Apr 2026 12:01:53 +0200 Subject: [PATCH 08/20] Fix CI build error --- gen/dcompute/targetMetal.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/gen/dcompute/targetMetal.cpp b/gen/dcompute/targetMetal.cpp index 670b2fdc7ba..17b5679b6d2 100644 --- a/gen/dcompute/targetMetal.cpp +++ b/gen/dcompute/targetMetal.cpp @@ -49,11 +49,14 @@ class TargetMetal : public DComputeTarget { // TODO: need to find 32-bit triple llvm::StringRef tripleString = "air64_v28-apple-macosx26.0.0"; - #if LDC_LLVM_VER >= 2100 - _ir->module.setTargetTriple(llvm::Triple(tripleString)); - #else - _ir->module.setTargetTriple(tripleString); - #endif + _ir->module.setTargetTriple(llvm::Triple(tripleString)); + + + // #if LDC_LLVM_VER >= 2100 + // _ir->module.setTargetTriple(llvm::Triple(tripleString)); + // #else + // _ir->module.setTargetTriple(tripleString); + // #endif llvm::StringRef dataLayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64" "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-" "v512:512:512-v1024:1024:1024-n8:16:32"; From 5906ad4b309a40bf53a8be88e3d284fb459ffe62 Mon Sep 17 00:00:00 2001 From: Asadbek Date: Tue, 28 Apr 2026 13:49:47 +0200 Subject: [PATCH 09/20] Fix CI build error --- gen/dcompute/targetMetal.cpp | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/gen/dcompute/targetMetal.cpp b/gen/dcompute/targetMetal.cpp index 17b5679b6d2..c2834a24475 100644 --- a/gen/dcompute/targetMetal.cpp +++ b/gen/dcompute/targetMetal.cpp @@ -6,6 +6,7 @@ // file for details. // //===----------------------------------------------------------------------===// +#if LDC_LLVM_SUPPORTED_TARGET_AArch64 #include "declaration.h" #include "gen/dcompute/druntime.h" @@ -21,9 +22,6 @@ #include #include #include "dmd/identifier.h" - -#if LDC_LLVM_SUPPORTED_TARGET_AArch64 - #include "gen/dcompute/target.h" #include "gen/abi/targets.h" #include "gen/logger.h" @@ -49,25 +47,16 @@ class TargetMetal : public DComputeTarget { // TODO: need to find 32-bit triple llvm::StringRef tripleString = "air64_v28-apple-macosx26.0.0"; - _ir->module.setTargetTriple(llvm::Triple(tripleString)); - + #if LLVM_VERSION_MAJOR>= 21 + _ir->module.setTargetTriple(llvm::Triple(tripleString)); + #else + _ir->module.setTargetTriple(tripleString); + #endif - // #if LDC_LLVM_VER >= 2100 - // _ir->module.setTargetTriple(llvm::Triple(tripleString)); - // #else - // _ir->module.setTargetTriple(tripleString); - // #endif llvm::StringRef dataLayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64" "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-" "v512:512:512-v1024:1024:1024-n8:16:32"; - // auto floatABI = ::FloatABI::Hard; - // targetMachine = createTargetMachine( - // targTriple, - // is64 ? "" : "", - // "", {}, - // is64 ? ExplicitBitness::M64 : ExplicitBitness::M32, floatABI, - // llvm::Reloc::Static, llvm::CodeModel::Medium, codeGenOptLevel(), false); _ir->module.setDataLayout(is64 ? dataLayout: /* TODO: need to find 32-bit data layout */dataLayout); _ir->dcomputetarget = this; } From 955b6ec8b70704d8480701dfa8f0072be6377369 Mon Sep 17 00:00:00 2001 From: Asadbek Date: Wed, 29 Apr 2026 16:00:45 +0200 Subject: [PATCH 10/20] Resolve comments on unused includes & small refactoring --- driver/cl_options.cpp | 2 +- driver/cl_options.h | 2 +- driver/dcomputecodegenerator.cpp | 5 +- driver/dcomputecodegenerator.h | 2 +- driver/main.cpp | 2 +- gen/abi/abi.cpp | 6 +- gen/abi/metal.cpp | 12 +- gen/abi/targets.h | 2 +- gen/dcompute/druntime.h | 2 +- gen/dcompute/target.cpp | 2 +- gen/dcompute/target.h | 2 +- gen/dcompute/targetMetal.cpp | 227 +++++++++++++++---------------- gen/target.cpp | 2 +- 13 files changed, 127 insertions(+), 141 deletions(-) diff --git a/driver/cl_options.cpp b/driver/cl_options.cpp index c101cf199c8..0ad73230788 100644 --- a/driver/cl_options.cpp +++ b/driver/cl_options.cpp @@ -751,7 +751,7 @@ cl::opt cl::desc("Warn for stack size bigger than the given number"), cl::value_desc("threshold")); -#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX +#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX || LDC_LLVM_SUPPORTED_TARGET_AArch64 cl::list dcomputeTargets("mdcompute-targets", cl::CommaSeparated, cl::desc("Generates code for the specified DCompute target" diff --git a/driver/cl_options.h b/driver/cl_options.h index 481b83adcc4..d9d8d6445f1 100644 --- a/driver/cl_options.h +++ b/driver/cl_options.h @@ -140,7 +140,7 @@ extern cl::opt saveOptimizationRecord; extern cl::opt fWarnStackSize; -#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX +#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX || LDC_LLVM_SUPPORTED_TARGET_AArch64 extern cl::list dcomputeTargets; extern cl::opt dcomputeFilePrefix; #endif diff --git a/driver/dcomputecodegenerator.cpp b/driver/dcomputecodegenerator.cpp index ff25fc64a57..aaf1c257ec2 100644 --- a/driver/dcomputecodegenerator.cpp +++ b/driver/dcomputecodegenerator.cpp @@ -11,14 +11,13 @@ #include "driver/cl_options.h" #include "driver/cl_helpers.h" #include "dmd/errors.h" -#include "dmd/globals.h" #include "ir/irdsymbol.h" #include "llvm/Support/CommandLine.h" #include #include #include -#if !(LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX) +#if !(LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX || LDC_LLVM_SUPPORTED_TARGET_AArch64) DComputeCodeGenManager::DComputeCodeGenManager(llvm::LLVMContext &c) : ctx(c) {} void DComputeCodeGenManager::emit(Module *) {} @@ -117,4 +116,4 @@ DComputeCodeGenManager::~DComputeCodeGenManager() { gTargetMachine = oldGTargetMachine; } -#endif // LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX +#endif // LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX || LDC_LLVM_SUPPORTED_TARGET_AArch64 diff --git a/driver/dcomputecodegenerator.h b/driver/dcomputecodegenerator.h index 21d3fd34790..379335fa9d6 100644 --- a/driver/dcomputecodegenerator.h +++ b/driver/dcomputecodegenerator.h @@ -21,7 +21,7 @@ namespace llvm { class DComputeCodeGenManager { llvm::LLVMContext &ctx; - llvm::SmallVector targets; + llvm::SmallVector targets; DComputeTarget *createComputeTarget(const std::string &s); IRState *oldGIR = nullptr; llvm::TargetMachine *oldGTargetMachine = nullptr; diff --git a/driver/main.cpp b/driver/main.cpp index df3637f55c9..b42f0b6113c 100644 --- a/driver/main.cpp +++ b/driver/main.cpp @@ -950,7 +950,7 @@ void registerPredefinedVersions() { VersionCondition::addPredefinedGlobalIdent("all"); VersionCondition::addPredefinedGlobalIdent("D_Version2"); -#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX +#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX || LDC_LLVM_SUPPORTED_TARGET_AArch64 if (dcomputeTargets.size() != 0) { VersionCondition::addPredefinedGlobalIdent("LDC_DCompute"); } diff --git a/gen/abi/abi.cpp b/gen/abi/abi.cpp index ac1d001e4f1..2b9503dfdc7 100644 --- a/gen/abi/abi.cpp +++ b/gen/abi/abi.cpp @@ -11,8 +11,6 @@ #include "dmd/argtypes.h" #include "dmd/expression.h" -#include "dmd/id.h" -#include "dmd/identifier.h" #include "dmd/target.h" #include "gen/abi/targets.h" #include "gen/abi/generic.h" @@ -22,11 +20,9 @@ #include "gen/llvmhelpers.h" #include "gen/logger.h" #include "gen/tollvm.h" -#include "dmd/globals.h" #include "ir/irfunction.h" #include "ir/irfuncty.h" -#include "llvm/TargetParser/Triple.h" -#include +// #include using namespace dmd; diff --git a/gen/abi/metal.cpp b/gen/abi/metal.cpp index 1e10b9b72e2..6035d651e48 100644 --- a/gen/abi/metal.cpp +++ b/gen/abi/metal.cpp @@ -7,17 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "dmd/identifier.h" -#include "dmd/nspace.h" #include "gen/abi/abi.h" -#include "gen/abi/generic.h" #include "gen/dcompute/druntime.h" -#include "gen/logger.h" -#include "gen/uda.h" -#include "ir/irfuncty.h" #include "gen/dcompute/abi-rewrites.h" -#include "mtype.h" -#include "llvm/Support/raw_ostream.h" +#include "ir/irfuncty.h" +#include "dmd/mtype.h" #include @@ -64,4 +58,4 @@ struct MetalABI : TargetABI { } }; -auto createMetalABI() -> TargetABI* { return new MetalABI(); } +TargetABI* createMetalABI() { return new MetalABI(); } diff --git a/gen/abi/targets.h b/gen/abi/targets.h index ecede93331d..afc6c64fe18 100644 --- a/gen/abi/targets.h +++ b/gen/abi/targets.h @@ -41,4 +41,4 @@ TargetABI *getLoongArch64TargetABI(); TargetABI *getWasmTargetABI(); -auto createMetalABI() -> TargetABI*; \ No newline at end of file +TargetABI* createMetalABI(); diff --git a/gen/dcompute/druntime.h b/gen/dcompute/druntime.h index 5f41d101255..673eb24951f 100644 --- a/gen/dcompute/druntime.h +++ b/gen/dcompute/druntime.h @@ -38,4 +38,4 @@ struct DcomputePointer { return LLPointerType::get(getGlobalContext(), as); } }; -auto toDcomputePointer(StructDeclaration *sd) -> std::optional; +std::optional toDcomputePointer(StructDeclaration *sd); diff --git a/gen/dcompute/target.cpp b/gen/dcompute/target.cpp index 65c6f027c2e..bf67cd011e1 100644 --- a/gen/dcompute/target.cpp +++ b/gen/dcompute/target.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX +#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX || LDC_LLVM_SUPPORTED_TARGET_AArch64 #include "dmd/dsymbol.h" #include "dmd/errors.h" diff --git a/gen/dcompute/target.h b/gen/dcompute/target.h index d78496459bf..24e354f9516 100644 --- a/gen/dcompute/target.h +++ b/gen/dcompute/target.h @@ -59,7 +59,7 @@ DComputeTarget *createCUDATarget(llvm::LLVMContext &c, int sm); #endif #if LDC_LLVM_SUPPORTED_TARGET_AArch64 -auto createMetalTarget(llvm::LLVMContext &c, int version) -> DComputeTarget *; +DComputeTarget* createMetalTarget(llvm::LLVMContext &c, int version); #endif #if LDC_LLVM_SUPPORTED_TARGET_SPIRV diff --git a/gen/dcompute/targetMetal.cpp b/gen/dcompute/targetMetal.cpp index c2834a24475..eafc43bb4c2 100644 --- a/gen/dcompute/targetMetal.cpp +++ b/gen/dcompute/targetMetal.cpp @@ -8,175 +8,173 @@ //===----------------------------------------------------------------------===// #if LDC_LLVM_SUPPORTED_TARGET_AArch64 -#include "declaration.h" #include "gen/dcompute/druntime.h" -#include "mtype.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include -#include "dmd/identifier.h" #include "gen/dcompute/target.h" #include "gen/abi/targets.h" -#include "gen/logger.h" -#include "gen/optimizer.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/Scalar.h" -#include "driver/targetmachine.h" -#include +#include "dmd/identifier.h" namespace { class TargetMetal : public DComputeTarget { public: TargetMetal(llvm::LLVMContext &c, int version) - : DComputeTarget( - c, version, ID::Metal, "metal", "air", createMetalABI(), + : DComputeTarget(c, version, ID::Metal, "metal", "air", createMetalABI(), - // DCompute Order: [Private, Global, Shared, Constant, Generic] - // AIR equivalents: Private=0, Device/Global=1, Threadgroup/Shared=3, Constant=2 - {{0, 1, 3, 2, 0}}) { - const bool is64 = global.params.targetTriple->isArch64Bit(); + // DCompute Order: [Private, Global, Shared, Constant, + // Generic] AIR equivalents: Private=0, Device/Global=1, + // Threadgroup/Shared=3, Constant=2 + {{0, 1, 3, 2, 0}}) { _ir = new IRState("dcomputeTargetMetal", ctx); - // TODO: need to find 32-bit triple llvm::StringRef tripleString = "air64_v28-apple-macosx26.0.0"; - #if LLVM_VERSION_MAJOR>= 21 - _ir->module.setTargetTriple(llvm::Triple(tripleString)); - #else - _ir->module.setTargetTriple(tripleString); - #endif - - llvm::StringRef dataLayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64" - "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-" - "v512:512:512-v1024:1024:1024-n8:16:32"; - - _ir->module.setDataLayout(is64 ? dataLayout: /* TODO: need to find 32-bit data layout */dataLayout); - _ir->dcomputetarget = this; +#if LLVM_VERSION_MAJOR >= 21 + _ir->module.setTargetTriple(llvm::Triple(tripleString)); +#else + _ir->module.setTargetTriple(tripleString); +#endif + + llvm::StringRef dataLayout = + "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-" + "f64:64:64" + "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:" + "128:128-v192:256:256-v256:256:256-" + "v512:512:512-v1024:1024:1024-n8:16:32"; + + _ir->module.setDataLayout(dataLayout); + _ir->dcomputetarget = this; } void addMetadata() override { - llvm::NamedMDNode *airVersion = _ir->module.getOrInsertNamedMetadata("air.version"); - llvm::Metadata *major = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), 2)); - llvm::Metadata *minor = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), 8)); - llvm::Metadata *patch = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), 0)); - std::array arr = {major, minor, patch}; + llvm::NamedMDNode *airVersion = + _ir->module.getOrInsertNamedMetadata("air.version"); + llvm::Metadata *major = metaInt(2); + llvm::Metadata *minor = metaInt(8); + llvm::Metadata *patch = metaInt(0); + + std::array arr = {major, minor, patch}; airVersion->addOperand(llvm::MDTuple::get(ctx, arr)); - llvm::NamedMDNode *airLangVersion = _ir->module.getOrInsertNamedMetadata("air.language_version"); - std::array langArr = { - llvm::MDString::get(ctx, "Metal"), - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), 4)), - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), 0)), - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), 0)) + llvm::NamedMDNode *airLangVersion = + _ir->module.getOrInsertNamedMetadata("air.language_version"); + std::array langArr = { + metaString("Metal"), + metaInt(4), + metaInt(0), + metaInt(0), }; + airLangVersion->addOperand(llvm::MDTuple::get(ctx, langArr)); } - void addKernelMetadata(FuncDeclaration *df, llvm::Function *llf, StructLiteralExp *_unused_) override { - llvm::NamedMDNode *kernels = _ir->module.getOrInsertNamedMetadata("air.kernel"); + void addKernelMetadata(FuncDeclaration *df, llvm::Function *llf, + StructLiteralExp *_unused_) override { + llvm::NamedMDNode *kernels = + _ir->module.getOrInsertNamedMetadata("air.kernel"); std::vector kernelMetadataArguments; kernelMetadataArguments.push_back(llvm::ConstantAsMetadata::get(llf)); // XXX: unknown, not sure why we need this, Metal backend expects it - kernelMetadataArguments.push_back( - llvm::MDNode::get(ctx, {}) - ); + kernelMetadataArguments.push_back(llvm::MDNode::get(ctx, {})); - std::vector argumentMetadata = addArgumentMetadata(df, llf); + std::vector argumentMetadata = + addArgumentMetadata(df, llf); - kernelMetadataArguments.push_back( - llvm::MDNode::get(ctx, argumentMetadata) - ); + kernelMetadataArguments.push_back(llvm::MDNode::get(ctx, argumentMetadata)); - llvm::MDTuple *kernelTuple = llvm::MDTuple::get(ctx, kernelMetadataArguments); + llvm::MDTuple *kernelTuple = + llvm::MDTuple::get(ctx, kernelMetadataArguments); kernels->addOperand(kernelTuple); } - auto addArgumentMetadata(FuncDeclaration *df, llvm::Function *llf) -> std::vector { + auto addArgumentMetadata(FuncDeclaration *df, llvm::Function *llf) + -> std::vector { std::vector kernelMetadataArguments; int locationIndex = 0; - for(auto &arg: llf->args()) { - std::vector argumentMetadata; + for (auto &arg : llf->args()) { + std::vector argumentMetadata; + + argumentMetadata.push_back( + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + llvm::IntegerType::get(ctx, 32), locationIndex))); + + argumentMetadata.push_back(metaString("air.buffer")); + argumentMetadata.push_back( + metaString("air.location_index")); + argumentMetadata.push_back( + metaInt(locationIndex)); + + // XXX: unknown, not sure why we need this, Metal backend expects it + argumentMetadata.push_back(metaInt(1)); + argumentMetadata.push_back(metaString("air.read_write")); + + argumentMetadata.push_back(metaString("air.address_space")); + + if (arg.getType()->isPointerTy()) { + unsigned addressSpace = arg.getType()->getPointerAddressSpace(); argumentMetadata.push_back( - llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get( - llvm::IntegerType::get(ctx, 32), locationIndex))); - - argumentMetadata.push_back(llvm::MDString::get(ctx, "air.buffer")); - argumentMetadata.push_back(llvm::MDString::get(ctx, "air.location_index")); - argumentMetadata.push_back(llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), locationIndex) - )); - - // XXX: unknown, not sure why we need this, Metal backend expects it - argumentMetadata.push_back(llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), 1) - )); - - argumentMetadata.push_back(llvm::MDString::get(ctx, "air.read_write")); - - argumentMetadata.push_back(llvm::MDString::get(ctx, "air.address_space")); - - if (arg.getType()->isPointerTy()){ - unsigned addressSpace = arg.getType()->getPointerAddressSpace(); - argumentMetadata.push_back(llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), addressSpace) - )); - } else { - argumentMetadata.push_back(llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), 0) - )); - } - - VarDeclaration *vd = (*df->parameters)[locationIndex]; - addArgumentTypeInformation(vd, argumentMetadata); - - if (!argumentMetadata.empty()) { - kernelMetadataArguments.push_back(llvm::MDTuple::get(ctx, argumentMetadata)); - } - - locationIndex++; + metaInt(addressSpace)); + } else { + // XXX: 0 - generic address space + argumentMetadata.push_back(metaInt(0)); + } + + VarDeclaration *vd = (*df->parameters)[locationIndex]; + addArgumentTypeInformation(vd, argumentMetadata); + + if (!argumentMetadata.empty()) { + kernelMetadataArguments.push_back( + llvm::MDTuple::get(ctx, argumentMetadata)); + } + + locationIndex++; } return kernelMetadataArguments; } - void addArgumentTypeInformation(VarDeclaration *vd, std::vector &argumentMetadata) { + void + addArgumentTypeInformation(VarDeclaration *vd, + std::vector &argumentMetadata) { Type *type = nullptr; std::optional ptr; - if (vd->type->ty == TY::Tstruct && (ptr = toDcomputePointer(static_cast(vd->type)->sym))){ + if (vd->type->ty == TY::Tstruct && + (ptr = toDcomputePointer(static_cast(vd->type)->sym))) { type = ptr->type; } else { type = vd->type; } - argumentMetadata.push_back(llvm::MDString::get(ctx, "air.arg_type_size")); - argumentMetadata.push_back(llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), dmd::size(type, vd->loc)) - )); + argumentMetadata.push_back(metaString("air.arg_type_size")); + argumentMetadata.push_back( + metaInt(dmd::size(type, vd->loc))); - argumentMetadata.push_back(llvm::MDString::get(ctx, "air.arg_type_align_size")); - argumentMetadata.push_back(llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), type->alignsize()) - )); + argumentMetadata.push_back( + metaString("air.arg_type_align_size")) + ; + argumentMetadata.push_back( + metaInt(type->alignsize())); - argumentMetadata.push_back(llvm::MDString::get(ctx, "air.arg_type_name")); + argumentMetadata.push_back(metaString("air.arg_type_name")); // TODO: check if using char needed instead of int8 as in ocl target implementation - argumentMetadata.push_back(llvm::MDString::get(ctx, basicTypeToString(type))); + argumentMetadata.push_back( + metaString(basicTypeToString(type))); - argumentMetadata.push_back(llvm::MDString::get(ctx, "air.arg_name")); - argumentMetadata.push_back(llvm::MDString::get(ctx, vd->ident->toChars())); + argumentMetadata.push_back(metaString("air.arg_name")); + argumentMetadata.push_back(metaString(vd->ident->toChars())); + } + + llvm::Metadata *metaInt(int n) { + return llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(llvm::IntegerType::get(ctx, 32), n) + ); + } + + llvm::Metadata *metaString(llvm::StringRef s) { + return llvm::MDString::get(ctx, s); } auto basicTypeToString(Type *t) -> std::string { @@ -192,7 +190,6 @@ class TargetMetal : public DComputeTarget { return ss.str(); } - }; } // anonymous namespace. diff --git a/gen/target.cpp b/gen/target.cpp index 16622a99227..e6453c9aac7 100644 --- a/gen/target.cpp +++ b/gen/target.cpp @@ -401,7 +401,7 @@ Expression *Target::getTargetInfo(const char *name_, Loc loc) { return IntegerExp::create(loc, static_cast(cet), Type::tint32); } -#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX +#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX || LDC_LLVM_SUPPORTED_TARGET_AArch64 if (name == "dcomputeTargets") { Expressions* exps = createExpressions(); for (auto &targ : opts::dcomputeTargets) { From 172eda2d22cb4dd454668da7192b0d97bae22402 Mon Sep 17 00:00:00 2001 From: Asadbek Date: Wed, 29 Apr 2026 16:13:38 +0200 Subject: [PATCH 11/20] Remove unused includes --- gen/abi/abi.cpp | 1 - gen/dcompute/abi-rewrites.h | 8 ------- gen/dcompute/targetMetal.cpp | 34 ++++++++++++++++------------- runtime/druntime/src/ldc/dcompute.d | 31 +++++++++++++------------- 4 files changed, 34 insertions(+), 40 deletions(-) diff --git a/gen/abi/abi.cpp b/gen/abi/abi.cpp index 2b9503dfdc7..7e5ab125ad2 100644 --- a/gen/abi/abi.cpp +++ b/gen/abi/abi.cpp @@ -22,7 +22,6 @@ #include "gen/tollvm.h" #include "ir/irfunction.h" #include "ir/irfuncty.h" -// #include using namespace dmd; diff --git a/gen/dcompute/abi-rewrites.h b/gen/dcompute/abi-rewrites.h index bcfde8b9e13..0a159b67633 100644 --- a/gen/dcompute/abi-rewrites.h +++ b/gen/dcompute/abi-rewrites.h @@ -13,16 +13,8 @@ #pragma once -#include "gen/abi/abi.h" #include "gen/abi/generic.h" #include "gen/dcompute/druntime.h" -#include "gen/dvalue.h" -#include "gen/irstate.h" -#include "gen/llvm.h" -#include "gen/tollvm.h" -#include "llvm/IR/AssemblyAnnotationWriter.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/Support/raw_ostream.h" struct DComputePointerRewrite : ABIRewrite { LLValue *put(DValue *v, bool isLValueExp, bool) override { diff --git a/gen/dcompute/targetMetal.cpp b/gen/dcompute/targetMetal.cpp index eafc43bb4c2..e81f38c8617 100644 --- a/gen/dcompute/targetMetal.cpp +++ b/gen/dcompute/targetMetal.cpp @@ -97,21 +97,23 @@ class TargetMetal : public DComputeTarget { std::vector argumentMetadata; argumentMetadata.push_back( - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - llvm::IntegerType::get(ctx, 32), locationIndex))); + metaInt(locationIndex)); - argumentMetadata.push_back(metaString("air.buffer")); + argumentMetadata.push_back( + metaString("air.buffer")); argumentMetadata.push_back( metaString("air.location_index")); argumentMetadata.push_back( metaInt(locationIndex)); // XXX: unknown, not sure why we need this, Metal backend expects it - argumentMetadata.push_back(metaInt(1)); - - argumentMetadata.push_back(metaString("air.read_write")); + argumentMetadata.push_back( + metaInt(1)); - argumentMetadata.push_back(metaString("air.address_space")); + argumentMetadata.push_back( + metaString("air.read_write")); + argumentMetadata.push_back( + metaString("air.address_space")); if (arg.getType()->isPointerTy()) { unsigned addressSpace = arg.getType()->getPointerAddressSpace(); @@ -136,8 +138,7 @@ class TargetMetal : public DComputeTarget { return kernelMetadataArguments; } - void - addArgumentTypeInformation(VarDeclaration *vd, + void addArgumentTypeInformation(VarDeclaration *vd, std::vector &argumentMetadata) { Type *type = nullptr; std::optional ptr; @@ -148,23 +149,26 @@ class TargetMetal : public DComputeTarget { type = vd->type; } - argumentMetadata.push_back(metaString("air.arg_type_size")); + argumentMetadata.push_back( + metaString("air.arg_type_size")); argumentMetadata.push_back( metaInt(dmd::size(type, vd->loc))); argumentMetadata.push_back( - metaString("air.arg_type_align_size")) - ; + metaString("air.arg_type_align_size")); argumentMetadata.push_back( metaInt(type->alignsize())); - argumentMetadata.push_back(metaString("air.arg_type_name")); + argumentMetadata.push_back( + metaString("air.arg_type_name")); // TODO: check if using char needed instead of int8 as in ocl target implementation argumentMetadata.push_back( metaString(basicTypeToString(type))); - argumentMetadata.push_back(metaString("air.arg_name")); - argumentMetadata.push_back(metaString(vd->ident->toChars())); + argumentMetadata.push_back( + metaString("air.arg_name")); + argumentMetadata.push_back( + metaString(vd->ident->toChars())); } llvm::Metadata *metaInt(int n) { diff --git a/runtime/druntime/src/ldc/dcompute.d b/runtime/druntime/src/ldc/dcompute.d index 6ddc38f43be..1ed33e5dc96 100644 --- a/runtime/druntime/src/ldc/dcompute.d +++ b/runtime/druntime/src/ldc/dcompute.d @@ -25,13 +25,14 @@ enum ReflectTarget : uint * valid values of _version are for OpenCL 100 110 120 200 210 * and for CUDA are x*100 + y*10 for x any valid values of sm x.y * use 0 as a wildcard to match any version. + * This is mostly used for selecting the correct intrinsic for the * given target and version, but could also be used to tailor for * performance characteristics. See dcompute.std.index for an example * of how to select intrinsics. */ pure nothrow @nogc -extern (C) bool __dcompute_reflect(ReflectTarget t, uint _version = 0); +extern(C) bool __dcompute_reflect(ReflectTarget t, uint _version = 0); ///Readability aliases for compute enum CompileFor : int @@ -71,12 +72,10 @@ struct compute + } + --- +/ -private struct _kernel -{ +private struct _kernel { size_t[3] bounds; } - -_kernel kernel(size_t[3] a = [1, 1, 1]) => _kernel(a); +_kernel kernel(size_t[3] a = [1,1,1]) => _kernel(a); /++ + DCompute has the notion of adress spaces, provide by the magic structs below. @@ -105,18 +104,18 @@ struct Variable(AddrSpace as, T) enum AddrSpace : uint { - Private = 0, - Global = 1, - Shared = 2, + Private = 0, + Global = 1, + Shared = 2, Constant = 3, - Generic = 4, + Generic = 4, } -alias PrivatePointer(T) = Pointer!(AddrSpace.Private, T); -alias GlobalPointer(T) = Pointer!(AddrSpace.Global, T); -alias SharedPointer(T) = Pointer!(AddrSpace.Shared, T); +alias PrivatePointer(T) = Pointer!(AddrSpace.Private, T); +alias GlobalPointer(T) = Pointer!(AddrSpace.Global, T); +alias SharedPointer(T) = Pointer!(AddrSpace.Shared, T); alias ConstantPointer(T) = Pointer!(AddrSpace.Constant, immutable(T)); -alias GenericPointer(T) = Pointer!(AddrSpace.Generic, T); +alias GenericPointer(T) = Pointer!(AddrSpace.Generic, T); // N.B private variables are declared on the stack and so cannot be declared // at module scope. @@ -125,6 +124,6 @@ alias GenericPointer(T) = Pointer!(AddrSpace.Generic, T); // // The __gshared below does not work. It is kludged into place in `DtoResolveVariable` -alias Global(T) = /*__gshared*/ Variable!(AddrSpace.Global, T); -alias Shared(T) = shared Variable!(AddrSpace.Shared, T); -alias Constant(T) = immutable Variable!(AddrSpace.Constant, T); +alias Global(T) = /*__gshared*/ Variable!(AddrSpace.Global, T); +alias Shared(T) = shared Variable!(AddrSpace.Shared, T); +alias Constant(T) = immutable Variable!(AddrSpace.Constant, T); From fce3b8e920b60ff58087e1ffe083d26b1d6bef9a Mon Sep 17 00:00:00 2001 From: Asadbek Date: Wed, 29 Apr 2026 16:21:32 +0200 Subject: [PATCH 12/20] Add falltrough comment and remove unnecessary change --- driver/dcomputecodegenerator.cpp | 1 - gen/abi/abi.cpp | 6 +----- runtime/druntime/src/ldc/dcompute.d | 1 - 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/driver/dcomputecodegenerator.cpp b/driver/dcomputecodegenerator.cpp index aaf1c257ec2..a427c0ac13f 100644 --- a/driver/dcomputecodegenerator.cpp +++ b/driver/dcomputecodegenerator.cpp @@ -51,7 +51,6 @@ DComputeCodeGenManager::createComputeTarget(const std::string &s) { #endif } - if (s.substr(0, 5) == "cuda-") { #if LDC_LLVM_SUPPORTED_TARGET_NVPTX #define CUDA_VALID_VER_INIT 100, 110, 120, 130, 200, 210, 300, 350, 370,\ diff --git a/gen/abi/abi.cpp b/gen/abi/abi.cpp index 7e5ab125ad2..d592ca5ffb8 100644 --- a/gen/abi/abi.cpp +++ b/gen/abi/abi.cpp @@ -288,11 +288,7 @@ TargetABI *TargetABI::getTarget() { if (global.params.targetTriple->getArchName() == "air64") { return createMetalABI(); } - - warning(Loc(), - "unknown target ABI, falling back to generic implementation. C/C++ " - "interop will almost certainly NOT work."); - return new TargetABI; + // fallthrough default: warning(Loc(), diff --git a/runtime/druntime/src/ldc/dcompute.d b/runtime/druntime/src/ldc/dcompute.d index 1ed33e5dc96..a8d8394ac1e 100644 --- a/runtime/druntime/src/ldc/dcompute.d +++ b/runtime/druntime/src/ldc/dcompute.d @@ -25,7 +25,6 @@ enum ReflectTarget : uint * valid values of _version are for OpenCL 100 110 120 200 210 * and for CUDA are x*100 + y*10 for x any valid values of sm x.y * use 0 as a wildcard to match any version. - * This is mostly used for selecting the correct intrinsic for the * given target and version, but could also be used to tailor for * performance characteristics. See dcompute.std.index for an example From 3615c1b8b5ff1c1e24e6970f28c13a49234ec7d0 Mon Sep 17 00:00:00 2001 From: Asadbek Date: Thu, 30 Apr 2026 15:19:06 +0200 Subject: [PATCH 13/20] Remove unused include and whitespace --- driver/dcomputecodegenerator.cpp | 1 - gen/dcompute/abi-rewrites.h | 1 - gen/dcompute/targetMetal.cpp | 2 +- runtime/druntime/src/ldc/dcompute.d | 2 ++ 4 files changed, 3 insertions(+), 3 deletions(-) diff --git a/driver/dcomputecodegenerator.cpp b/driver/dcomputecodegenerator.cpp index a427c0ac13f..70496413c09 100644 --- a/driver/dcomputecodegenerator.cpp +++ b/driver/dcomputecodegenerator.cpp @@ -67,7 +67,6 @@ DComputeCodeGenManager::createComputeTarget(const std::string &s) { #endif } - #define STR(...) #__VA_ARGS__ #define XSTR(x) STR(x) diff --git a/gen/dcompute/abi-rewrites.h b/gen/dcompute/abi-rewrites.h index 0a159b67633..f3b2292179f 100644 --- a/gen/dcompute/abi-rewrites.h +++ b/gen/dcompute/abi-rewrites.h @@ -14,7 +14,6 @@ #pragma once #include "gen/abi/generic.h" -#include "gen/dcompute/druntime.h" struct DComputePointerRewrite : ABIRewrite { LLValue *put(DValue *v, bool isLValueExp, bool) override { diff --git a/gen/dcompute/targetMetal.cpp b/gen/dcompute/targetMetal.cpp index e81f38c8617..751f805585d 100644 --- a/gen/dcompute/targetMetal.cpp +++ b/gen/dcompute/targetMetal.cpp @@ -197,7 +197,7 @@ class TargetMetal : public DComputeTarget { }; } // anonymous namespace. -auto createMetalTarget(llvm::LLVMContext &c, int version) -> DComputeTarget * { +DComputeTarget* createMetalTarget(llvm::LLVMContext &c, int version) { return new TargetMetal(c, version); }; diff --git a/runtime/druntime/src/ldc/dcompute.d b/runtime/druntime/src/ldc/dcompute.d index a8d8394ac1e..bde43b44237 100644 --- a/runtime/druntime/src/ldc/dcompute.d +++ b/runtime/druntime/src/ldc/dcompute.d @@ -24,7 +24,9 @@ enum ReflectTarget : uint * arguments MUST be compiletime constants * valid values of _version are for OpenCL 100 110 120 200 210 * and for CUDA are x*100 + y*10 for x any valid values of sm x.y + * and for Metal is 4.0.0 * use 0 as a wildcard to match any version. + * This is mostly used for selecting the correct intrinsic for the * given target and version, but could also be used to tailor for * performance characteristics. See dcompute.std.index for an example From e98a9c36ddb71dedc8f2b3618e95aeaddf15671b Mon Sep 17 00:00:00 2001 From: Asadbek Date: Thu, 30 Apr 2026 15:54:01 +0200 Subject: [PATCH 14/20] Update command line option for metal dcompute target --- driver/dcomputecodegenerator.cpp | 16 +++++++++++++--- gen/dcompute/targetMetal.cpp | 7 ++++--- runtime/druntime/src/ldc/dcompute.d | 2 +- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/driver/dcomputecodegenerator.cpp b/driver/dcomputecodegenerator.cpp index 70496413c09..14285322615 100644 --- a/driver/dcomputecodegenerator.cpp +++ b/driver/dcomputecodegenerator.cpp @@ -43,9 +43,15 @@ DComputeCodeGenManager::createComputeTarget(const std::string &s) { #endif } - if (s.substr(0, 6) == "air64-") { + if (s.substr(0, 6) == "metal-") { #if LDC_LLVM_SUPPORTED_TARGET_AArch64 //&& LDC_LLVM_VER >= 2100 - return createMetalTarget(ctx, 64); +#define METAL_VALID_VER_INIT 400 + const std::array valid_metal_versions = {{METAL_VALID_VER_INIT}}; + const int v = atoi(s.c_str() + 6); + if (std::find(valid_metal_versions.begin(), valid_metal_versions.end(), v) != + valid_metal_versions.end()) { + return createMetalTarget(ctx, v); + } #else error(Loc(), "LDC was not built with Apple Metal Dcompute support!"); #endif @@ -72,14 +78,18 @@ DComputeCodeGenManager::createComputeTarget(const std::string &s) { error(Loc(), "Unrecognised or invalid DCompute targets: the format is ocl-xy0 " - "for OpenCl x.y and cuda-xy0 for CUDA CC x.y." + "for OpenCl x.y and cuda-xy0 for CUDA CC x.y and metal-xy0 for Metal x.y." #if LDC_LLVM_SUPPORTED_TARGET_SPIRV " Valid version strings for OpenCl are ocl-{" XSTR(OCL_VALID_VER_INIT) "}." #endif #if LDC_LLVM_SUPPORTED_TARGET_NVPTX " Valid version strings for CUDA are cuda-{" XSTR(CUDA_VALID_VER_INIT) "}." +#endif +#if LDC_LLVM_SUPPORTED_TARGET_AArch64 + "Valid version strings for Metal are metal-{" XSTR(METAL_VALID_VER_INIT) "}" #endif ); + #undef XSTR #undef STR diff --git a/gen/dcompute/targetMetal.cpp b/gen/dcompute/targetMetal.cpp index 751f805585d..c8312e39c38 100644 --- a/gen/dcompute/targetMetal.cpp +++ b/gen/dcompute/targetMetal.cpp @@ -56,11 +56,12 @@ class TargetMetal : public DComputeTarget { llvm::NamedMDNode *airLangVersion = _ir->module.getOrInsertNamedMetadata("air.language_version"); + std::array langArr = { metaString("Metal"), - metaInt(4), - metaInt(0), - metaInt(0), + metaInt(tversion / 100), + metaInt((tversion / 10) % 10), + metaInt(tversion % 10), }; airLangVersion->addOperand(llvm::MDTuple::get(ctx, langArr)); diff --git a/runtime/druntime/src/ldc/dcompute.d b/runtime/druntime/src/ldc/dcompute.d index bde43b44237..4f47dd38896 100644 --- a/runtime/druntime/src/ldc/dcompute.d +++ b/runtime/druntime/src/ldc/dcompute.d @@ -24,7 +24,7 @@ enum ReflectTarget : uint * arguments MUST be compiletime constants * valid values of _version are for OpenCL 100 110 120 200 210 * and for CUDA are x*100 + y*10 for x any valid values of sm x.y - * and for Metal is 4.0.0 + * and for Metal is 400 * use 0 as a wildcard to match any version. * This is mostly used for selecting the correct intrinsic for the From 13ba2222b9cb966cdb8aa87590d96265c638016d Mon Sep 17 00:00:00 2001 From: Asadbek Date: Thu, 30 Apr 2026 15:55:02 +0200 Subject: [PATCH 15/20] Update commented doc about metal dcompute target version --- runtime/druntime/src/ldc/dcompute.d | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/druntime/src/ldc/dcompute.d b/runtime/druntime/src/ldc/dcompute.d index 4f47dd38896..ec807b58041 100644 --- a/runtime/druntime/src/ldc/dcompute.d +++ b/runtime/druntime/src/ldc/dcompute.d @@ -24,7 +24,7 @@ enum ReflectTarget : uint * arguments MUST be compiletime constants * valid values of _version are for OpenCL 100 110 120 200 210 * and for CUDA are x*100 + y*10 for x any valid values of sm x.y - * and for Metal is 400 + * and 400 for Metal 4.0.0 * use 0 as a wildcard to match any version. * This is mostly used for selecting the correct intrinsic for the From d6b2612e16ee62d759f356eab104f4ed4fa9cb62 Mon Sep 17 00:00:00 2001 From: Asadbek Date: Sun, 3 May 2026 00:03:03 +0200 Subject: [PATCH 16/20] Add codegen for metallib through xcrun -sdk macosx metallib terminal linker --- dmd/globals.h | 1 + driver/targetmachine.cpp | 10 +++++-- driver/targetmachine.h | 2 +- driver/toobj.cpp | 59 ++++++++++++++++++++++++++++++++++++++-- 4 files changed, 67 insertions(+), 5 deletions(-) diff --git a/dmd/globals.h b/dmd/globals.h index bad8e9c4100..d38f45c9956 100644 --- a/dmd/globals.h +++ b/dmd/globals.h @@ -393,6 +393,7 @@ const DString ll_ext = "ll"; const DString mlir_ext = "mlir"; const DString bc_ext = "bc"; const DString s_ext = "s"; +const DString metallib_ext = "metallib"; #endif struct CompileEnv diff --git a/driver/targetmachine.cpp b/driver/targetmachine.cpp index 6f85efd6603..c13949eee5a 100644 --- a/driver/targetmachine.cpp +++ b/driver/targetmachine.cpp @@ -653,6 +653,12 @@ ComputeBackend::Type getComputeTargetType(llvm::Module* m) { return ComputeBackend::SPIRV; else if (a == llvm::Triple::nvptx || a == llvm::Triple::nvptx64) return ComputeBackend::NVPTX; - else - return ComputeBackend::None; + + llvm::StringRef tripleString = m->getTargetTriple(); + + + if (tripleString.starts_with("air64")) + return ComputeBackend::METAL; + + return ComputeBackend::None; } diff --git a/driver/targetmachine.h b/driver/targetmachine.h index e0c21ee1407..bf5c97625ea 100644 --- a/driver/targetmachine.h +++ b/driver/targetmachine.h @@ -41,7 +41,7 @@ class Module; } namespace ComputeBackend { -enum Type { None, SPIRV, NVPTX }; +enum Type { None, SPIRV, NVPTX, METAL }; } ComputeBackend::Type getComputeTargetType(llvm::Module*); diff --git a/driver/toobj.cpp b/driver/toobj.cpp index 211d8d88f46..4ee92725abb 100644 --- a/driver/toobj.cpp +++ b/driver/toobj.cpp @@ -38,7 +38,6 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/IR/Module.h" #include -#include namespace llvm { namespace codegen { @@ -73,6 +72,62 @@ void codegenModule(llvm::TargetMachine &Target, llvm::Module &m, #endif } +#ifdef LDC_LLVM_SUPPORTED_TARGET_AArch64 + if (cb == ComputeBackend::METAL) { + + // Terminate upon errors during the LLVM passes. + if (global.errors || global.warnings) { + Logger::println("Aborting because of errors/warnings during LLVM passes"); + fatal(); + } + + { + std::error_code errinfo; + llvm::ToolOutputFile out(filename, errinfo, llvm::sys::fs::OF_None); + if (errinfo) { + error(Loc(), "cannot write file '%s': %s", filename, + errinfo.message().c_str()); + fatal(); + } + + llvm::WriteBitcodeToFile(m, out.os()); + + out.keep(); + } + + uint64_t fileSize = 0; + llvm::sys::fs::file_size(filename, fileSize); + + auto xcrunpath = llvm::sys::findProgramByName("xcrun"); + if (!xcrunpath) { + error(Loc(), "xcrun not found - XCode should be installed first!"); + fatal(); + } + + llvm::SmallString<256> metallibOutPath; + llvm::sys::fs::current_path(metallibOutPath); + llvm::sys::path::append(metallibOutPath, llvm::sys::path::filename(filename)); + llvm::sys::path::replace_extension(metallibOutPath, "metallib"); + + + std::vector args = { + xcrunpath.get(), "-sdk", "macosx", "metallib", filename, "-o", metallibOutPath.c_str() + }; + + std::string errorMsg; + + int status = executeToolAndWait(Loc(), args[0], args); + + if (status < 0) { + error(Loc(), "program received signal %d (%s)", -status, + strsignal(-status)); + fatal(); + } + + return; + } +#endif + std::error_code errinfo; llvm::ToolOutputFile out(filename, errinfo, llvm::sys::fs::OF_None); if (errinfo) { @@ -468,5 +523,5 @@ void writeModule(llvm::Module *m, const char *filename) { if (useIR2ObjCache) { cache::cacheObjectFile(filename, moduleHash); } - } + } } From 392acdf65a5df0423939f9aebce2b783843e42fb Mon Sep 17 00:00:00 2001 From: Asadbek Date: Sun, 3 May 2026 00:07:49 +0200 Subject: [PATCH 17/20] Move the error handling to line after writing the bitcode --- driver/toobj.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/driver/toobj.cpp b/driver/toobj.cpp index 4ee92725abb..9d4c005dd96 100644 --- a/driver/toobj.cpp +++ b/driver/toobj.cpp @@ -74,13 +74,6 @@ void codegenModule(llvm::TargetMachine &Target, llvm::Module &m, #ifdef LDC_LLVM_SUPPORTED_TARGET_AArch64 if (cb == ComputeBackend::METAL) { - - // Terminate upon errors during the LLVM passes. - if (global.errors || global.warnings) { - Logger::println("Aborting because of errors/warnings during LLVM passes"); - fatal(); - } - { std::error_code errinfo; llvm::ToolOutputFile out(filename, errinfo, llvm::sys::fs::OF_None); @@ -93,6 +86,12 @@ void codegenModule(llvm::TargetMachine &Target, llvm::Module &m, llvm::WriteBitcodeToFile(m, out.os()); out.keep(); + + // Terminate upon errors during the LLVM passes. + if (global.errors || global.warnings) { + Logger::println("Aborting because of errors/warnings during LLVM passes"); + fatal(); + } } uint64_t fileSize = 0; From cfb8a02f9f3b79c6140216b328b7d2d0efa67d2c Mon Sep 17 00:00:00 2001 From: Asadbek Date: Sun, 3 May 2026 00:09:10 +0200 Subject: [PATCH 18/20] remove empty spaces and unused variables --- driver/targetmachine.cpp | 1 - driver/toobj.cpp | 3 --- 2 files changed, 4 deletions(-) diff --git a/driver/targetmachine.cpp b/driver/targetmachine.cpp index c13949eee5a..3a8c1598310 100644 --- a/driver/targetmachine.cpp +++ b/driver/targetmachine.cpp @@ -655,7 +655,6 @@ ComputeBackend::Type getComputeTargetType(llvm::Module* m) { return ComputeBackend::NVPTX; llvm::StringRef tripleString = m->getTargetTriple(); - if (tripleString.starts_with("air64")) return ComputeBackend::METAL; diff --git a/driver/toobj.cpp b/driver/toobj.cpp index 9d4c005dd96..d0d2d4916b4 100644 --- a/driver/toobj.cpp +++ b/driver/toobj.cpp @@ -94,9 +94,6 @@ void codegenModule(llvm::TargetMachine &Target, llvm::Module &m, } } - uint64_t fileSize = 0; - llvm::sys::fs::file_size(filename, fileSize); - auto xcrunpath = llvm::sys::findProgramByName("xcrun"); if (!xcrunpath) { error(Loc(), "xcrun not found - XCode should be installed first!"); From 4c3010544b759e570e6b2fe2080a1feedb5997b5 Mon Sep 17 00:00:00 2001 From: Asadbek Date: Sun, 3 May 2026 00:13:21 +0200 Subject: [PATCH 19/20] remove unnecessary whitespaces --- driver/toobj.cpp | 1 - gen/abi/metal.cpp | 1 - gen/dcompute/abi-rewrites.h | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/driver/toobj.cpp b/driver/toobj.cpp index d0d2d4916b4..a81ac219e4b 100644 --- a/driver/toobj.cpp +++ b/driver/toobj.cpp @@ -105,7 +105,6 @@ void codegenModule(llvm::TargetMachine &Target, llvm::Module &m, llvm::sys::path::append(metallibOutPath, llvm::sys::path::filename(filename)); llvm::sys::path::replace_extension(metallibOutPath, "metallib"); - std::vector args = { xcrunpath.get(), "-sdk", "macosx", "metallib", filename, "-o", metallibOutPath.c_str() }; diff --git a/gen/abi/metal.cpp b/gen/abi/metal.cpp index 6035d651e48..904f3e88082 100644 --- a/gen/abi/metal.cpp +++ b/gen/abi/metal.cpp @@ -14,7 +14,6 @@ #include "dmd/mtype.h" #include - using namespace dmd; struct MetalABI : TargetABI { diff --git a/gen/dcompute/abi-rewrites.h b/gen/dcompute/abi-rewrites.h index f3b2292179f..ead84da2875 100644 --- a/gen/dcompute/abi-rewrites.h +++ b/gen/dcompute/abi-rewrites.h @@ -35,7 +35,7 @@ struct DComputePointerRewrite : ABIRewrite { struct DcomputeMetalScalarRewrite : ABIRewrite { LLType *type(Type* t) override { - // XXXX: Scalar variables are stored in the constant memory space for Metal GPU + // XXX: Scalar variables are stored in the constant memory space for Metal GPU return llvm::PointerType::get(gIR->context(), 2/*Constant Memory space*/); } From 8e1f05f6bd5426f5ae0aec7dd5197faef14c4721 Mon Sep 17 00:00:00 2001 From: Asadbek Date: Mon, 11 May 2026 10:43:26 +0200 Subject: [PATCH 20/20] Fix CI error and add function inline pass for non-kernel functions --- dmd/globals.h | 1 - driver/targetmachine.cpp | 11 +++++--- driver/toobj.cpp | 55 +++++++++++++++++++++++++++++++++++++--- 3 files changed, 59 insertions(+), 8 deletions(-) diff --git a/dmd/globals.h b/dmd/globals.h index d38f45c9956..bad8e9c4100 100644 --- a/dmd/globals.h +++ b/dmd/globals.h @@ -393,7 +393,6 @@ const DString ll_ext = "ll"; const DString mlir_ext = "mlir"; const DString bc_ext = "bc"; const DString s_ext = "s"; -const DString metallib_ext = "metallib"; #endif struct CompileEnv diff --git a/driver/targetmachine.cpp b/driver/targetmachine.cpp index 3a8c1598310..4187f85144b 100644 --- a/driver/targetmachine.cpp +++ b/driver/targetmachine.cpp @@ -654,10 +654,15 @@ ComputeBackend::Type getComputeTargetType(llvm::Module* m) { else if (a == llvm::Triple::nvptx || a == llvm::Triple::nvptx64) return ComputeBackend::NVPTX; - llvm::StringRef tripleString = m->getTargetTriple(); - + +#if LDC_LLVM_SUPPORTED_TARGET_AArch64 && LLVM_VERSION_MAJOR >= 21 + llvm::StringRef tripleString = m->getTargetTriple().str(); +#else + llvm::StringRef tripleString = m->getTargetTriple(); +#endif + + if (tripleString.starts_with("air64")) return ComputeBackend::METAL; - return ComputeBackend::None; } diff --git a/driver/toobj.cpp b/driver/toobj.cpp index a81ac219e4b..91b176f4cd3 100644 --- a/driver/toobj.cpp +++ b/driver/toobj.cpp @@ -35,6 +35,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/IR/Module.h" #include @@ -57,6 +58,40 @@ void runDLLImportRelocationPass(llvm::TargetMachine &Target, llvm::Module &m) { pm.run(m); } +void inlineDComputeKernelFunctions(llvm::Module *m) { + // Create a PassManager to hold and optimize the collection of passes we are + // about to build. + llvm::legacy::PassManager Passes; + + llvm::SmallPtrSet kernelFunctions; + + // Extract all the kernel functions + if (auto *kernelMetadata = m->getNamedMetadata("air.kernel")) { + for(auto *op: kernelMetadata->operands()) { + if (auto *F = llvm::mdconst::dyn_extract(op->getOperand(0))) { + kernelFunctions.insert(F); + } + } + } + + // Prepare non-kernel functions to be inlined + for(auto& F: *m) { + if (!F.isDeclaration() && !kernelFunctions.contains(&F)) { + F.addFnAttr(llvm::Attribute::AlwaysInline); + } + } + + Passes.add(llvm::createAlwaysInlinerLegacyPass()); + + Passes.run(*m); + + // Terminate upon errors during the LLVM passes. + if (global.errors || global.warnings) { + error(Loc(), "Aborting because of errors/warnings during LLVM passes"); + fatal(); + } +} + // based on llc code, University of Illinois Open Source License void codegenModule(llvm::TargetMachine &Target, llvm::Module &m, const char *filename, @@ -75,6 +110,9 @@ void codegenModule(llvm::TargetMachine &Target, llvm::Module &m, #ifdef LDC_LLVM_SUPPORTED_TARGET_AArch64 if (cb == ComputeBackend::METAL) { { + // Inline non-kernel functions for Metal dcompute target + inlineDComputeKernelFunctions(&m); + std::error_code errinfo; llvm::ToolOutputFile out(filename, errinfo, llvm::sys::fs::OF_None); if (errinfo) { @@ -82,14 +120,14 @@ void codegenModule(llvm::TargetMachine &Target, llvm::Module &m, errinfo.message().c_str()); fatal(); } - + llvm::WriteBitcodeToFile(m, out.os()); out.keep(); // Terminate upon errors during the LLVM passes. if (global.errors || global.warnings) { - Logger::println("Aborting because of errors/warnings during LLVM passes"); + error(Loc(), "Aborting because of errors/warnings during LLVM passes!"); fatal(); } } @@ -104,7 +142,7 @@ void codegenModule(llvm::TargetMachine &Target, llvm::Module &m, llvm::sys::fs::current_path(metallibOutPath); llvm::sys::path::append(metallibOutPath, llvm::sys::path::filename(filename)); llvm::sys::path::replace_extension(metallibOutPath, "metallib"); - + std::vector args = { xcrunpath.get(), "-sdk", "macosx", "metallib", filename, "-o", metallibOutPath.c_str() }; @@ -353,6 +391,15 @@ std::string replaceExtensionWith(const DArray &ext, } void writeModule(llvm::Module *m, const char *filename) { +// Inline non-kernel functions for Metal dcompute target +#ifdef LDC_LLVM_SUPPORTED_TARGET_AArch64 + const ComputeBackend::Type cb = getComputeTargetType(m); + + if (cb == ComputeBackend::METAL) { + inlineDComputeKernelFunctions(m); + } +#endif + const bool doLTO = opts::isUsingLTO(); const bool outputObj = shouldOutputObjectFile(); const bool assembleExternally = shouldAssembleExternally(); @@ -518,5 +565,5 @@ void writeModule(llvm::Module *m, const char *filename) { if (useIR2ObjCache) { cache::cacheObjectFile(filename, moduleHash); } - } + } }