Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion driver/cl_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,7 @@ cl::opt<unsigned>
cl::desc("Warn for stack size bigger than the given number"),
cl::value_desc("threshold"));

#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX
#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX || LDC_LLVM_SUPPORTED_TARGET_AArch64
cl::list<std::string>
dcomputeTargets("mdcompute-targets", cl::CommaSeparated,
cl::desc("Generates code for the specified DCompute target"
Expand Down
2 changes: 1 addition & 1 deletion driver/cl_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ extern cl::opt<std::string> saveOptimizationRecord;

extern cl::opt<unsigned> fWarnStackSize;

#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX
#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX || LDC_LLVM_SUPPORTED_TARGET_AArch64
extern cl::list<std::string> dcomputeTargets;
extern cl::opt<std::string> dcomputeFilePrefix;
#endif
Expand Down
24 changes: 21 additions & 3 deletions driver/dcomputecodegenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#include <string>
#include <algorithm>

#if !(LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX)
#if !(LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX || LDC_LLVM_SUPPORTED_TARGET_AArch64)

DComputeCodeGenManager::DComputeCodeGenManager(llvm::LLVMContext &c) : ctx(c) {}
void DComputeCodeGenManager::emit(Module *) {}
Expand All @@ -43,6 +43,20 @@ DComputeCodeGenManager::createComputeTarget(const std::string &s) {
#endif
}

// Metal targets are spelled "metal-xy0" for Metal x.y.
if (s.substr(0, 6) == "metal-") {
#if LDC_LLVM_SUPPORTED_TARGET_AArch64 //&& LDC_LLVM_VER >= 2100
#define METAL_VALID_VER_INIT 400
  // The array extent must equal the number of entries in
  // METAL_VALID_VER_INIT: surplus elements are value-initialised to 0, and
  // atoi() also yields 0 for a missing or non-numeric suffix, so an oversized
  // array would accept "metal-" or "metal-foo" as version 0.
  const std::array<int, 1> valid_metal_versions = {{METAL_VALID_VER_INIT}};
  const int v = atoi(s.c_str() + 6);
  if (std::find(valid_metal_versions.begin(), valid_metal_versions.end(), v) !=
      valid_metal_versions.end()) {
    return createMetalTarget(ctx, v);
  }
#else
  error(Loc(), "LDC was not built with Apple Metal Dcompute support!");
#endif
}

if (s.substr(0, 5) == "cuda-") {
#if LDC_LLVM_SUPPORTED_TARGET_NVPTX
#define CUDA_VALID_VER_INIT 100, 110, 120, 130, 200, 210, 300, 350, 370,\
Expand All @@ -64,14 +78,18 @@ DComputeCodeGenManager::createComputeTarget(const std::string &s) {

error(Loc(),
"Unrecognised or invalid DCompute targets: the format is ocl-xy0 "
"for OpenCl x.y and cuda-xy0 for CUDA CC x.y."
"for OpenCl x.y and cuda-xy0 for CUDA CC x.y and metal-xy0 for Metal x.y."
#if LDC_LLVM_SUPPORTED_TARGET_SPIRV
" Valid version strings for OpenCl are ocl-{" XSTR(OCL_VALID_VER_INIT) "}."
#endif
#if LDC_LLVM_SUPPORTED_TARGET_NVPTX
" Valid version strings for CUDA are cuda-{" XSTR(CUDA_VALID_VER_INIT) "}."
#endif
#if LDC_LLVM_SUPPORTED_TARGET_AArch64
// Leading space and trailing period keep the concatenated message readable.
" Valid version strings for Metal are metal-{" XSTR(METAL_VALID_VER_INIT) "}."
#endif
);


#undef XSTR
#undef STR
Expand Down Expand Up @@ -106,4 +124,4 @@ DComputeCodeGenManager::~DComputeCodeGenManager() {
gTargetMachine = oldGTargetMachine;
}

#endif // LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX
#endif // LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX || LDC_LLVM_SUPPORTED_TARGET_AArch64
2 changes: 1 addition & 1 deletion driver/dcomputecodegenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace llvm {
class DComputeCodeGenManager {

llvm::LLVMContext &ctx;
llvm::SmallVector<DComputeTarget *, 2> targets;
llvm::SmallVector<DComputeTarget *, 3> targets;
DComputeTarget *createComputeTarget(const std::string &s);
IRState *oldGIR = nullptr;
llvm::TargetMachine *oldGTargetMachine = nullptr;
Expand Down
2 changes: 1 addition & 1 deletion driver/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -950,7 +950,7 @@ void registerPredefinedVersions() {
VersionCondition::addPredefinedGlobalIdent("all");
VersionCondition::addPredefinedGlobalIdent("D_Version2");

#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX
#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX || LDC_LLVM_SUPPORTED_TARGET_AArch64
if (dcomputeTargets.size() != 0) {
VersionCondition::addPredefinedGlobalIdent("LDC_DCompute");
}
Expand Down
14 changes: 12 additions & 2 deletions driver/targetmachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,16 @@ ComputeBackend::Type getComputeTargetType(llvm::Module* m) {
return ComputeBackend::SPIRV;
else if (a == llvm::Triple::nvptx || a == llvm::Triple::nvptx64)
return ComputeBackend::NVPTX;
else
return ComputeBackend::None;


// Module::getTargetTriple() returns an llvm::Triple instead of a string
// starting with LLVM 21. Which accessor to use therefore depends only on the
// LLVM version, not on which backends LDC was built with.
#if LLVM_VERSION_MAJOR >= 21
llvm::StringRef tripleString = m->getTargetTriple().str();
#else
llvm::StringRef tripleString = m->getTargetTriple();
#endif


if (tripleString.starts_with("air64"))
return ComputeBackend::METAL;
return ComputeBackend::None;
}
2 changes: 1 addition & 1 deletion driver/targetmachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class Module;
}

namespace ComputeBackend {
enum Type { None, SPIRV, NVPTX };
enum Type { None, SPIRV, NVPTX, METAL };
}

ComputeBackend::Type getComputeTargetType(llvm::Module*);
Expand Down
99 changes: 98 additions & 1 deletion driver/toobj.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/IR/Module.h"
#include <cstddef>
#include <fstream>

namespace llvm {
namespace codegen {
Expand All @@ -58,6 +58,40 @@ void runDLLImportRelocationPass(llvm::TargetMachine &Target, llvm::Module &m) {
pm.run(m);
}

/// Forces every defined non-kernel function of `m` to be inlined.
///
/// Kernel entry points are taken from operand 0 of each entry of the module's
/// "air.kernel" named metadata. Every other function definition is marked
/// `alwaysinline`, and the legacy always-inliner pass is then run over the
/// module. Calls fatal() if `global.errors` or `global.warnings` is set once
/// the pass has run.
void inlineDComputeKernelFunctions(llvm::Module *m) {
  llvm::SmallPtrSet<llvm::Function *, 8> kernelFunctions;

  // Extract all the kernel functions
  if (auto *kernelMetadata = m->getNamedMetadata("air.kernel")) {
    for (auto *op : kernelMetadata->operands()) {
      // Skip malformed entries (no operands / null operand) instead of
      // tripping an assertion inside LLVM.
      if (op->getNumOperands() == 0) {
        continue;
      }
      if (auto *F = llvm::mdconst::dyn_extract_or_null<llvm::Function>(
              op->getOperand(0))) {
        kernelFunctions.insert(F);
      }
    }
  }

  // Prepare non-kernel functions to be inlined
  for (auto &F : *m) {
    if (F.isDeclaration() || kernelFunctions.contains(&F)) {
      continue;
    }
    // `alwaysinline` is incompatible with `noinline`, and `optnone` in turn
    // requires `noinline`; drop both so the resulting IR stays valid.
    F.removeFnAttr(llvm::Attribute::OptimizeNone);
    F.removeFnAttr(llvm::Attribute::NoInline);
    F.addFnAttr(llvm::Attribute::AlwaysInline);
  }

  llvm::legacy::PassManager Passes;
  Passes.add(llvm::createAlwaysInlinerLegacyPass());
  Passes.run(*m);

  // Terminate upon errors during the LLVM passes.
  if (global.errors || global.warnings) {
    error(Loc(), "Aborting because of errors/warnings during LLVM passes");
    fatal();
  }
}

// based on llc code, University of Illinois Open Source License
void codegenModule(llvm::TargetMachine &Target, llvm::Module &m,
const char *filename,
Expand All @@ -73,6 +107,60 @@ void codegenModule(llvm::TargetMachine &Target, llvm::Module &m,
#endif
}

#if LDC_LLVM_SUPPORTED_TARGET_AArch64
  // Metal DCompute path: the module is written to `filename` as LLVM bitcode
  // and then packaged into a .metallib (placed in the current working
  // directory) by running `xcrun -sdk macosx metallib`.
  if (cb == ComputeBackend::METAL) {
    {
      // Inline non-kernel functions for Metal dcompute target
      inlineDComputeKernelFunctions(&m);

      // Inner scope: `out` is destroyed before metallib is invoked on the file.
      std::error_code errinfo;
      llvm::ToolOutputFile out(filename, errinfo, llvm::sys::fs::OF_None);
      if (errinfo) {
        error(Loc(), "cannot write file '%s': %s", filename,
              errinfo.message().c_str());
        fatal();
      }

      llvm::WriteBitcodeToFile(m, out.os());

      out.keep();

      // Terminate upon errors during the LLVM passes.
      if (global.errors || global.warnings) {
        error(Loc(), "Aborting because of errors/warnings during LLVM passes!");
        fatal();
      }
    }

    auto xcrunpath = llvm::sys::findProgramByName("xcrun");
    if (!xcrunpath) {
      error(Loc(), "xcrun not found - XCode should be installed first!");
      fatal();
    }

    // <cwd>/<basename of filename>.metallib
    llvm::SmallString<256> metallibOutPath;
    llvm::sys::fs::current_path(metallibOutPath);
    llvm::sys::path::append(metallibOutPath,
                            llvm::sys::path::filename(filename));
    llvm::sys::path::replace_extension(metallibOutPath, "metallib");

    std::vector<std::string> args = {
        xcrunpath.get(), "-sdk", "macosx", "metallib",
        filename,        "-o",   metallibOutPath.c_str()};

    const int status = executeToolAndWait(Loc(), args[0], args);

    if (status < 0) {
      error(Loc(), "program received signal %d (%s)", -status,
            strsignal(-status));
      fatal();
    }

    // A non-zero exit status means no usable .metallib was produced; do not
    // return as if code generation had succeeded.
    if (status > 0) {
      error(Loc(), "metallib failed with exit status %d", status);
      fatal();
    }

    return;
  }
#endif

std::error_code errinfo;
llvm::ToolOutputFile out(filename, errinfo, llvm::sys::fs::OF_None);
if (errinfo) {
Expand Down Expand Up @@ -303,6 +391,15 @@ std::string replaceExtensionWith(const DArray<const char> &ext,
}

void writeModule(llvm::Module *m, const char *filename) {
// Inline non-kernel functions for Metal dcompute target
#ifdef LDC_LLVM_SUPPORTED_TARGET_AArch64
const ComputeBackend::Type cb = getComputeTargetType(m);

if (cb == ComputeBackend::METAL) {
inlineDComputeKernelFunctions(m);
}
#endif

const bool doLTO = opts::isUsingLTO();
const bool outputObj = shouldOutputObjectFile();
const bool assembleExternally = shouldAssembleExternally();
Expand Down
10 changes: 7 additions & 3 deletions gen/abi/abi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@

#include "dmd/argtypes.h"
#include "dmd/expression.h"
#include "dmd/id.h"
#include "dmd/identifier.h"
#include "dmd/target.h"
#include "gen/abi/targets.h"
#include "gen/abi/generic.h"
Expand All @@ -24,7 +22,6 @@
#include "gen/tollvm.h"
#include "ir/irfunction.h"
#include "ir/irfuncty.h"
#include <algorithm>

using namespace dmd;

Expand Down Expand Up @@ -286,6 +283,13 @@ TargetABI *TargetABI::getTarget() {
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
return getWasmTargetABI();

case llvm::Triple::UnknownArch:
if (global.params.targetTriple->getArchName() == "air64") {
return createMetalABI();
}
// fallthrough

default:
warning(Loc(),
"unknown target ABI, falling back to generic implementation. C/C++ "
Expand Down
60 changes: 60 additions & 0 deletions gen/abi/metal.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
//===-- gen/abi/metal.cpp ---------------------------------------*- C++ -*-===//
//
// LDC – the LLVM D compiler
//
// This file is distributed under the BSD-style LDC license. See the LICENSE
// file for details.
//
//===----------------------------------------------------------------------===//

#include "gen/abi/abi.h"
#include "gen/dcompute/druntime.h"
#include "gen/dcompute/abi-rewrites.h"
#include "ir/irfuncty.h"
#include "dmd/mtype.h"
#include <optional>

using namespace dmd;

struct MetalABI : TargetABI {
DComputePointerRewrite pointerRewite;
DcomputeMetalScalarRewrite metalScalarRewrite;

auto returnInArg(TypeFunction *tf, bool needsThis) -> bool override {
return false;
}

auto passByVal(TypeFunction *tf, Type*t) -> bool override {
return false;
}

void rewriteFunctionType(IrFuncTy &fty) override {
for (auto arg : fty.args) {
if (!arg->byref) {
rewriteArgument(fty, *arg);
}
}
}

void rewriteArgument(IrFuncTy &fty, IrFuncTyArg &arg) override {
TargetABI::rewriteArgument(fty, arg);

if (arg.rewrite) {
return;
}

Type *ty = arg.type->toBasetype();
std::optional<DcomputePointer> ptr;

if (ty->ty == TY::Tstruct &&
(ptr = toDcomputePointer(static_cast<TypeStruct *>(ty)->sym))) {
pointerRewite.applyTo(arg);
}

if (ty->isScalar()) {
metalScalarRewrite.applyTo(arg);
}
}
};

/// Returns a newly heap-allocated MetalABI instance on every call.
TargetABI* createMetalABI() { return new MetalABI(); }
2 changes: 2 additions & 0 deletions gen/abi/targets.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,5 @@ TargetABI *getX86TargetABI();
TargetABI *getLoongArch64TargetABI();

TargetABI *getWasmTargetABI();

TargetABI* createMetalABI();
16 changes: 16 additions & 0 deletions gen/dcompute/abi-rewrites.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,19 @@ struct DComputePointerRewrite : ABIRewrite {
return ptr->toLLVMType(true);
}
};

struct DcomputeMetalScalarRewrite : ABIRewrite {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this now unused?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is still used in the abi/metal.cpp file:

struct MetalABI : TargetABI {
    DComputePointerRewrite pointerRewite;
    DcomputeMetalScalarRewrite metalScalarRewrite;
.....

It is used to turn scalar arguments into pointers in the constant memory address space. However, it turns out that scalars can also be stored in the device-level address space, which, as I understand it, corresponds to Global in DCompute terms.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Scalar arguments to kernels are just passed as parameters.

/// Returns the LLVM type a rewritten argument is given: a pointer type in
/// address space 2, independent of the D type `t`.
LLType *type(Type* t) override {
// XXX: Scalar variables are stored in the constant memory space for Metal GPU
// NOTE(review): that address space 2 is Metal's constant space is taken from
// the inline annotation below, not established elsewhere here -- confirm.
return llvm::PointerType::get(gIR->context(), 2/*Constant Memory space*/);
}

/// Returns `v` unchanged; `dty` is not consulted.
LLValue *getLVal(Type *dty, LLValue *v) override {
return v;
}

/// Returns the rvalue of `v` as produced by DtoRVal; both bool parameters are
/// ignored.
LLValue *put(DValue *v, bool isLValueExp, bool) override {
  return DtoRVal(v);
}
};
2 changes: 1 addition & 1 deletion gen/dcompute/target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//

#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX
#if LDC_LLVM_SUPPORTED_TARGET_SPIRV || LDC_LLVM_SUPPORTED_TARGET_NVPTX || LDC_LLVM_SUPPORTED_TARGET_AArch64

#include "dmd/dsymbol.h"
#include "dmd/errors.h"
Expand Down
6 changes: 5 additions & 1 deletion gen/dcompute/target.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class DComputeTarget {
public:
llvm::LLVMContext &ctx;
int tversion; // OpenCL or CUDA CC version:major*100 + minor*10
enum class ID { Host = 0, OpenCL = 1, CUDA = 2 };
enum class ID { Host = 0, OpenCL = 1, CUDA = 2, Metal = 3 };
ID target; // ID for codegen time conditional compilation.
const char *short_name;
const char *binSuffix;
Expand Down Expand Up @@ -58,6 +58,10 @@ class DComputeTarget {
DComputeTarget *createCUDATarget(llvm::LLVMContext &c, int sm);
#endif

#if LDC_LLVM_SUPPORTED_TARGET_AArch64
DComputeTarget* createMetalTarget(llvm::LLVMContext &c, int version);
#endif

#if LDC_LLVM_SUPPORTED_TARGET_SPIRV
DComputeTarget *createOCLTarget(llvm::LLVMContext &c, int oclver);
#endif
Loading
Loading