diff --git a/clang/test/Driver/clang-sycl-linker-test.cpp b/clang/test/Driver/clang-sycl-linker-test.cpp index 74543885e32af..56b559b3e8e99 100644 --- a/clang/test/Driver/clang-sycl-linker-test.cpp +++ b/clang/test/Driver/clang-sycl-linker-test.cpp @@ -8,7 +8,8 @@ // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-spirv.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=SIMPLE-FO // SIMPLE-FO: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc -// SIMPLE-FO-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: {{.*}}_0.spv +// SIMPLE-FO-NEXT: "{{.*}}sycl-post-link{{.*}}" {{.*}} [[LLVMLINKOUT]].bc +// SIMPLE-FO-NEXT: LLVM backend: input: {{.*}}.bc, output: {{.*}}_0.spv // // Test that IMG_SPIRV image kind is set for non-AOT compilation. // RUN: llvm-objdump --offloading %t-spirv.out | FileCheck %s --check-prefix=IMAGE-KIND-SPIRV @@ -20,8 +21,9 @@ // RUN: touch %t.dir/lib2.bc // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc --library-path=%t.dir --device-libs=lib1.bc,lib2.bc -o a.spv 2>&1 \ // RUN: | FileCheck %s --check-prefix=DEVLIBS -// DEVLIBS: sycl-device-link: inputs: {{.*}}.bc libfiles: {{.*}}lib1.bc, {{.*}}lib2.bc output: [[LLVMLINKOUT:.*]].bc -// DEVLIBS-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: a_0.spv +// DEVLIBS: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: {{.*}}lib1.bc, {{.*}}lib2.bc output: [[LLVMLINKOUT:.*]].bc +// DEVLIBS-NEXT: "{{.*}}sycl-post-link{{.*}}" {{.*}} [[LLVMLINKOUT]].bc +// DEVLIBS-NEXT: LLVM backend: input: {{.*}}.bc, output: a_0.spv // // Test a simple case with a random file (not bitcode) as input. 
// RUN: touch %t.o @@ -42,7 +44,8 @@ // RUN: --ocloc-options="-a -b" \ // RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU // AOT-INTEL-GPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc -// AOT-INTEL-GPU-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]]_0.spv +// AOT-INTEL-GPU-NEXT: "{{.*}}sycl-post-link{{.*}}" {{.*}} [[LLVMLINKOUT]].bc +// AOT-INTEL-GPU-NEXT: LLVM backend: input: {{.*}}.bc, output: [[SPIRVTRANSLATIONOUT:.*]]_0.spv // AOT-INTEL-GPU-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device bmg_g21 -a -b {{.*}}-output [[SPIRVTRANSLATIONOUT]]_0.out -file [[SPIRVTRANSLATIONOUT]]_0.spv // // Test that IMG_Object image kind is set for AOT compilation (Intel GPU). @@ -54,7 +57,8 @@ // RUN: --opencl-aot-options="-a -b" \ // RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU // AOT-INTEL-CPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc -// AOT-INTEL-CPU-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]]_0.spv +// AOT-INTEL-CPU-NEXT: "{{.*}}sycl-post-link{{.*}}" {{.*}} [[LLVMLINKOUT]].bc +// AOT-INTEL-CPU-NEXT: LLVM backend: input: {{.*}}.bc, output: [[SPIRVTRANSLATIONOUT:.*]]_0.spv // AOT-INTEL-CPU-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu -a -b {{.*}}-o [[SPIRVTRANSLATIONOUT]]_0.out [[SPIRVTRANSLATIONOUT]]_0.spv // // Test that IMG_Object image kind is set for AOT compilation (Intel CPU). @@ -69,3 +73,186 @@ // RUN: not clang-sycl-linker --dry-run %t_1.bc %t_2.bc -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=NOTARGET // NOTARGET: Target triple must be specified +// +// ============================================================================ +// Tests for sycl-post-link functionality +// ============================================================================ +// +// Test that --use-sycl-post-link-tool (default) invokes external tool. 
+// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-tool.out 2>&1 \ +// RUN: --use-sycl-post-link-tool \ +// RUN: | FileCheck %s --check-prefix=USE-TOOL +// USE-TOOL: sycl-post-link{{.*}}.bc +// +// Test that --no-use-sycl-post-link-tool uses library API and requires --sycl-module-split-mode. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-lib.out 2>&1 \ +// RUN: --no-use-sycl-post-link-tool --sycl-module-split-mode=none \ +// RUN: | FileCheck %s --check-prefix=USE-LIB +// USE-LIB: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc +// USE-LIB-NOT: "{{.*}}sycl-post-link{{.*}}" +// +// Test that --sycl-module-split-mode cannot be used with --use-sycl-post-link-tool. +// RUN: not clang-sycl-linker --dry-run -triple=spirv64 %t_1.bc %t_2.bc -o %t-err.out 2>&1 \ +// RUN: --use-sycl-post-link-tool --sycl-module-split-mode=kernel \ +// RUN: | FileCheck %s --check-prefix=MODE-TOOL-ERROR +// MODE-TOOL-ERROR: --sycl-module-split-mode cannot be used with --use-sycl-post-link-tool +// +// Test --sycl-module-split-mode=kernel. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-perkernel.out 2>&1 \ +// RUN: --no-use-sycl-post-link-tool --sycl-module-split-mode=kernel \ +// RUN: | FileCheck %s --check-prefix=SPLIT-PER-KERNEL +// SPLIT-PER-KERNEL: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc +// +// Test --sycl-module-split-mode=source. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-pertu.out 2>&1 \ +// RUN: --no-use-sycl-post-link-tool --sycl-module-split-mode=source \ +// RUN: | FileCheck %s --check-prefix=SPLIT-PER-SOURCE +// SPLIT-PER-SOURCE: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc +// +// Test --sycl-module-split-mode=auto. 
+// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-auto.out 2>&1 \ +// RUN: --no-use-sycl-post-link-tool --sycl-module-split-mode=auto \ +// RUN: | FileCheck %s --check-prefix=SPLIT-AUTO +// SPLIT-AUTO: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc +// +// Test --sycl-module-split-mode=none. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-none.out 2>&1 \ +// RUN: --no-use-sycl-post-link-tool --sycl-module-split-mode=none \ +// RUN: | FileCheck %s --check-prefix=SPLIT-NONE +// SPLIT-NONE: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc +// +// Test invalid split mode. +// RUN: not clang-sycl-linker --dry-run -triple=spirv64 %t_1.bc %t_2.bc -o %t-invalid.out 2>&1 \ +// RUN: --no-use-sycl-post-link-tool --sycl-module-split-mode=invalid \ +// RUN: | FileCheck %s --check-prefix=INVALID-SPLIT-MODE +// INVALID-SPLIT-MODE: Invalid split mode: invalid +// +// Test --sycl-device-code-split-esimd option is passed to tool mode. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-esimd.out 2>&1 \ +// RUN: --sycl-device-code-split-esimd \ +// RUN: | FileCheck %s --check-prefix=ESIMD-SPLIT +// ESIMD-SPLIT: sycl-post-link{{.*}}-split-esimd{{.*}} +// +// Test --no-sycl-device-code-split-esimd option is passed to tool mode. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-no-esimd.out 2>&1 \ +// RUN: --no-sycl-device-code-split-esimd \ +// RUN: | FileCheck %s --check-prefix=NO-ESIMD-SPLIT +// NO-ESIMD-SPLIT: sycl-post-link +// NO-ESIMD-SPLIT-NOT: -split-esimd +// +// Test --sycl-add-default-spec-consts-image for AOT target. 
+// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=bmg_g21 %t_1.bc %t_2.bc -o %t-spec-consts.out 2>&1 \ +// RUN: --sycl-add-default-spec-consts-image \ +// RUN: | FileCheck %s --check-prefix=SPEC-CONSTS-AOT +// SPEC-CONSTS-AOT: sycl-post-link{{.*}}-emit-only-kernels-as-entry-points{{.*}} +// +// Test --sycl-remove-unused-external-funcs for Intel GPU target. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=bmg_g21 %t_1.bc %t_2.bc -o %t-remove-funcs.out 2>&1 \ +// RUN: --sycl-remove-unused-external-funcs \ +// RUN: | FileCheck %s --check-prefix=REMOVE-FUNCS +// REMOVE-FUNCS: sycl-post-link{{.*}}-emit-only-kernels-as-entry-points{{.*}} +// +// Test --no-sycl-remove-unused-external-funcs keeps functions. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-keep-funcs.out 2>&1 \ +// RUN: --no-sycl-remove-unused-external-funcs \ +// RUN: | FileCheck %s --check-prefix=KEEP-FUNCS +// KEEP-FUNCS: sycl-post-link +// KEEP-FUNCS-NOT: -emit-only-kernels-as-entry-points +// +// Test --sycl-post-link-options passes custom options to tool. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-custom.out 2>&1 \ +// RUN: --sycl-post-link-options="--custom-opt1 --custom-opt2" \ +// RUN: | FileCheck %s --check-prefix=CUSTOM-OPTS +// CUSTOM-OPTS: sycl-post-link{{.*}}--custom-opt1 --custom-opt2{{.*}} +// +// Test triple-specific settings for SPIR-V target (native mode for spec constants). +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-spirv-native.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=SPIRV-NATIVE +// SPIRV-NATIVE: sycl-post-link{{.*}}-spec-const=native{{.*}} +// +// Test --no-sycl-add-default-spec-consts-image (default behavior). 
+// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=bmg_g21 %t_1.bc %t_2.bc -o %t-no-spec-consts.out 2>&1 \ +// RUN: --no-sycl-add-default-spec-consts-image \ +// RUN: | FileCheck %s --check-prefix=NO-SPEC-CONSTS +// NO-SPEC-CONSTS: sycl-post-link +// NO-SPEC-CONSTS-NOT: -generate-device-image-default-spec-consts +// +// Test --sycl-allow-device-image-dependencies option. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-img-deps.out 2>&1 \ +// RUN: --sycl-allow-device-image-dependencies \ +// RUN: | FileCheck %s --check-prefix=IMAGE-DEPS +// IMAGE-DEPS: sycl-post-link +// IMAGE-DEPS-NOT: -emit-only-kernels-as-entry-points +// +// Test --sycl-thin-lto option suppresses -symbols flag (but not -emit-*-symbols). +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-thin-lto.out 2>&1 \ +// RUN: --sycl-thin-lto \ +// RUN: | FileCheck %s --check-prefix=THIN-LTO +// THIN-LTO-NOT: {{[[:space:]]}}-symbols{{[[:space:]]}} +// THIN-LTO: -emit-exported-symbols +// +// Test --syclbin option enables kernel name emission. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-syclbin.out 2>&1 \ +// RUN: --syclbin=executable \ +// RUN: | FileCheck %s --check-prefix=SYCLBIN +// SYCLBIN: sycl-post-link{{.*}}-emit-kernel-names{{.*}} +// +// Test --sycl-device-library-location option is passed to tool. 
+// RUN: mkdir -p %t-devlib.dir +// RUN: touch %t-devlib.dir/libsycl-native-bfloat16.bc +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-devlib.out 2>&1 \ +// RUN: --sycl-device-library-location=%t-devlib.dir \ +// RUN: | FileCheck %s --check-prefix=DEVLIB-LOC +// DEVLIB-LOC: sycl-post-link{{.*}}--device-lib-dir={{.*}}devlib.dir{{.*}} +// +// ============================================================================ +// Tests for sycl-post-link function behavior +// ============================================================================ +// +// Test getSYCLPostLinkSettings() - verify triple-specific configuration. +// For Intel GPU (spirv64 with arch), should emit param info and entry point optimization. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=bmg_g21 %t_1.bc %t_2.bc -o %t-intel-gpu-settings.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=INTEL-GPU-SETTINGS +// INTEL-GPU-SETTINGS: sycl-post-link{{.*}}-spec-const=native +// INTEL-GPU-SETTINGS: -emit-only-kernels-as-entry-points +// INTEL-GPU-SETTINGS: -emit-param-info +// +// Test getSYCLPostLinkSettings() - verify ESIMD is split by default for SPIR-V. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-esimd-default.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ESIMD-DEFAULT +// ESIMD-DEFAULT: sycl-post-link{{.*}}-split-esimd{{.*}}-lower-esimd +// +// Test getTripleBasedSYCLPostLinkOpts() - verify it generates correct command-line args. +// Should include properties, symbols, exported/imported symbols for all targets. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-cmdline-args.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CMDLINE-ARGS +// CMDLINE-ARGS: sycl-post-link{{.*}}-properties{{.*}}-symbols{{.*}}-emit-exported-symbols{{.*}}-emit-imported-symbols +// +// Test runSYCLPostLinkTool() - verify tool is invoked with input/output files. 
+// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-tool-invoke.out 2>&1 \ +// RUN: --use-sycl-post-link-tool \ +// RUN: | FileCheck %s --check-prefix=TOOL-INVOKE +// TOOL-INVOKE: sycl-post-link{{.*}}-o {{.*}}.table {{.*}}.bc +// +// Test runSYCLPostLinkLibrary() - verify library mode doesn't print sycl-post-link command. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-lib-invoke.out 2>&1 \ +// RUN: --no-use-sycl-post-link-tool --sycl-module-split-mode=none \ +// RUN: | FileCheck %s --check-prefix=LIB-INVOKE +// LIB-INVOKE: sycl-device-link: +// LIB-INVOKE-NOT: "{{.*}}sycl-post-link{{.*}}" +// LIB-INVOKE: LLVM backend: +// +// Test that library mode with split mode produces split modules. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-lib-split.out 2>&1 \ +// RUN: --no-use-sycl-post-link-tool --sycl-module-split-mode=kernel \ +// RUN: | FileCheck %s --check-prefix=LIB-SPLIT +// LIB-SPLIT: sycl-device-link: +// LIB-SPLIT: LLVM backend: +// +// Test combination of multiple options together. 
+// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=bmg_g21 %t_1.bc %t_2.bc -o %t-combo.out 2>&1 \ +// RUN: --sycl-device-code-split-esimd --sycl-thin-lto \ +// RUN: --sycl-post-link-options="--debug" \ +// RUN: | FileCheck %s --check-prefix=COMBO +// COMBO-NOT: {{[[:space:]]}}-symbols{{[[:space:]]}} +// COMBO: -split-esimd{{.*}}--debug diff --git a/clang/tools/clang-sycl-linker/CMakeLists.txt b/clang/tools/clang-sycl-linker/CMakeLists.txt index ee89e8b0a5570..1de7b3369be5c 100644 --- a/clang/tools/clang-sycl-linker/CMakeLists.txt +++ b/clang/tools/clang-sycl-linker/CMakeLists.txt @@ -12,6 +12,7 @@ set(LLVM_LINK_COMPONENTS Support Target TargetParser + SYCLPostLink ) set(LLVM_TARGET_DEFINITIONS SYCLLinkOpts.td) diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp index 99e1a78a97bf3..7cb4792fb44c1 100644 --- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp +++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp @@ -46,11 +46,16 @@ #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/WithColor.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/SYCLPostLink/ModuleSplitter.h" +#include "llvm/SYCLPostLink/SYCLPostLink.h" using namespace llvm; using namespace llvm::opt; using namespace llvm::object; using namespace clang; +using namespace llvm::sycl_post_link; +using namespace llvm::module_split; +using namespace llvm::util; /// Print commands/steps with arguments without executing. static bool DryRun = false; @@ -64,6 +69,10 @@ static StringRef OutputFile; /// Directory to dump SPIR-V IR if requested by user. static SmallString<128> SPIRVDumpDir; +/// Global state for post-link configuration. 
+static std::optional SYCLModuleSplitMode; +static bool UseSYCLPostLinkTool = true; + using OffloadingImage = OffloadBinary::OffloadingImage; static void printVersion(raw_ostream &OS) { @@ -373,6 +382,244 @@ static Error runCodeGen(StringRef File, const ArgList &Args, return Error::success(); } +/// Creates and configures PostLinkSettings for SYCL post-link library processing. +/// NOTE: Any changes made here should be reflected in getTripleBasedSYCLPostLinkOpts(). +static PostLinkSettings getSYCLPostLinkSettings(const ArgList &Args, + const llvm::Triple Triple) { + PostLinkSettings Settings; + bool SpecConstsSupported = (!Triple.isNVPTX() && !Triple.isAMDGCN() && + !Triple.isSPIRAOT() && !Triple.isNativeCPU()); + if (SpecConstsSupported) + Settings.SpecConstMode = SpecConstantsPass::HandlingMode::native; + else + Settings.SpecConstMode = SpecConstantsPass::HandlingMode::emulation; + + // On Intel targets we don't need non-kernel functions as entry points, + // because it only increases amount of code for device compiler to handle, + // without any actual benefits. + // TODO: Try to extend this feature for non-Intel GPUs. + if (!Args.hasFlag(OPT_no_sycl_remove_unused_external_funcs, + OPT_sycl_remove_unused_external_funcs, false) && + !Args.hasArg(OPT_sycl_allow_device_image_dependencies) && + !Triple.isNVPTX() && !Triple.isAMDGPU()) + Settings.EmitOnlyKernelsAsEntryPoints = true; + + if (!Triple.isAMDGCN()) + Settings.EmitParamInfo = true; + if (Triple.isNVPTX() || Triple.isAMDGCN() || Triple.isNativeCPU()) + Settings.EmitProgramMetadata = true; + + if (Args.hasArg(OPT_syclbin_EQ)) + Settings.EmitKernelNames = true; + // Specialization constant info generation is mandatory - + // add options unconditionally. 
+ Settings.EmitExportedSymbols = true; + Settings.EmitImportedSymbols = true; + + bool SplitEsimdByDefault = Triple.isSPIROrSPIRV(); + if (Args.hasFlag(OPT_sycl_device_code_split_esimd, + OPT_no_sycl_device_code_split_esimd, SplitEsimdByDefault)) + Settings.ESIMDOptions.SplitESIMD = true; + + // If the code doesn't contain ESIMD intrinsics then lowering has no effect. + // Otherwise, it is mandatory to lower ESIMD intrinsics. + // Therefore, it is always set true. + Settings.ESIMDOptions.LowerESIMD = true; + + bool IsAOT = Triple.isNVPTX() || Triple.isAMDGCN() || Triple.isSPIRAOT(); + if (Args.hasFlag(OPT_sycl_add_default_spec_consts_image, + OPT_no_sycl_add_default_spec_consts_image, false) && + IsAOT) + Settings.GenerateModuleDescWithDefaultSpecConsts = true; + + Settings.SplitMode = Settings.ESIMDOptions.SplitMode = *SYCLModuleSplitMode; + // TODO: fill AllowDeviceImageDependencies, ESIMDOptions.OptLevel and + // ESIMDOptions.ForceDisableESIMDOpt + + return Settings; +} + +/// Add triple-based sycl-post-link options. +/// NOTE: Any changes made here should be reflected in getSYCLPostLinkSettings(). +static void getTripleBasedSYCLPostLinkOpts(const ArgList &Args, + SmallVector &PostLinkArgs, + const llvm::Triple Triple) { + bool SpecConstsSupported = (!Triple.isNVPTX() && !Triple.isAMDGCN() && + !Triple.isSPIRAOT() && !Triple.isNativeCPU()); + if (SpecConstsSupported) + PostLinkArgs.push_back("-spec-const=native"); + else + PostLinkArgs.push_back("-spec-const=emulation"); + + // TODO: If we ever pass -ir-output-only based on the triple, + // make sure we don't pass -properties. + PostLinkArgs.push_back("-properties"); + + // On Intel targets we don't need non-kernel functions as entry points, + // because it only increases amount of code for device compiler to handle, + // without any actual benefits. + // TODO: Try to extend this feature for non-Intel GPUs. 
+ if (!Args.hasFlag(OPT_no_sycl_remove_unused_external_funcs, + OPT_sycl_remove_unused_external_funcs, false) && + !Args.hasArg(OPT_sycl_allow_device_image_dependencies) && + !Triple.isNVPTX() && !Triple.isAMDGPU()) + PostLinkArgs.push_back("-emit-only-kernels-as-entry-points"); + + if (!Triple.isAMDGCN()) + PostLinkArgs.push_back("-emit-param-info"); + // Enable program metadata + if (Triple.isNVPTX() || Triple.isAMDGCN() || Triple.isNativeCPU()) + PostLinkArgs.push_back("-emit-program-metadata"); + + bool SplitEsimd = + Args.hasFlag(OPT_sycl_device_code_split_esimd, + OPT_no_sycl_device_code_split_esimd, + Triple.isSPIROrSPIRV()); + + if (!Args.hasArg(OPT_sycl_thin_lto)) + PostLinkArgs.push_back("-symbols"); + // Emit kernel names if we are producing SYCLBIN. + if (Args.hasArg(OPT_syclbin_EQ)) + PostLinkArgs.push_back("-emit-kernel-names"); + // Specialization constant info generation is mandatory - + // add options unconditionally + PostLinkArgs.push_back("-emit-exported-symbols"); + PostLinkArgs.push_back("-emit-imported-symbols"); + if (SplitEsimd) + PostLinkArgs.push_back("-split-esimd"); + PostLinkArgs.push_back("-lower-esimd"); + + bool IsAOT = Triple.isNVPTX() || Triple.isAMDGCN() || Triple.isSPIRAOT(); + if (Args.hasFlag(OPT_sycl_add_default_spec_consts_image, + OPT_no_sycl_add_default_spec_consts_image, false) && + IsAOT) + PostLinkArgs.push_back("-generate-device-image-default-spec-consts"); +} + +/// Run sycl-post-link tool for SYCL offloading. +static Expected> +runSYCLPostLinkTool(ArrayRef InputFiles, const ArgList &Args, + bool IsDevicePassedWithSyclTargetBackend) { + Expected SYCLPostLinkPath = + findProgram(Args, "sycl-post-link", {getMainExecutable("sycl-post-link")}); + if (!SYCLPostLinkPath) + return SYCLPostLinkPath.takeError(); + + // Create a new file to write the output of sycl-post-link to. 
+ auto TempFileOrErr = + createTempFile(Args, sys::path::filename(OutputFile), "table"); + if (!TempFileOrErr) + return TempFileOrErr.takeError(); + std::string OutputPathWithArch = TempFileOrErr->str(); + + // Enable the driver to invoke sycl-post-link with the device architecture + // when Intel GPU targets are passed in -fsycl-targets. + const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); + + if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen && !Arch.empty() && + !IsDevicePassedWithSyclTargetBackend && Arch != "*") + OutputPathWithArch = "intel_gpu_" + Arch.str() + "," + OutputPathWithArch; + else if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64) + OutputPathWithArch = "spir64_x86_64," + OutputPathWithArch; + + SmallVector CmdArgs; + CmdArgs.push_back(*SYCLPostLinkPath); + Arg *SYCLDeviceLibLoc = Args.getLastArg(OPT_sycl_device_library_location_EQ); + if (SYCLDeviceLibLoc && !Triple.isSPIRAOT()) { + std::string SYCLDeviceLibLocParam = SYCLDeviceLibLoc->getValue(); + std::string BF16DeviceLibLoc = + SYCLDeviceLibLocParam + "/libsycl-native-bfloat16.bc"; + if (llvm::sys::fs::exists(BF16DeviceLibLoc)) { + SYCLDeviceLibLocParam = "--device-lib-dir=" + SYCLDeviceLibLocParam; + CmdArgs.push_back(Args.MakeArgString(StringRef(SYCLDeviceLibLocParam))); + } + } + getTripleBasedSYCLPostLinkOpts(Args, CmdArgs, Triple); + StringRef SYCLPostLinkOptions; + if (Arg *A = Args.getLastArg(OPT_sycl_post_link_options_EQ)) + SYCLPostLinkOptions = A->getValue(); + SYCLPostLinkOptions.split(CmdArgs, " ", /* MaxSplit = */ -1, + /* KeepEmpty = */ false); + CmdArgs.push_back("-o"); + CmdArgs.push_back(Args.MakeArgString(OutputPathWithArch)); + for (auto &File : InputFiles) + CmdArgs.push_back(File); + if (Error Err = executeCommands(*SYCLPostLinkPath, CmdArgs)) + return std::move(Err); + + if (DryRun) { + // In DryRun we need a dummy entry in order to continue the whole pipeline. 
+ auto ImageFileOrErr = createTempFile(Args, + sys::path::filename(OutputFile) + ".sycl.split.image", "bc"); + if (!ImageFileOrErr) + return ImageFileOrErr.takeError(); + + std::vector Modules = {SplitModule(*ImageFileOrErr, PropertySetRegistry())}; + return Modules; + } + + return llvm::sycl_post_link::parseSplitModulesFromFile(*TempFileOrErr); +} + +/// Invoke SYCL post-link library for SYCL offloading. +static Expected> +runSYCLPostLinkLibrary(ArrayRef InputFiles, const ArgList &Args, + IRSplitMode SplitMode) { + std::vector SplitModules; + const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + + PostLinkSettings Settings = getSYCLPostLinkSettings(Args, Triple); + + if (DryRun) { + auto OutputFileOrErr = createTempFile(Args, + sys::path::filename(OutputFile) + ".sycl.split.image", "bc"); + if (!OutputFileOrErr) + return OutputFileOrErr.takeError(); + + StringRef OutputFilePath = *OutputFileOrErr; + auto InputFilesStr = llvm::join(InputFiles.begin(), InputFiles.end(), ","); + errs() << formatv("sycl-post-link-library: input: {0}, output: {1}, {2}\n", + InputFilesStr, OutputFilePath, + sycl_post_link::convertSettingsToString(Settings)); + SplitModules.emplace_back(OutputFilePath, PropertySetRegistry()); + return SplitModules; + } + + for (StringRef InputFile : InputFiles) { + SMDiagnostic Err; + LLVMContext C; + std::unique_ptr M = parseIRFile(InputFile, Err, C); + if (!M) + return createStringError(inconvertibleErrorCode(), Err.getMessage()); + + Expected> SplitModulesOrErr = + sycl_post_link::performPostLinkProcessing(std::move(M), Settings); + if (!SplitModulesOrErr) + return SplitModulesOrErr.takeError(); + + SplitModules.insert(SplitModules.end(), SplitModulesOrErr->begin(), + SplitModulesOrErr->end()); + } + + if (Verbose) { + auto InputFilesStr = llvm::join(InputFiles.begin(), InputFiles.end(), ","); + std::string OutputFilesStr; + for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { + if (I > 0) + OutputFilesStr += ','; + OutputFilesStr += 
SplitModules[I].ModuleFilePath; + } + + errs() << formatv( + "sycl-post-link-library: input: {0}, output: {1}, settings: {2}\n", + InputFilesStr, OutputFilesStr, + sycl_post_link::convertSettingsToString(Settings)); + } + + return SplitModules; +} + /// Run AOT compilation for Intel CPU. /// Calls opencl-aot tool to generate device code for the Intel OpenCL CPU /// Runtime. @@ -472,31 +719,22 @@ Error runSYCLLink(ArrayRef Files, const ArgList &Args) { if (!LinkedFile) return LinkedFile.takeError(); - // TODO: SYCL post link functionality involves device code splitting and will - // result in multiple bitcode codes. - // The following lines are placeholders to represent multiple files and will - // be refactored once SYCL post link support is available. - SmallVector SplitModules; - SplitModules.emplace_back(*LinkedFile); + // Run sycl-post-link processing on the linked module. + bool IsDevicePassedWithSyclTargetBackend = false; // TODO: Detect from args + SmallVector InputFilesSYCL = {*LinkedFile}; - // Generate symbol table. - SmallVector> SymbolTable; - for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { - Expected> ModOrErr = - getBitcodeModule(SplitModules[I], C); - if (!ModOrErr) - return ModOrErr.takeError(); + auto SplitModulesOrErr = + UseSYCLPostLinkTool + ? 
runSYCLPostLinkTool(InputFilesSYCL, Args, + IsDevicePassedWithSyclTargetBackend) + : runSYCLPostLinkLibrary(InputFilesSYCL, Args, *SYCLModuleSplitMode); - SmallString<0> SymbolData; - for (Function &F : **ModOrErr) { - // TODO: Consider using LLVM-IR metadata to identify globals of interest - if (F.hasKernelCallingConv()) { - SymbolData.append(F.getName()); - SymbolData.push_back('\0'); - } - } - SymbolTable.emplace_back(std::move(SymbolData)); - } + if (!SplitModulesOrErr) + return SplitModulesOrErr.takeError(); + + auto &SplitModules = *SplitModulesOrErr; + + // Symbol tables are already generated by post-link and stored in SplitModules bool IsAOTCompileNeeded = IsIntelOffloadArch( StringToOffloadArch(Args.getLastArgValue(OPT_arch_EQ))); @@ -505,16 +743,16 @@ Error runSYCLLink(ArrayRef Files, const ArgList &Args) { for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { StringRef Stem = OutputFile.rsplit('.').first; std::string SPVFile = (Stem + "_" + Twine(I) + ".spv").str(); - if (Error Err = runCodeGen(SplitModules[I], Args, SPVFile, C)) + if (Error Err = runCodeGen(SplitModules[I].ModuleFilePath, Args, SPVFile, C)) return Err; if (!IsAOTCompileNeeded) { - SplitModules[I] = SPVFile; + SplitModules[I].ModuleFilePath = SPVFile; } else { // AOT compilation step. 
std::string AOTFile = (Stem + "_" + Twine(I) + ".out").str(); if (Error Err = runAOTCompile(SPVFile, AOTFile, Args)) return Err; - SplitModules[I] = AOTFile; + SplitModules[I].ModuleFilePath = AOTFile; } } @@ -525,7 +763,7 @@ Error runSYCLLink(ArrayRef Files, const ArgList &Args) { llvm::raw_fd_ostream FS(FD, /*shouldClose=*/true); for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { - auto File = SplitModules[I]; + auto File = SplitModules[I].ModuleFilePath; llvm::ErrorOr> FileOrErr = llvm::MemoryBuffer::getFileOrSTDIN(File); if (std::error_code EC = FileOrErr.getError()) { @@ -541,7 +779,7 @@ Error runSYCLLink(ArrayRef Files, const ArgList &Args) { Args.MakeArgString(Args.getLastArgValue(OPT_triple_EQ)); TheImage.StringData["arch"] = Args.MakeArgString(Args.getLastArgValue(OPT_arch_EQ)); - TheImage.StringData["symbols"] = SymbolTable[I]; + TheImage.StringData["symbols"] = SplitModules[I].Symbols; // Use post-link symbols TheImage.Image = std::move(*FileOrErr); llvm::SmallString<0> Buffer = OffloadBinary::write(TheImage); @@ -589,6 +827,23 @@ int main(int argc, char **argv) { Verbose = Args.hasArg(OPT_verbose); DryRun = Args.hasArg(OPT_dry_run); + // Parse tool vs library mode for sycl-post-link + UseSYCLPostLinkTool = Args.hasFlag(OPT_use_sycl_post_link_tool, + OPT_no_use_sycl_post_link_tool, true); + + // Parse split mode (for library mode) + if (Args.hasArg(OPT_sycl_module_split_mode_EQ)) { + StringRef StrMode = Args.getLastArgValue(OPT_sycl_module_split_mode_EQ); + if (UseSYCLPostLinkTool) + reportError(createStringError( + "--sycl-module-split-mode cannot be used with --use-sycl-post-link-tool")); + SYCLModuleSplitMode = module_split::convertStringToSplitMode(StrMode); + if (!SYCLModuleSplitMode) + reportError(createStringError("Invalid split mode: " + StrMode)); + } else { + SYCLModuleSplitMode = IRSplitMode::SPLIT_NONE; + } + if (!Args.hasArg(OPT_o)) reportError(createStringError("Output file must be specified")); OutputFile = 
Args.getLastArgValue(OPT_o); diff --git a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td index a2ea6b57aa691..3f34a167d7f00 100644 --- a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td +++ b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td @@ -54,3 +54,60 @@ def ocloc_options_EQ : Joined<["--", "-"], "ocloc-options=">, def opencl_aot_options_EQ : Joined<["--", "-"], "opencl-aot-options=">, Flags<[LinkerOnlyOption]>, HelpText<"Options passed to opencl-aot for Intel CPU AOT compilation">; + +// SYCL post-link options +def sycl_post_link_options_EQ : Joined<["--", "-"], "sycl-post-link-options=">, + Flags<[LinkerOnlyOption]>, + HelpText<"Options passed to sycl-post-link tool">; + +def sycl_module_split_mode_EQ : Joined<["--", "-"], "sycl-module-split-mode=">, + Flags<[LinkerOnlyOption]>, + HelpText<"Module split mode: kernel, source, auto, or none">; + +def use_sycl_post_link_tool : Flag<["--", "-"], "use-sycl-post-link-tool">, + Flags<[LinkerOnlyOption]>, + HelpText<"Use external sycl-post-link tool (default)">; + +def no_use_sycl_post_link_tool : Flag<["--", "-"], "no-use-sycl-post-link-tool">, + Flags<[LinkerOnlyOption]>, + HelpText<"Use sycl-post-link library API instead of tool">; + +def sycl_device_code_split_esimd : Flag<["--", "-"], "sycl-device-code-split-esimd">, + Flags<[LinkerOnlyOption]>, + HelpText<"Enable ESIMD code splitting">; + +def no_sycl_device_code_split_esimd : Flag<["--", "-"], "no-sycl-device-code-split-esimd">, + Flags<[LinkerOnlyOption]>, + HelpText<"Disable ESIMD code splitting">; + +def sycl_add_default_spec_consts_image : Flag<["--", "-"], "sycl-add-default-spec-consts-image">, + Flags<[LinkerOnlyOption]>, + HelpText<"Generate module descriptor with default specialization constants">; + +def no_sycl_add_default_spec_consts_image : Flag<["--", "-"], "no-sycl-add-default-spec-consts-image">, + Flags<[LinkerOnlyOption]>, + HelpText<"Do not generate default spec consts module descriptor">; + +def 
sycl_remove_unused_external_funcs : Flag<["--", "-"], "sycl-remove-unused-external-funcs">, + Flags<[LinkerOnlyOption]>, + HelpText<"Remove unused external functions (default)">; + +def no_sycl_remove_unused_external_funcs : Flag<["--", "-"], "no-sycl-remove-unused-external-funcs">, + Flags<[LinkerOnlyOption]>, + HelpText<"Keep all external functions">; + +def sycl_allow_device_image_dependencies : Flag<["--", "-"], "sycl-allow-device-image-dependencies">, + Flags<[LinkerOnlyOption]>, + HelpText<"Allow dependencies between device images">; + +def sycl_thin_lto : Flag<["--", "-"], "sycl-thin-lto">, + Flags<[LinkerOnlyOption]>, + HelpText<"Enable thin LTO for SYCL device code">; + +def syclbin_EQ : Joined<["--", "-"], "syclbin=">, + Flags<[LinkerOnlyOption]>, + HelpText<"Generate SYCLBIN output (input, object, or executable)">; + +def sycl_device_library_location_EQ : CommaJoined<["--", "-"], "sycl-device-library-location=">, + Flags<[LinkerOnlyOption]>, + HelpText<"Path(s) to SYCL device library directory">;