From bd861a340d3fe1fc586ae89db28bac50d61de5b6 Mon Sep 17 00:00:00 2001
From: Paul Richmond
Date: Wed, 5 Nov 2025 13:38:10 +0000
Subject: [PATCH] Fix for Simple Test
JITIFY_USE_CONTEXT_INDEPENDENT_LOADING update for occupancy API
---
jitify2.hpp | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/jitify2.hpp b/jitify2.hpp
index a32e001..76599cb 100644
--- a/jitify2.hpp
+++ b/jitify2.hpp
@@ -1723,6 +1723,7 @@ class LibCuda
int, CUkernel, CUdevice)
JITIFY_DEFINE_CUDA_WRAPPER(KernelGetAttribute, CUresult, int*,
CUfunction_attribute, CUkernel, CUdevice)
+ JITIFY_DEFINE_CUDA_WRAPPER(KernelGetFunction, CUresult, CUfunction*, CUkernel)  // Used by configure_1d_max_occupancy to get a CUfunction from a CUkernel.
#endif
#undef JITIFY_DEFINE_CUDA_WRAPPER
#undef JITIFY_STR
@@ -2562,8 +2563,18 @@ inline ConfiguredKernel ConfiguredKernel::configure_1d_max_occupancy(
unsigned int flags) {
int grid, block;
if (!cuda()) return Error(cuda().error());
- CUresult ret = cuda().OccupancyMaxPotentialBlockSizeWithFlags()(
- &grid, &block, (CUfunction)kernel.function(),
+ CUfunction cu_func = NULL;  // Handle handed to the occupancy query below.
+ CUresult ret;
+#if JITIFY_USE_CONTEXT_INDEPENDENT_LOADING  // kernel.function() is treated as a CUkernel here, so convert it first.
+ ret = cuda().KernelGetFunction()(&cu_func, (CUkernel)kernel.function());
+ if (ret != CUDA_SUCCESS) {
+ return Error("Configure failed getting Kernel Function: " + detail::get_cuda_error_string(ret));
+ }
+#else  // kernel.function() is used directly as a CUfunction.
+ cu_func = (CUfunction)kernel.function();
+#endif  // JITIFY_USE_CONTEXT_INDEPENDENT_LOADING
+ ret = cuda().OccupancyMaxPotentialBlockSizeWithFlags()(
+ &grid, &block, cu_func,
shared_memory_bytes_callback, shared_memory_bytes, max_block_size, flags);
if (ret != CUDA_SUCCESS) {
return Error("Configure failed: " + detail::get_cuda_error_string(ret));