diff --git a/ggml/src/ggml-cuda/cross-ring-interleave.cu b/ggml/src/ggml-cuda/cross-ring-interleave.cu index a88571eb826..01202822207 100644 --- a/ggml/src/ggml-cuda/cross-ring-interleave.cu +++ b/ggml/src/ggml-cuda/cross-ring-interleave.cu @@ -178,7 +178,7 @@ extern "C" bool dflash_cross_ring_gpu_write_d2d( cudaGetLastError(); return false; } -#if CUDART_VERSION >= 10000 +#if CUDART_VERSION >= 10000 || defined(GGML_USE_HIP) if (attr.type != cudaMemoryTypeDevice || attr.device != ring->device) { return false; } @@ -253,7 +253,7 @@ extern "C" bool dflash_rebuild_conv_state( cudaGetLastError(); return false; } -#if CUDART_VERSION >= 10000 +#if CUDART_VERSION >= 10000 || defined(GGML_USE_HIP) if (r_attr.type != cudaMemoryTypeDevice || qkv_attr.type != cudaMemoryTypeDevice || r_attr.device != qkv_attr.device) { return false; @@ -288,7 +288,7 @@ extern "C" bool dflash_cuda_copy_d2d(void * dst, const void * src, size_t size) cudaGetLastError(); return false; } -#if CUDART_VERSION >= 10000 +#if CUDART_VERSION >= 10000 || defined(GGML_USE_HIP) if (dst_attr.type != cudaMemoryTypeDevice || src_attr.type != cudaMemoryTypeDevice || dst_attr.device != src_attr.device) { return false; @@ -315,7 +315,7 @@ extern "C" bool dflash_cuda_prepare_ptr(const void * ptr) { cudaGetLastError(); return false; } -#if CUDART_VERSION >= 10000 +#if CUDART_VERSION >= 10000 || defined(GGML_USE_HIP) if (attr.type != cudaMemoryTypeDevice) { return false; } @@ -345,7 +345,7 @@ extern "C" bool dflash_cuda_synchronize_ptr(const void * ptr) { cudaGetLastError(); return false; } -#if CUDART_VERSION >= 10000 +#if CUDART_VERSION >= 10000 || defined(GGML_USE_HIP) if (attr.type != cudaMemoryTypeDevice) { return false; } @@ -418,7 +418,7 @@ extern "C" void dflash_cross_ring_gpu_set_tensor( cudaGetLastError(); } -#if CUDART_VERSION >= 10000 +#if CUDART_VERSION >= 10000 || defined(GGML_USE_HIP) const bool dst_is_device = dst_err == cudaSuccess && dst_attr.type == cudaMemoryTypeDevice; const bool src_is_device = src_err == cudaSuccess && src_attr.type == cudaMemoryTypeDevice; #else @@ -456,7 +456,7 @@ extern "C" bool dflash_kv_cache_write_d2d( cudaGetLastError(); return false; } -#if CUDART_VERSION >= 10000 +#if CUDART_VERSION >= 10000 || defined(GGML_USE_HIP) if (ring_attr.type != cudaMemoryTypeDevice || src_attr.type != cudaMemoryTypeDevice || ring_attr.device != src_attr.device) { return false; @@ -528,7 +528,7 @@ extern "C" bool dflash_kv_cache_append_d2d( cudaGetLastError(); return false; } -#if CUDART_VERSION >= 10000 +#if CUDART_VERSION >= 10000 || defined(GGML_USE_HIP) if (cache_attr.type != cudaMemoryTypeDevice || src_attr.type != cudaMemoryTypeDevice || cache_attr.device != src_attr.device) { return false; @@ -649,7 +649,7 @@ extern "C" bool dflash_kv_cache_interleave( cudaGetLastError(); return false; } -#if CUDART_VERSION >= 10000 +#if CUDART_VERSION >= 10000 || defined(GGML_USE_HIP) if (ring_attr.type != cudaMemoryTypeDevice || stage_attr.type != cudaMemoryTypeDevice || ring_attr.device != stage_attr.device) { return false; diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index e8e4b0f24b4..47646dd1e26 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -3147,7 +3147,7 @@ static void ggml_cuda_log_nonlocal_src_buffer( cudaError_t err = cudaPointerGetAttributes(&attr, data); if (err == cudaSuccess) { ptr_device = attr.device; -#if CUDART_VERSION >= 10000 +#if CUDART_VERSION >= 10000 || defined(GGML_USE_HIP) switch (attr.type) { #else switch (attr.memoryType) { diff --git a/ggml/src/ggml-cuda/vendors/hip.h b/ggml/src/ggml-cuda/vendors/hip.h index dbda5a41124..1d9928ff1ed 100644 --- a/ggml/src/ggml-cuda/vendors/hip.h +++ b/ggml/src/ggml-cuda/vendors/hip.h @@ -99,6 +99,11 @@ #define cudaMemGetInfo hipMemGetInfo #define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize #define cudaSetDevice hipSetDevice +#define cudaPointerAttributes hipPointerAttribute_t +#define cudaPointerGetAttributes hipPointerGetAttributes +#define cudaMemoryTypeDevice hipMemoryTypeDevice +#define cudaMemoryTypeHost hipMemoryTypeHost +#define cudaMemoryTypeManaged hipMemoryTypeManaged #define cuDeviceGet hipDeviceGet #define CUdevice hipDevice_t #define CUdeviceptr hipDeviceptr_t diff --git a/ggml/src/ggml-hip/CMakeLists.txt b/ggml/src/ggml-hip/CMakeLists.txt index 03fbae813c8..062c957506e 100644 --- a/ggml/src/ggml-hip/CMakeLists.txt +++ b/ggml/src/ggml-hip/CMakeLists.txt @@ -80,10 +80,8 @@ else() ../ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu ../ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu ../ggml-cuda/template-instances/fattn-vec-instance-bf16-bf16.cu) - if (WIN32) - file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec-instance-*turbo*.cu") - list(APPEND GGML_SOURCES_ROCM ${SRCS}) - endif() + file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec-instance-*turbo*.cu") + list(APPEND GGML_SOURCES_ROCM ${SRCS}) endif() ggml_add_backend_library(ggml-hip