Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions arraymancer.nimble
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,11 @@ task test_opencl, "Run all OpenCL backend tests":
switches.add " -d:blas=cblas" # Archlinux, comment out on Debian/Ubuntu
test "tests_opencl", switches, split = false, "cpp"

task test_metal, "Run all Metal backend tests":
  # Compiler defines handed to the shared `test` helper for the Metal build.
  var metalFlags = " -d:metal"
  # Archlinux, comment out on Debian/Ubuntu
  # NOTE(review): this hint mirrors the OpenCL task above; confirm it is still
  # relevant for a Metal (macOS) build.
  metalFlags &= " -d:blas=cblas"
  # Run the whole suite from a single test file (no split tests).
  test("tests_metal", metalFlags, split = false)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

For consistency with other GPU backends (CUDA, OpenCL) and to ensure compatibility with Objective-C code via C++, it's recommended to specify "cpp" as the compilation language for this test task.

  test "tests_metal", switches, split = false, "cpp"


# task test_deprecated, "Run all tests on deprecated procs":
# test "tests_cpu_deprecated"

Expand Down
4 changes: 4 additions & 0 deletions src/arraymancer/tensor.nim
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,7 @@ when defined(cuda) or defined(nimdoc) or defined(nimsuggest):
# OpenCL backend: import and re-export the OpenCL tensor module when compiling
# with -d:opencl, or when the `nimdoc` / `nimsuggest` symbols are defined.
when defined(opencl) or defined(nimdoc) or defined(nimsuggest):
import ./tensor/tensor_opencl
export tensor_opencl

# Metal backend: import and re-export the Metal tensor module when compiling
# with -d:metal, or when the `nimdoc` / `nimsuggest` symbols are defined.
when defined(metal) or defined(nimdoc) or defined(nimsuggest):
import ./tensor/tensor_metal
export tensor_metal
4 changes: 4 additions & 0 deletions src/arraymancer/tensor/backend/global_config.nim
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ const CUDA_HOF_BPG*: cint = 256 # should be (grid-stride+threadsPerBlock-1)
const
  OMP_FOR_THRESHOLD* = 1000
    ## Tensor number of elements threshold before using OpenMP multithreading
  OMP_MAX_REDUCE_BLOCKS* = 8
    ## Max number of expected OpenMP threads (used in reduce)

# Metal-specific configuration
const
  METAL_THREADGROUP_SIZE* = 256
    ## Default threadgroup size for Metal kernels
  METAL_BUFFER_POOL_MAX_SIZE* = 1 shl 30
    ## 1GB max buffer pool size (1024 * 1024 * 1024)

# Full processor optimization (AVX, AVX2, ARM NEON, ... if applicable)
when defined(native):
{.passC: "-march=native".}
Expand Down
29 changes: 29 additions & 0 deletions src/arraymancer/tensor/backend/metal.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright 2017 the Arraymancer contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Metal backend for Arraymancer
## Provides GPU acceleration on macOS using Apple's Metal framework

# The Metal backend modules are only pulled in when compiling with -d:metal.
when defined(metal):
import ../data_structure,
./metal/metal_buffer,
./metal/metal_backend

# Re-export both Metal submodules as a whole.
export metal_buffer,
metal_backend

# Re-export types for convenience
# NOTE(review): these symbols presumably come from metal_buffer / metal_backend,
# which are already exported wholesale above; confirm the explicit list is
# still needed.
export MetalBuffer, MetalBufferPool
export MTLDevice, MTLCommandQueue, MTLCommandBuffer, MTLBuffer
export MetalContext, metalContext, initMetalContext, isMetalAvailable
Loading
Loading