diff --git a/skills/detection/yolo-detection-2026/SKILL.md b/skills/detection/yolo-detection-2026/SKILL.md
index 278d924..939099a 100644
--- a/skills/detection/yolo-detection-2026/SKILL.md
+++ b/skills/detection/yolo-detection-2026/SKILL.md
@@ -66,6 +66,15 @@ parameters:
     description: "Auto-convert model to optimized format for faster inference"
     group: Performance
 
+  - name: compute_units
+    label: "Apple Compute Units"
+    type: select
+    options: ["auto", "cpu_and_ne", "all", "cpu_only", "cpu_and_gpu"]
+    default: "auto"
+    description: "CoreML compute target — 'auto' routes to Neural Engine (NPU), leaving GPU free for LLM/VLM"
+    group: Performance
+    platform: macos
+
 capabilities:
   live_detection:
     script: scripts/detect.py
@@ -89,13 +98,15 @@ Real-time object detection using the latest YOLO 2026 models. Detects 80+ COCO o
 
 The skill uses [`env_config.py`](../../lib/env_config.py) to **automatically detect hardware** and convert the model to the fastest format for your platform. Conversion happens once during deployment and is cached.
 
-| Platform | Backend | Optimized Format | Expected Speedup |
-|----------|---------|------------------|:----------------:|
-| NVIDIA GPU | CUDA | TensorRT `.engine` | ~3-5x |
-| Apple Silicon (M1+) | MPS | CoreML `.mlpackage` | ~2x |
-| Intel CPU/GPU/NPU | OpenVINO | OpenVINO IR `.xml` | ~2-3x |
-| AMD GPU | ROCm | ONNX Runtime | ~1.5-2x |
-| CPU (any) | CPU | ONNX Runtime | ~1.5x |
+| Platform | Backend | Optimized Format | Compute Units | Expected Speedup |
+|----------|---------|------------------|:-------------:|:----------------:|
+| NVIDIA GPU | CUDA | TensorRT `.engine` | GPU | ~3-5x |
+| Apple Silicon (M1+) | MPS | CoreML `.mlpackage` | **Neural Engine** (NPU) | ~2x |
+| Intel CPU/GPU/NPU | OpenVINO | OpenVINO IR `.xml` | CPU/GPU/NPU | ~2-3x |
+| AMD GPU | ROCm | ONNX Runtime | GPU | ~1.5-2x |
+| CPU (any) | CPU | ONNX Runtime | CPU | ~1.5x |
+
+> **Apple Silicon Note**: With the default `compute_units: auto`, detection runs on `cpu_and_ne` (CPU + Neural Engine), keeping the GPU free for LLM/VLM inference. Set `compute_units: all` to include the GPU if you are not running a local LLM.
 
 ### How It Works
 
diff --git a/skills/detection/yolo-detection-2026/scripts/detect.py b/skills/detection/yolo-detection-2026/scripts/detect.py
index d149374..40bea8b 100644
--- a/skills/detection/yolo-detection-2026/scripts/detect.py
+++ b/skills/detection/yolo-detection-2026/scripts/detect.py
@@ -248,7 +248,7 @@ def main():
         perf.model_load_ms = env.load_ms
         perf.export_ms = env.export_ms
 
-        emit({
+        ready_event = {
             "event": "ready",
             "model": f"yolo2026{model_size[0]}",
             "model_size": model_size,
@@ -260,7 +260,10 @@ def main():
             "fps": fps,
             "model_load_ms": round(env.load_ms, 1),
             "available_sizes": list(MODEL_SIZE_MAP.keys()),
-        })
+        }
+        if hasattr(env, 'compute_units') and env.backend == "mps":
+            ready_event["compute_units"] = env.compute_units
+        emit(ready_event)
     except Exception as e:
         emit({"event": "error", "message": f"Failed to load model: {e}", "retriable": False})
         sys.exit(1)
diff --git a/skills/detection/yolo-detection-2026/scripts/env_config.py b/skills/detection/yolo-detection-2026/scripts/env_config.py
index ff42e6f..7c46c05 100644
--- a/skills/detection/yolo-detection-2026/scripts/env_config.py
+++ b/skills/detection/yolo-detection-2026/scripts/env_config.py
@@ -40,6 +40,7 @@ class BackendSpec:
     model_suffix: str       # file extension/dir to look for cached model
     half: bool = True       # use FP16
     extra_export_args: dict = field(default_factory=dict)
+    compute_units: Optional[str] = None  # CoreML compute units: "cpu_and_ne", "all", etc.
 
 
 BACKEND_SPECS = {
@@ -61,6 +62,7 @@ class BackendSpec:
         model_suffix=".mlpackage",
         half=True,
         extra_export_args={"nms": False},
+        compute_units="cpu_and_ne",  # Route to Neural Engine, leave GPU free for LLM/VLM
     ),
     "intel": BackendSpec(
         name="intel",
@@ -86,6 +88,7 @@ class HardwareEnv:
     backend: str = "cpu"              # "cuda" | "rocm" | "mps" | "intel" | "cpu"
     device: str = "cpu"               # torch device string
     export_format: str = "onnx"       # optimal export format
+    compute_units: str = "all"        # CoreML compute units (Apple only)
     gpu_name: str = ""                # human-readable GPU name
     gpu_memory_mb: int = 0            # GPU memory in MB
     driver_version: str = ""          # GPU driver version
@@ -113,9 +116,11 @@ def detect() -> "HardwareEnv":
         else:
             env._fallback_cpu()
 
-        # Set export format from backend spec
+        # Set export format and compute units from backend spec
         spec = BACKEND_SPECS.get(env.backend, BACKEND_SPECS["cpu"])
         env.export_format = spec.export_format
+        if spec.compute_units:
+            env.compute_units = spec.compute_units
 
         # Check if optimized runtime is available
         env.framework_ok = env._check_framework()
@@ -439,6 +444,58 @@ def export_model(self, model, model_name: str) -> Optional[Path]:
 
         return None
 
+    def _load_coreml_with_compute_units(self, model_path: str):
+        """
+        Load a CoreML model via YOLO with specific compute_units.
+
+        Monkey-patches coremltools.models.MLModel during the YOLO load to
+        inject compute_units ("auto" maps to CPU_AND_NE), as ultralytics
+        doesn't expose it; falls back to a plain load if that isn't possible.
+        """
+        from ultralytics import YOLO
+
+        # Map string config → coremltools enum name ("auto" = Neural Engine)
+        _COMPUTE_UNIT_MAP = {
+            "auto": "CPU_AND_NE",
+            "all": "ALL",
+            "cpu_only": "CPU_ONLY",
+            "cpu_and_gpu": "CPU_AND_GPU",
+            "cpu_and_ne": "CPU_AND_NE",
+        }
+
+        ct_enum_name = _COMPUTE_UNIT_MAP.get(self.compute_units)
+        if not ct_enum_name:
+            _log(f"Unknown compute_units '{self.compute_units}', using default")
+            return YOLO(model_path)
+
+        try:
+            import coremltools as ct
+            target_units = getattr(ct.ComputeUnit, ct_enum_name, None)
+            if target_units is None:
+                _log(f"coremltools.ComputeUnit.{ct_enum_name} not available")
+                return YOLO(model_path)
+
+            # Temporarily patch MLModel to inject compute_units
+            _OrigMLModel = ct.models.MLModel
+
+            class _PatchedMLModel(_OrigMLModel):
+                def __init__(self, *args, **kwargs):
+                    kwargs.setdefault('compute_units', target_units)  # explicit value wins
+                    super().__init__(*args, **kwargs)
+
+            ct.models.MLModel = _PatchedMLModel
+            try:
+                model = YOLO(model_path)
+            finally:
+                ct.models.MLModel = _OrigMLModel  # Always restore
+
+            _log(f"CoreML model loaded with compute_units={ct_enum_name}")
+            return model
+
+        except ImportError:
+            _log("coremltools not available, loading without compute_units")
+            return YOLO(model_path)
+
     def load_optimized(self, model_name: str, use_optimized: bool = True):
         """
         Load the best available model for this hardware.
@@ -455,7 +512,12 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
             optimized_path = self.get_optimized_path(model_name)
             if optimized_path.exists():
                 try:
-                    model = YOLO(str(optimized_path))
+                    # On Apple Silicon: route CoreML to Neural Engine
+                    if self.backend == "mps" and self.compute_units != "all":
+                        model = self._load_coreml_with_compute_units(
+                            str(optimized_path))
+                    else:
+                        model = YOLO(str(optimized_path))
                     self.load_ms = (time.perf_counter() - t0) * 1000
                     _log(f"Loaded {self.export_format} model ({self.load_ms:.0f}ms)")
                     return model, self.export_format
@@ -467,7 +529,12 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
             exported = self.export_model(pt_model, model_name)
             if exported:
                 try:
-                    model = YOLO(str(exported))
+                    # On Apple Silicon: route CoreML to Neural Engine
+                    if self.backend == "mps" and self.compute_units != "all":
+                        model = self._load_coreml_with_compute_units(
+                            str(exported))
+                    else:
+                        model = YOLO(str(exported))
                     self.load_ms = (time.perf_counter() - t0) * 1000
                     _log(f"Loaded freshly exported {self.export_format} model ({self.load_ms:.0f}ms)")
                     return model, self.export_format
@@ -508,7 +575,7 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
 
     def to_dict(self) -> dict:
         """Serialize environment info for JSON output."""
-        return {
+        d = {
             "backend": self.backend,
             "device": self.device,
             "export_format": self.export_format,
@@ -519,6 +586,9 @@ def to_dict(self) -> dict:
             "export_ms": round(self.export_ms, 1),
             "load_ms": round(self.load_ms, 1),
         }
+        if self.backend == "mps":
+            d["compute_units"] = self.compute_units
+        return d
 
 
 # ─── CLI: run standalone for diagnostics ─────────────────────────────────────
diff --git a/skills/lib/env_config.py b/skills/lib/env_config.py
index ff42e6f..7c46c05 100644
--- a/skills/lib/env_config.py
+++ b/skills/lib/env_config.py
@@ -40,6 +40,7 @@ class BackendSpec:
     model_suffix: str       # file extension/dir to look for cached model
     half: bool = True       # use FP16
     extra_export_args: dict = field(default_factory=dict)
+    compute_units: Optional[str] = None  # CoreML compute units: "cpu_and_ne", "all", etc.
 
 
 BACKEND_SPECS = {
@@ -61,6 +62,7 @@ class BackendSpec:
         model_suffix=".mlpackage",
         half=True,
         extra_export_args={"nms": False},
+        compute_units="cpu_and_ne",  # Route to Neural Engine, leave GPU free for LLM/VLM
     ),
     "intel": BackendSpec(
         name="intel",
@@ -86,6 +88,7 @@ class HardwareEnv:
     backend: str = "cpu"              # "cuda" | "rocm" | "mps" | "intel" | "cpu"
     device: str = "cpu"               # torch device string
     export_format: str = "onnx"       # optimal export format
+    compute_units: str = "all"        # CoreML compute units (Apple only)
     gpu_name: str = ""                # human-readable GPU name
     gpu_memory_mb: int = 0            # GPU memory in MB
     driver_version: str = ""          # GPU driver version
@@ -113,9 +116,11 @@ def detect() -> "HardwareEnv":
         else:
             env._fallback_cpu()
 
-        # Set export format from backend spec
+        # Set export format and compute units from backend spec
         spec = BACKEND_SPECS.get(env.backend, BACKEND_SPECS["cpu"])
         env.export_format = spec.export_format
+        if spec.compute_units:
+            env.compute_units = spec.compute_units
 
         # Check if optimized runtime is available
         env.framework_ok = env._check_framework()
@@ -439,6 +444,58 @@ def export_model(self, model, model_name: str) -> Optional[Path]:
 
         return None
 
+    def _load_coreml_with_compute_units(self, model_path: str):
+        """
+        Load a CoreML model via YOLO with specific compute_units.
+
+        Monkey-patches coremltools.models.MLModel during the YOLO load to
+        inject compute_units ("auto" maps to CPU_AND_NE), as ultralytics
+        doesn't expose it; falls back to a plain load if that isn't possible.
+        """
+        from ultralytics import YOLO
+
+        # Map string config → coremltools enum name ("auto" = Neural Engine)
+        _COMPUTE_UNIT_MAP = {
+            "auto": "CPU_AND_NE",
+            "all": "ALL",
+            "cpu_only": "CPU_ONLY",
+            "cpu_and_gpu": "CPU_AND_GPU",
+            "cpu_and_ne": "CPU_AND_NE",
+        }
+
+        ct_enum_name = _COMPUTE_UNIT_MAP.get(self.compute_units)
+        if not ct_enum_name:
+            _log(f"Unknown compute_units '{self.compute_units}', using default")
+            return YOLO(model_path)
+
+        try:
+            import coremltools as ct
+            target_units = getattr(ct.ComputeUnit, ct_enum_name, None)
+            if target_units is None:
+                _log(f"coremltools.ComputeUnit.{ct_enum_name} not available")
+                return YOLO(model_path)
+
+            # Temporarily patch MLModel to inject compute_units
+            _OrigMLModel = ct.models.MLModel
+
+            class _PatchedMLModel(_OrigMLModel):
+                def __init__(self, *args, **kwargs):
+                    kwargs.setdefault('compute_units', target_units)  # explicit value wins
+                    super().__init__(*args, **kwargs)
+
+            ct.models.MLModel = _PatchedMLModel
+            try:
+                model = YOLO(model_path)
+            finally:
+                ct.models.MLModel = _OrigMLModel  # Always restore
+
+            _log(f"CoreML model loaded with compute_units={ct_enum_name}")
+            return model
+
+        except ImportError:
+            _log("coremltools not available, loading without compute_units")
+            return YOLO(model_path)
+
     def load_optimized(self, model_name: str, use_optimized: bool = True):
         """
         Load the best available model for this hardware.
@@ -455,7 +512,12 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
             optimized_path = self.get_optimized_path(model_name)
             if optimized_path.exists():
                 try:
-                    model = YOLO(str(optimized_path))
+                    # On Apple Silicon: route CoreML to Neural Engine
+                    if self.backend == "mps" and self.compute_units != "all":
+                        model = self._load_coreml_with_compute_units(
+                            str(optimized_path))
+                    else:
+                        model = YOLO(str(optimized_path))
                     self.load_ms = (time.perf_counter() - t0) * 1000
                     _log(f"Loaded {self.export_format} model ({self.load_ms:.0f}ms)")
                     return model, self.export_format
@@ -467,7 +529,12 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
             exported = self.export_model(pt_model, model_name)
             if exported:
                 try:
-                    model = YOLO(str(exported))
+                    # On Apple Silicon: route CoreML to Neural Engine
+                    if self.backend == "mps" and self.compute_units != "all":
+                        model = self._load_coreml_with_compute_units(
+                            str(exported))
+                    else:
+                        model = YOLO(str(exported))
                     self.load_ms = (time.perf_counter() - t0) * 1000
                     _log(f"Loaded freshly exported {self.export_format} model ({self.load_ms:.0f}ms)")
                     return model, self.export_format
@@ -508,7 +575,7 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
 
     def to_dict(self) -> dict:
         """Serialize environment info for JSON output."""
-        return {
+        d = {
             "backend": self.backend,
             "device": self.device,
             "export_format": self.export_format,
@@ -519,6 +586,9 @@ def to_dict(self) -> dict:
             "export_ms": round(self.export_ms, 1),
             "load_ms": round(self.load_ms, 1),
         }
+        if self.backend == "mps":
+            d["compute_units"] = self.compute_units
+        return d
 
 
 # ─── CLI: run standalone for diagnostics ─────────────────────────────────────
diff --git a/skills/lib/test_env_config_ane.py b/skills/lib/test_env_config_ane.py
new file mode 100644
index 0000000..dc032eb
--- /dev/null
+++ b/skills/lib/test_env_config_ane.py
@@ -0,0 +1,276 @@
+#!/usr/bin/env python3
+"""
+Unit tests for Apple Neural Engine (ANE) compute_units in env_config.py.
+
+Tests compute_units configuration, monkey-patch scoping, and CoreML
+load-time injection — all mocked, no Apple hardware required.
+
+Run:  python -m pytest skills/lib/test_env_config_ane.py -v
+"""
+
+import platform
+import subprocess
+import sys
+from pathlib import Path
+from unittest import mock
+
+import pytest
+
+# Ensure env_config is importable from skills/lib/
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+from env_config import BackendSpec, BACKEND_SPECS, HardwareEnv, _log  # noqa: E402
+
+
+# ── Tests: BackendSpec compute_units ────────────────────────────────────────
+
+class TestBackendSpecComputeUnits:
+    """Verify compute_units field on backend specs."""
+
+    def test_mps_spec_has_cpu_and_ne(self):
+        """MPS backend defaults to cpu_and_ne (Neural Engine)."""
+        spec = BACKEND_SPECS["mps"]
+        assert spec.compute_units == "cpu_and_ne"
+
+    def test_cuda_spec_has_no_compute_units(self):
+        """Non-Apple backends have no compute_units set."""
+        assert BACKEND_SPECS["cuda"].compute_units is None
+
+    def test_cpu_spec_has_no_compute_units(self):
+        assert BACKEND_SPECS["cpu"].compute_units is None
+
+    def test_rocm_spec_has_no_compute_units(self):
+        assert BACKEND_SPECS["rocm"].compute_units is None
+
+    def test_intel_spec_has_no_compute_units(self):
+        assert BACKEND_SPECS["intel"].compute_units is None
+
+
+# ── Tests: HardwareEnv compute_units field ──────────────────────────────────
+
+class TestHardwareEnvComputeUnits:
+    """Verify compute_units is set correctly during detection."""
+
+    def test_default_compute_units_is_all(self):
+        """Default HardwareEnv has compute_units='all'."""
+        env = HardwareEnv()
+        assert env.compute_units == "all"
+
+    @mock.patch("env_config.platform.system", return_value="Darwin")
+    @mock.patch("env_config.platform.machine", return_value="arm64")
+    @mock.patch("env_config.subprocess.run")
+    @mock.patch("env_config.shutil.which", return_value=None)
+    @mock.patch("env_config.Path.is_dir", return_value=False)
+    def test_mps_sets_compute_units_cpu_and_ne(
+        self, _dir, _which, mock_run, _machine, _system
+    ):
+        """Apple Silicon detection sets compute_units to 'cpu_and_ne'."""
+        mock_run.return_value = subprocess.CompletedProcess(
+            args=[], returncode=0, stdout="Apple M3 Max"
+        )
+
+        env = HardwareEnv()
+        result = env._try_mps()
+        assert result is True
+
+        # Simulate what detect() does after _try_mps
+        spec = BACKEND_SPECS.get(env.backend, BACKEND_SPECS["cpu"])
+        if spec.compute_units:
+            env.compute_units = spec.compute_units
+
+        assert env.backend == "mps"
+        assert env.compute_units == "cpu_and_ne"
+
+    def test_to_dict_includes_compute_units_for_mps(self):
+        """to_dict() includes compute_units when backend is mps."""
+        env = HardwareEnv()
+        env.backend = "mps"
+        env.compute_units = "cpu_and_ne"
+        d = env.to_dict()
+        assert "compute_units" in d
+        assert d["compute_units"] == "cpu_and_ne"
+
+    def test_to_dict_excludes_compute_units_for_non_mps(self):
+        """to_dict() does NOT include compute_units for non-mps backends."""
+        env = HardwareEnv()
+        env.backend = "cuda"
+        d = env.to_dict()
+        assert "compute_units" not in d
+
+
+# ── Tests: _load_coreml_with_compute_units ──────────────────────────────────
+
+class TestLoadCoremlWithComputeUnits:
+    """Test the monkey-patch mechanism for CoreML compute_units."""
+
+    def test_monkey_patch_injects_compute_units(self):
+        """
+        The real loader injects CPU_AND_NE into MLModel while YOLO loads.
+
+        Drives _load_coreml_with_compute_units() end to end with mocked
+        coremltools/ultralytics modules rather than re-creating the patch
+        class inside the test.
+        """
+        env = HardwareEnv()
+        env.backend = "mps"
+        env.compute_units = "cpu_and_ne"
+
+        class MockMLModel:
+            """Stand-in for coremltools.models.MLModel; records kwargs."""
+
+            def __init__(self, *args, **kwargs):
+                self.kwargs = kwargs
+
+        # Create mock coremltools module
+        mock_ct = mock.MagicMock()
+        mock_ct.ComputeUnit.CPU_AND_NE = "CPU_AND_NE_SENTINEL"
+        mock_ct.ComputeUnit.ALL = "ALL_SENTINEL"
+        mock_ct.models.MLModel = MockMLModel
+
+        seen_classes = []  # what ct.models.MLModel was during the load
+        created = []       # MLModel instances built during the load
+
+        def fake_yolo(path):
+            """Simulate what YOLO does internally: call ct.models.MLModel."""
+            current_mlmodel = mock_ct.models.MLModel
+            seen_classes.append(current_mlmodel)
+            # One load without compute_units, one with an explicit value
+            created.append(current_mlmodel(path))
+            created.append(current_mlmodel(path, compute_units="CUSTOM"))
+            return mock.MagicMock()
+
+        mock_yolo = mock.MagicMock(side_effect=fake_yolo)
+
+        with mock.patch.dict("sys.modules", {
+            "coremltools": mock_ct,
+            "ultralytics": mock.MagicMock(YOLO=mock_yolo),
+        }):
+            env._load_coreml_with_compute_units("test.mlpackage")
+
+        mock_yolo.assert_called_once_with("test.mlpackage")
+
+        # While YOLO loaded, MLModel was a patched subclass of the original
+        assert seen_classes[0] is not MockMLModel
+        assert issubclass(seen_classes[0], MockMLModel)
+
+        # Verify patch injects compute_units
+        injected, explicit = created
+        assert injected.kwargs.get('compute_units') == "CPU_AND_NE_SENTINEL"
+
+        # Verify explicit override is preserved
+        assert explicit.kwargs.get('compute_units') == "CUSTOM"
+
+        # Verify the original class is back in place afterwards
+        assert mock_ct.models.MLModel is MockMLModel
+
+    def test_monkey_patch_restored_after_load(self):
+        """MLModel is restored to original after YOLO load, even on error."""
+        env = HardwareEnv()
+        env.backend = "mps"
+        env.compute_units = "cpu_and_ne"
+
+        mock_ct = mock.MagicMock()
+        mock_ct.ComputeUnit.CPU_AND_NE = "CPU_AND_NE_SENTINEL"
+        original_mlmodel = mock.MagicMock()
+        mock_ct.models.MLModel = original_mlmodel
+
+        mock_yolo = mock.MagicMock(side_effect=Exception("test error"))
+
+        with mock.patch.dict("sys.modules", {
+            "coremltools": mock_ct,
+            "ultralytics": mock.MagicMock(YOLO=mock_yolo),
+        }):
+            try:
+                env._load_coreml_with_compute_units("test.mlpackage")
+            except Exception:
+                pass
+
+            # MLModel should be restored to original even after error
+            assert mock_ct.models.MLModel is original_mlmodel
+
+    def test_unknown_compute_units_falls_back(self):
+        """Unknown compute_units string falls back to plain YOLO load."""
+        env = HardwareEnv()
+        env.backend = "mps"
+        env.compute_units = "unknown_units"
+
+        mock_model = mock.MagicMock()
+        mock_yolo = mock.MagicMock(return_value=mock_model)
+
+        with mock.patch.dict("sys.modules", {
+            "ultralytics": mock.MagicMock(YOLO=mock_yolo),
+        }):
+            result = env._load_coreml_with_compute_units("test.mlpackage")
+            mock_yolo.assert_called_once_with("test.mlpackage")
+            assert result is mock_model  # fallback returns the YOLO model
+
+    def test_coremltools_missing_falls_back(self):
+        """If coremltools import fails, falls back to plain YOLO load."""
+        env = HardwareEnv()
+        env.backend = "mps"
+        env.compute_units = "cpu_and_ne"
+
+        mock_model = mock.MagicMock()
+        mock_yolo = mock.MagicMock(return_value=mock_model)
+
+        # Make coremltools import fail (a None entry raises ImportError)
+        with mock.patch.dict("sys.modules", {
+            "coremltools": None,
+            "ultralytics": mock.MagicMock(YOLO=mock_yolo),
+        }):
+            result = env._load_coreml_with_compute_units("test.mlpackage")
+            mock_yolo.assert_called_once_with("test.mlpackage")
+            assert result is mock_model  # fallback returns the YOLO model
+
+
+# ── Tests: load_optimized integration ───────────────────────────────────────
+
+class TestLoadOptimizedMPS:
+    """Test that load_optimized routes through compute_units on MPS."""
+
+    def test_mps_cached_model_uses_compute_units(self):
+        """When cached .mlpackage exists, loads via _load_coreml_with_compute_units."""
+        env = HardwareEnv()
+        env.backend = "mps"
+        env.device = "mps"
+        env.export_format = "coreml"
+        env.framework_ok = True
+        env.compute_units = "cpu_and_ne"
+
+        mock_model = mock.MagicMock()
+
+        with mock.patch.object(env, "_load_coreml_with_compute_units",
+                               return_value=mock_model) as mock_load:
+            with mock.patch.object(env, "get_optimized_path") as mock_path:
+                mock_path.return_value = mock.MagicMock(exists=lambda: True)
+
+                with mock.patch.dict("sys.modules", {
+                    "ultralytics": mock.MagicMock(),
+                }):
+                    model, fmt = env.load_optimized("yolo26n")
+
+                assert fmt == "coreml"
+                mock_load.assert_called_once()
+
+    def test_mps_compute_units_all_skips_monkey_patch(self):
+        """When compute_units='all', loads via standard YOLO path."""
+        env = HardwareEnv()
+        env.backend = "mps"
+        env.device = "mps"
+        env.export_format = "coreml"
+        env.framework_ok = True
+        env.compute_units = "all"  # explicit: use all units including GPU
+
+        mock_yolo = mock.MagicMock()
+        mock_model = mock.MagicMock()
+        mock_yolo.return_value = mock_model
+
+        with mock.patch.object(env, "get_optimized_path") as mock_path:
+            mock_path.return_value = mock.MagicMock(exists=lambda: True)
+
+            with mock.patch.dict("sys.modules", {
+                "ultralytics": mock.MagicMock(YOLO=mock_yolo),
+            }):
+                model, fmt = env.load_optimized("yolo26n")
+
+            assert fmt == "coreml"
+            mock_yolo.assert_called_once()