audiohacking · Copilot · Mar 1, 2026 · Mar 1, 2026 · Mar 1, 2026 · Mar 1, 2026
diff --git a/.github/workflows/ci-mps.yml b/.github/workflows/ci-mps.yml
@@ -0,0 +1,186 @@
+name: MPS/Metal CI (comfy-action)
+
+# End-to-end CI for the macOS MPS/Metal backend using Comfy-Org/comfy-action.
+#
+# comfy-action is designed to run inside a ComfyUI repository workspace (it
+# calls `python main.py`).  For a custom node repo we therefore:
+#   1. Use comfy-cli (the official Comfy-Org CLI used by comfy-action) to
+#      install ComfyUI into $GITHUB_WORKSPACE so that `main.py` is present.
+#   2. Install this custom node into ComfyUI's custom_nodes directory.
+#   3. Build the C++ binaries — install.py passes
+#      -DGGML_METAL_EMBED_LIBRARY=ON so Metal shaders are compiled at build
+#      time, avoiding the runtime JIT failures (MTLLibraryErrorDomain /
+#      SIGABRT: exit 134 in a shell, -6 from Python) this workflow guards against.
+#   4. Run a fast Metal smoke test that exercises binary start-up without
+#      needing any model files.
+#   5. Download the minimum GGUF model set (cached between runs).
+#   6. Execute the text2music workflow via Comfy-Org/comfy-action which starts
+#      ComfyUI, submits the workflow JSON over the HTTP API, and verifies the
+#      output — the same detection path that would have caught the MPS failure.
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  test-mps:
+    name: E2E test – macOS MPS/Metal (comfy-action)
+    # macos-latest uses an Apple Silicon (M-series) hosted runner, giving us a
+    # real Metal/MPS environment without a self-hosted machine.
+    runs-on: macos-latest
+    permissions:
+      contents: read
+
+    steps:
+      # -----------------------------------------------------------------------
+      # Step 1 – Check out our custom node source to a subdirectory so that
+      # the workspace root can be taken over by the ComfyUI installation that
+      # comfy-action expects to find there (main.py etc.).
+      # -----------------------------------------------------------------------
+      - name: Checkout custom node source
+        uses: actions/checkout@v4
+        with:
+          path: node_src
+
+      # -----------------------------------------------------------------------
+      # Step 2 – Python + comfy-cli setup.
+      # comfy-cli is the official Comfy-Org CLI; it is also installed by
+      # comfy-action itself during its dependency step.
+      # -----------------------------------------------------------------------
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install comfy-cli and build tools
+        run: |
+          pip install comfy-cli
+          brew install cmake
+
+      # -----------------------------------------------------------------------
+      # Step 3 – Install ComfyUI into the workspace root.
+      # After this step $GITHUB_WORKSPACE contains main.py, requirements.txt,
+      # models/, custom_nodes/ etc. — the layout comfy-action expects.
+      # -----------------------------------------------------------------------
+      - name: Install ComfyUI into workspace root
+        run: |
+          # --skip-prompt/--workspace are global comfy options: before `install`.
+          comfy --skip-prompt --workspace "$GITHUB_WORKSPACE" \
+            install --version nightly --m-series
+
+      # -----------------------------------------------------------------------
+      # Step 4 – Copy our custom node into ComfyUI's custom_nodes directory and
+      # build the C++ binaries.  install.py now passes
+      # -DGGML_METAL_EMBED_LIBRARY=ON so the Metal .metallib is compiled at
+      # cmake build time and embedded in the binary.  This avoids the runtime
+      # JIT compilation step that was crashing with MTLLibraryErrorDomain
+      # errors on certain macOS/Xcode SDK combinations.
+      # -----------------------------------------------------------------------
+      - name: Install custom node into ComfyUI
+        run: |
+          cp -r node_src "$GITHUB_WORKSPACE/custom_nodes/acestep-cpp-comfyui"
+
+      - name: Build C++ binaries (GGML_METAL_EMBED_LIBRARY=ON)
+        run: python "$GITHUB_WORKSPACE/custom_nodes/acestep-cpp-comfyui/install.py"
+
+      # -----------------------------------------------------------------------
+      # Step 5 – Metal initialisation smoke test.
+      # ace-qwen3 initialises the Metal device at process start-up, before it
+      # reads any model file.  Running it against a nonexistent path is
+      # therefore enough to detect the SIGABRT (exit 134) caused by the Metal
+      # JIT shader-compilation failure from the original bug report.
+      # -----------------------------------------------------------------------
+      - name: Metal initialisation smoke test
+        run: |
+          BUILD="$GITHUB_WORKSPACE/custom_nodes/acestep-cpp-comfyui/acestep.cpp/build"
+          if   [ -f "$BUILD/ace-qwen3" ];     then BIN="$BUILD/ace-qwen3"
+          elif [ -f "$BUILD/bin/ace-qwen3" ]; then BIN="$BUILD/bin/ace-qwen3"
+          else echo "ace-qwen3 not found" && exit 1
+          fi
+
+          echo '{"task_type":"text2music","caption":"ci","seed":42}' > /tmp/smoke.json
+
+          # Write to a log instead of piping into head: $? after a pipeline is the
+          # last command's status (head's, normally 0), which masked exit 134.
+          set +e
+          "$BIN" --request /tmp/smoke.json --model /tmp/no-such-model.gguf > /tmp/smoke.log 2>&1
+          EXIT=$?
+          set -e
+          head -60 /tmp/smoke.log
+          if [ "$EXIT" = "134" ]; then
+            echo "FAIL: ace-qwen3 exited with SIGABRT (134)."
+            echo "      Metal shader JIT compilation failed (MTLLibraryErrorDomain)."
+            echo "      Rebuild with GGML_METAL_EMBED_LIBRARY=ON (now the default)"
+            echo "      or use the Builder node with backend=cpu."
+            exit 1
+          fi
+          echo "OK: Metal initialised successfully (exit $EXIT; model-not-found is expected)"
+
+      # -----------------------------------------------------------------------
+      # Step 6 – Download the smallest viable GGUF model set with caching.
+      # We pick the 0.6B LM and Q4_K_M DiT to minimise download size while
+      # still exercising the full ace-qwen3 → dit-vae pipeline.
+      # -----------------------------------------------------------------------
+      - name: Cache GGUF models
+        uses: actions/cache@v4
+        with:
+          path: ~/gguf-model-cache
+          key: gguf-models-0.6B-Q4KM-v1
+
+      - name: Download GGUF models
+        run: |
+          BASE="https://huggingface.co/Serveurperso/ACE-Step-1.5-GGUF/resolve/main"
+          CACHE="$HOME/gguf-model-cache"
+          DEST="$GITHUB_WORKSPACE/models/text_encoders"
+          mkdir -p "$CACHE" "$DEST"
+
+          fetch() {
+            local name="$1"
+            if [ -f "$CACHE/$name" ]; then
+              echo "✓ $name (cached)"
+            else
+              echo "↓ $name"
+              curl -fL -o "$CACHE/$name" "$BASE/$name"
+            fi
+            cp "$CACHE/$name" "$DEST/$name"
+          }
+
+          fetch "vae-BF16.gguf"
+          fetch "Qwen3-Embedding-0.6B-Q8_0.gguf"
+          fetch "acestep-5Hz-lm-0.6B-Q8_0.gguf"
+          fetch "acestep-v15-turbo-Q4_K_M.gguf"
+
+      # -----------------------------------------------------------------------
+      # Step 7 – Run the text2music workflow via Comfy-Org/comfy-action.
+      #
+      # At this point $GITHUB_WORKSPACE IS a valid ComfyUI installation
+      # (main.py, requirements.txt, models/, custom_nodes/ all present).
+      # comfy-action will:
+      #   a) re-checkout this repo (our custom node) — harmless because we
+      #      already copied the node into custom_nodes/ above;
+      #   b) install ComfyUI Python deps (requirements.txt already present);
+      #   c) start `python main.py` from $GITHUB_WORKSPACE (ComfyUI root);
+      #   d) submit acestep-cpp-text2music.json via the HTTP API;
+      #   e) verify the output audio file is produced.
+      #
+      # A Metal JIT failure in ace-qwen3 / dit-vae would surface here as the
+      # workflow node raising a RuntimeError, failing the CI job.
+      # -----------------------------------------------------------------------
+      - name: Run text2music workflow via comfy-action (MPS/Metal)
+        uses: Comfy-Org/comfy-action@main
+        with:
+          os: macos
+          python_version: "3.11"
+          torch_version: stable
+          workflow_filenames: acestep-cpp-text2music.json
+          comfyui_flags: "--force-fp16"
+          timeout: "600"
+          models-json: >-
+            {
+              "vae-BF16.gguf":                      { "url": "https://huggingface.co/Serveurperso/ACE-Step-1.5-GGUF/resolve/main/vae-BF16.gguf",                      "directory": "text_encoders" },
+              "Qwen3-Embedding-0.6B-Q8_0.gguf":     { "url": "https://huggingface.co/Serveurperso/ACE-Step-1.5-GGUF/resolve/main/Qwen3-Embedding-0.6B-Q8_0.gguf",     "directory": "text_encoders" },
+              "acestep-5Hz-lm-0.6B-Q8_0.gguf":      { "url": "https://huggingface.co/Serveurperso/ACE-Step-1.5-GGUF/resolve/main/acestep-5Hz-lm-0.6B-Q8_0.gguf",      "directory": "text_encoders" },
+              "acestep-v15-turbo-Q4_K_M.gguf":      { "url": "https://huggingface.co/Serveurperso/ACE-Step-1.5-GGUF/resolve/main/acestep-v15-turbo-Q4_K_M.gguf",      "directory": "text_encoders" }
+            }
diff --git a/install.py b/install.py
@@ -51,7 +51,12 @@ def _detect_backend() -> str:
 def _cmake_flags(backend: str):
     return {
         "cuda":  ["-DGGML_CUDA=ON"],
-        "metal": [],
+        # GGML_METAL_EMBED_LIBRARY=ON pre-compiles Metal shaders at build time
+        # and embeds the resulting binary .metallib in the executable.  This
+        # avoids the Metal JIT compilation that happens at runtime when only
+        # source is embedded, which can fail on certain macOS/Xcode SDK
+        # combinations with template type-mismatch errors in the Metal shader.
+        "metal": ["-DGGML_METAL_EMBED_LIBRARY=ON"],
         "blas":  ["-DGGML_BLAS=ON"],
         "cpu":   [],
     }.get(backend, [])

diff --git a/nodes.py b/nodes.py
@@ -385,7 +385,12 @@ def _cmake_flags(backend: str) -> List[str]:
         """Translate backend name to CMake -D flags."""
         mapping = {
             "cuda":  ["-DGGML_CUDA=ON"],
-            "metal": [],  # Metal is auto-enabled on macOS by ggml
+            # GGML_METAL_EMBED_LIBRARY=ON pre-compiles Metal shaders at build
+            # time and embeds the resulting binary .metallib in the executable.
+            # This avoids the Metal JIT compilation that happens at runtime when
+            # only source is embedded, which can fail on certain macOS/Xcode SDK
+            # combinations with template type-mismatch errors in the Metal shader.
+            "metal": ["-DGGML_METAL_EMBED_LIBRARY=ON"],
             "blas":  ["-DGGML_BLAS=ON"],
             "cpu":   [],
         }
@@ -1033,9 +1038,25 @@ def generate(
                 lm_cmd, capture_output=True, text=True, cwd=tmpdir
             )
             if lm_result.returncode != 0:
+                stderr = lm_result.stderr
+                # Detect Metal shader JIT compilation failure on macOS and give
+                # a targeted hint: rebuild with GGML_METAL_EMBED_LIBRARY=ON (the
+                # default when using the Builder node) so shaders are compiled at
+                # build time rather than at runtime, avoiding this crash.
+                metal_hint = ""
+                if platform.system() == "Darwin" and "MTLLibraryErrorDomain" in stderr:
+                    metal_hint = (
+                        "\n\nMetal shader compilation failed at runtime. "
+                        "Rebuild the binaries using the 'Acestep.cpp Builder' node "
+                        "(or re-run install.py) so that Metal shaders are compiled "
+                        "at build time and embedded as a binary .metallib — this "
+                        "avoids the Metal JIT compilation step that is failing here. "
+                        "Alternatively, select backend='cpu' in the Builder node to "
+                        "skip Metal entirely."
+                    )
                 raise RuntimeError(
                     f"ace-qwen3 failed (exit {lm_result.returncode}):\n"
-                    f"{lm_result.stderr}"
+                    f"{stderr}{metal_hint}"
                 )
 
             lm_output = os.path.join(tmpdir, "request0.json")

diff --git a/tests/test_nodes.py b/tests/test_nodes.py
@@ -279,10 +279,16 @@ def test_cmake_flags_cuda(self):
 
     def test_cmake_flags_blas(self):
         assert "-DGGML_BLAS=ON" in nodes.AcestepCPPBuilder._cmake_flags("blas")
+
+    def test_cmake_flags_metal_embeds_library(self):
+        """metal backend must pre-compile Metal shaders at build time to avoid
+        runtime JIT compilation failures (MTLLibraryErrorDomain errors) on
+        certain macOS/Xcode SDK combinations."""
+        assert "-DGGML_METAL_EMBED_LIBRARY=ON" in nodes.AcestepCPPBuilder._cmake_flags("metal")
 
 
 # ===========================================================================
 # _binary_in_build — shared helper for multi-location binary search
 # ===========================================================================
 
 class TestBinaryInBuild: