From e464b1835a9ecae449f2f7b5c4a57aafbebc1341 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 7 Mar 2026 16:56:10 +0000
Subject: [PATCH 1/3] Initial plan


From d13135d6de57c3d90ec6e6cb7a644cb68f53c438 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 7 Mar 2026 17:07:33 +0000
Subject: [PATCH 2/3] Fix dit-vae Metal tensor API crash on pre-M5 Apple
 Silicon (M3 Pro Max)

Co-authored-by: lmangani <1423657+lmangani@users.noreply.github.com>
---
 .env.example                   | 14 +++++++
 server/src/services/acestep.ts | 77 ++++++++++++++++++++++++++++++----
 2 files changed, 82 insertions(+), 9 deletions(-)
diff --git a/.env.example b/.env.example
index ab45d3b..0baaa43 100644
--- a/.env.example
+++ b/.env.example
@@ -45,3 +45,17 @@ JWT_SECRET=ace-step-ui-local-secret
 # ── Optional ──────────────────────────────────────────────────────────────────
 # Pexels API key for video backgrounds — https://www.pexels.com/api/
 # PEXELS_API_KEY=
+
+# ── Binary extra arguments ────────────────────────────────────────────────────
+# Append extra CLI flags to the ace-qwen3 or dit-vae spawn invocations.
+# Useful for hardware-specific tuning or debugging.
+#
+# Apple Silicon M1–M4 users: if dit-vae fails with
+#   "tensor API disabled for pre-M5 and pre-A19 devices"
+# the server will automatically retry with -ngl 0 (CPU-only mode).
+# You can force CPU-only from the start with:
+#   DIT_VAE_EXTRA_ARGS=-ngl 0
+#   ACE_QWEN3_EXTRA_ARGS=-ngl 0
+#
+# DIT_VAE_EXTRA_ARGS=
+# ACE_QWEN3_EXTRA_ARGS=
diff --git a/server/src/services/acestep.ts b/server/src/services/acestep.ts
index 6259582..1737dc1 100644
--- a/server/src/services/acestep.ts
+++ b/server/src/services/acestep.ts
@@ -199,16 +199,80 @@ function resolveAudioPath(audioUrl: string): string {
 //   <tmpDir>/request0.json → dit-vae   → <tmpDir>/request00.wav
 // ---------------------------------------------------------------------------
 
+/**
+ * Split an env-var string into CLI arguments on whitespace; undefined/blank → [].
+ * Quoted spans ("…" or '…') keep their spaces and lose their quotes, even mid-token.
+ * Example: '--threads 4 --tag="hello world"' → ['--threads', '4', '--tag=hello world']
+ */
+function parseExtraArgs(envVar: string | undefined): string[] {
+  if (!envVar?.trim()) return [];
+  const args: string[] = [];
+  const re = /(?:[^\s"']+|"[^"]*"|'[^']*')+/g;
+  let m: RegExpExecArray | null;
+  while ((m = re.exec(envVar)) !== null) {
+    args.push(m[0].replace(/"([^"]*)"|'([^']*)'/g, (_q: string, dq?: string, sq?: string) => dq ?? sq ?? ''));
+  }
+  return args;
+}
+
+// Sentinel text emitted by ggml-metal when the device is pre-M5 / pre-A19.
+// Seeing this in stderr on a non-zero exit means GPU init failed; we retry
+// with -ngl 0 so the binary falls back to CPU-only execution.
+const METAL_TENSOR_API_DISABLED_MSG = 'tensor API disabled for pre-M5 and pre-A19 devices';
+
+/** Build a human-readable error message from a failed binary run (max 2000 chars). */
+function buildBinaryError(label: string, result: { exitCode: number | null; stdout: string; stderr: string }): Error {
+  const msg = (result.stderr || result.stdout || `exit code ${result.exitCode}`).slice(0, 2000);
+  return new Error(`${label} failed: ${msg}`);
+}
+
+/** Returns true when the args already include a GPU-layers disable flag. */
+function hasNglFlag(args: string[]): boolean {
+  return args.includes('-ngl') || args.includes('--n-gpu-layers') || args.includes('--ngl');
+}
+
 /** Run a binary and return its captured stdout/stderr. Throws on non-zero exit. */
 async function runBinary(
   bin: string,
   args: string[],
   label: string,
+  env?: NodeJS.ProcessEnv,
 ): Promise<{ stdout: string; stderr: string }> {
+  const result = await runBinaryOnce(bin, args, label, env);
+
+  // Auto-retry without GPU layers when the Metal tensor API is unavailable
+  // (affects Apple Silicon M1–M4 with newer ggml builds).
+  // Note: extra args from DIT_VAE_EXTRA_ARGS / ACE_QWEN3_EXTRA_ARGS are already
+  // included in `args`, so hasNglFlag() correctly covers user-supplied flags too.
+  if (
+    result.exitCode !== 0 &&
+    result.stderr.includes(METAL_TENSOR_API_DISABLED_MSG) &&
+    !hasNglFlag(args)
+  ) {
+    console.warn(
+      `[Spawn] ${label}: Metal tensor API unavailable on this device — retrying with -ngl 0 (CPU-only)`,
+    );
+    const retry = await runBinaryOnce(bin, ['-ngl', '0', ...args], label, env);
+    if (retry.exitCode !== 0) throw buildBinaryError(label, retry);
+    return { stdout: retry.stdout, stderr: retry.stderr };
+  }
+
+  if (result.exitCode !== 0) throw buildBinaryError(label, result);
+
+  return { stdout: result.stdout, stderr: result.stderr };
+}
+
+/** Internal: spawn once and collect output. Never throws on non-zero exit. */
+function runBinaryOnce(
+  bin: string,
+  args: string[],
+  label: string,
+  env?: NodeJS.ProcessEnv,
+): Promise<{ exitCode: number | null; stdout: string; stderr: string }> {
   return new Promise((resolve, reject) => {
     const proc = spawn(bin, args, {
       shell: false,
-      env:   { ...process.env },
+      env:   { ...process.env, ...env },
       stdio: ['ignore', 'pipe', 'pipe'],
     });
 
@@ -217,14 +281,7 @@ async function runBinary(
     proc.stdout.on('data', (chunk: Buffer) => { stdout += chunk.toString(); });
     proc.stderr.on('data', (chunk: Buffer) => { stderr += chunk.toString(); });
 
-    proc.on('close', (code) => {
-      if (code === 0) {
-        resolve({ stdout, stderr });
-      } else {
-        const msg = (stderr || stdout || `exit code ${code}`).slice(0, 500);
-        reject(new Error(`${label} failed: ${msg}`));
-      }
-    });
+    proc.on('close', (code) => resolve({ exitCode: code, stdout, stderr }));
     proc.on('error', (err) => reject(new Error(`${label} process error: ${err.message}`)));
   });
 }
@@ -294,6 +351,7 @@ async function runViaSpawn(
 
       const batchSize = Math.min(Math.max(params.batchSize ?? 1, 1), 8);
       if (batchSize > 1) lmArgs.push('--batch', String(batchSize));
+      lmArgs.push(...parseExtraArgs(process.env.ACE_QWEN3_EXTRA_ARGS));
 
       console.log(`[Spawn] Job ${jobId}: ace-qwen3 ${lmArgs.slice(0, 6).join(' ')} …`);
       await runBinary(lmBin, lmArgs, 'ace-qwen3');
@@ -344,6 +402,7 @@ async function runViaSpawn(
                                    ditArgs.push('--repainting-start', String(params.repaintingStart));
     if (params.repaintingEnd && params.repaintingEnd > 0)
                                    ditArgs.push('--repainting-end',   String(params.repaintingEnd));
+    ditArgs.push(...parseExtraArgs(process.env.DIT_VAE_EXTRA_ARGS));
 
     console.log(`[Spawn] Job ${jobId}: dit-vae ${ditArgs.slice(0, 6).join(' ')} …`);
     await runBinary(ditVaeBin, ditArgs, 'dit-vae');

From 28c4ea9ce837d7270d76758749f83e692684819c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 7 Mar 2026 17:56:08 +0000
Subject: [PATCH 3/3] Remove wrong Metal retry; add live pipeline progress from
 binary stderr

Co-authored-by: lmangani <1423657+lmangani@users.noreply.github.com>
---
 .env.example                   |  11 +-
 server/src/services/acestep.ts | 197 +++++++++++++++++++++++++--------
 2 files changed, 153 insertions(+), 55 deletions(-)

diff --git a/.env.example b/.env.example
index 0baaa43..7aebdae 100644
--- a/.env.example
+++ b/.env.example
@@ -48,14 +48,9 @@ JWT_SECRET=ace-step-ui-local-secret
 
 # ── Binary extra arguments ────────────────────────────────────────────────────
 # Append extra CLI flags to the ace-qwen3 or dit-vae spawn invocations.
-# Useful for hardware-specific tuning or debugging.
-#
-# Apple Silicon M1–M4 users: if dit-vae fails with
-#   "tensor API disabled for pre-M5 and pre-A19 devices"
-# the server will automatically retry with -ngl 0 (CPU-only mode).
-# You can force CPU-only from the start with:
-#   DIT_VAE_EXTRA_ARGS=-ngl 0
-#   ACE_QWEN3_EXTRA_ARGS=-ngl 0
+# Useful for hardware-specific tuning or debugging, e.g. limit CPU threads:
+#   ACE_QWEN3_EXTRA_ARGS=--threads 4
+#   DIT_VAE_EXTRA_ARGS=--threads 4
 #
 # DIT_VAE_EXTRA_ARGS=
 # ACE_QWEN3_EXTRA_ARGS=
diff --git a/server/src/services/acestep.ts b/server/src/services/acestep.ts
index 1737dc1..13f64dc 100644
--- a/server/src/services/acestep.ts
+++ b/server/src/services/acestep.ts
@@ -215,60 +215,23 @@ function parseExtraArgs(envVar: string | undefined): string[] {
   return args;
 }
 
-// Sentinel text emitted by ggml-metal when the device is pre-M5 / pre-A19.
-// Seeing this in stderr on a non-zero exit means GPU init failed; we retry
-// with -ngl 0 so the binary falls back to CPU-only execution.
-const METAL_TENSOR_API_DISABLED_MSG = 'tensor API disabled for pre-M5 and pre-A19 devices';
-
 /** Build a human-readable error message from a failed binary run (max 2000 chars). */
 function buildBinaryError(label: string, result: { exitCode: number | null; stdout: string; stderr: string }): Error {
   const msg = (result.stderr || result.stdout || `exit code ${result.exitCode}`).slice(0, 2000);
   return new Error(`${label} failed: ${msg}`);
 }
 
-/** Returns true when the args already include a GPU-layers disable flag. */
-function hasNglFlag(args: string[]): boolean {
-  return args.includes('-ngl') || args.includes('--n-gpu-layers') || args.includes('--ngl');
-}
-
-/** Run a binary and return its captured stdout/stderr. Throws on non-zero exit. */
-async function runBinary(
+/**
+ * Run a binary, streaming stderr lines to an optional callback, and return
+ * captured output. Throws with a detailed message on non-zero exit.
+ */
+function runBinary(
   bin: string,
   args: string[],
   label: string,
   env?: NodeJS.ProcessEnv,
+  onLine?: (line: string) => void,
 ): Promise<{ stdout: string; stderr: string }> {
-  const result = await runBinaryOnce(bin, args, label, env);
-
-  // Auto-retry without GPU layers when the Metal tensor API is unavailable
-  // (affects Apple Silicon M1–M4 with newer ggml builds).
-  // Note: extra args from DIT_VAE_EXTRA_ARGS / ACE_QWEN3_EXTRA_ARGS are already
-  // included in `args`, so hasNglFlag() correctly covers user-supplied flags too.
-  if (
-    result.exitCode !== 0 &&
-    result.stderr.includes(METAL_TENSOR_API_DISABLED_MSG) &&
-    !hasNglFlag(args)
-  ) {
-    console.warn(
-      `[Spawn] ${label}: Metal tensor API unavailable on this device — retrying with -ngl 0 (CPU-only)`,
-    );
-    const retry = await runBinaryOnce(bin, ['-ngl', '0', ...args], label, env);
-    if (retry.exitCode !== 0) throw buildBinaryError(label, retry);
-    return { stdout: retry.stdout, stderr: retry.stderr };
-  }
-
-  if (result.exitCode !== 0) throw buildBinaryError(label, result);
-
-  return { stdout: result.stdout, stderr: result.stderr };
-}
-
-/** Internal: spawn once and collect output. Never throws on non-zero exit. */
-function runBinaryOnce(
-  bin: string,
-  args: string[],
-  label: string,
-  env?: NodeJS.ProcessEnv,
-): Promise<{ exitCode: number | null; stdout: string; stderr: string }> {
   return new Promise((resolve, reject) => {
     const proc = spawn(bin, args, {
       shell: false,
@@ -278,14 +241,154 @@ function runBinaryOnce(
 
     let stdout = '';
     let stderr = '';
+    let lineBuf = '';
+
     proc.stdout.on('data', (chunk: Buffer) => { stdout += chunk.toString(); });
-    proc.stderr.on('data', (chunk: Buffer) => { stderr += chunk.toString(); });
+    proc.stderr.on('data', (chunk: Buffer) => {
+      const text = chunk.toString();
+      stderr += text;
+      if (onLine) {
+        lineBuf += text;
+        const lines = lineBuf.split('\n');
+        lineBuf = lines.pop() ?? '';
+        for (const line of lines) {
+          const trimmed = line.trim();
+          if (trimmed) onLine(trimmed);
+        }
+      }
+    });
 
-    proc.on('close', (code) => resolve({ exitCode: code, stdout, stderr }));
+    proc.on('close', (code) => {
+      if (code === 0) {
+        resolve({ stdout, stderr });
+      } else {
+        reject(buildBinaryError(label, { exitCode: code, stdout, stderr }));
+      }
+    });
     proc.on('error', (err) => reject(new Error(`${label} process error: ${err.message}`)));
   });
 }
 
+// ---------------------------------------------------------------------------
+// Live progress parsing — translates binary stderr lines into job.stage /
+// job.progress updates that the polling API can return to the frontend.
+//
+// ace-qwen3 progress lines (all on stderr):
+//   [Phase1] step 100, 1 active, 19.0 tok/s   — lyrics LM decode
+//   [Phase1] Decode 15871ms                    — Phase1 complete
+//   [Phase2] max_tokens: 800, …               — captures audio-codes budget
+//   [Decode] step 50, 1 active, 51 total codes, 20.1 tok/s — audio LM decode
+//
+// dit-vae progress lines (all on stderr):
+//   [DiT] Starting: T=…, steps=8, …           — captures DiT step count
+//   [DiT] step 1/8 t=1.000                    — DiT diffusion step N/M
+//   [DiT] Total generation: …                 — DiT complete
+//   [VAE] Tiled decode: 28 tiles …            — VAE starting
+//   [VAE] Tiled decode done: 28 tiles → …    — VAE complete
+//
+// Progress scale:  0–50% ace-qwen3 | 50–100% dit-vae
+// ---------------------------------------------------------------------------
+
+// Progress budget across the two-binary pipeline (must sum to 100):
+//   0–30%   ace-qwen3 Phase1  (lyrics LM decode — step count varies, ~200–400)
+//  30–50%   ace-qwen3 Phase2  (audio-codes LM decode)
+//  50–85%   dit-vae DiT       (diffusion steps — exact N/M known at runtime)
+//  85–100%  dit-vae VAE       (tiled audio decode; handler stops at PROGRESS_VAE_END = 98)
+const PROGRESS_LM_PHASE1_MAX   = 30;  // % at end of Phase1
+const PROGRESS_LM_PHASE2_END   = 50;  // % at end of Phase2 (= start of dit-vae)
+const PROGRESS_DIT_END         = 85;  // % at end of DiT diffusion
+const PROGRESS_VAE_END         = 98;  // % at end of VAE decode (100 set on job success)
+
+/**
+ * Builds the onLine callback for ace-qwen3 stderr. It mutates job.stage and job.progress
+ * in place (0–50% overall); lines that match none of the known patterns are ignored.
+ */
+function makeLmProgressHandler(job: ActiveJob): (line: string) => void {
+  let phase2MaxTokens = 800;
+  // Phase1 step ceiling: ace-qwen3 typically produces 200–350 lyrics tokens.
+  // 400 is a generous upper bound; step/400 maps onto 0–28% (capped) until Phase1 completes.
+  const PHASE1_STEP_CEIL = 400;
+
+  return (line: string) => {
+    // Phase1 LM decode: "[Phase1] step 100, 1 active, 19.0 tok/s"
+    const p1 = line.match(/^\[Phase1\] step (\d+),.*?([\d.]+) tok\/s/);
+    if (p1) {
+      const step = parseInt(p1[1], 10);
+      const rate = p1[2];
+      job.progress = Math.min(PROGRESS_LM_PHASE1_MAX - 2, Math.round((step / PHASE1_STEP_CEIL) * (PROGRESS_LM_PHASE1_MAX - 2)));
+      job.stage    = `LLM: generating lyrics — step ${step} (${rate} tok/s)`;
+      return;
+    }
+    // Phase1 done: "[Phase1] Decode 15871ms" — jump to the full Phase1 budget
+    if (/^\[Phase1\] Decode/.test(line)) {
+      job.progress = PROGRESS_LM_PHASE1_MAX;
+      job.stage    = 'LLM: lyrics complete — generating audio codes…';
+      return;
+    }
+    // Phase2 max tokens: "[Phase2] max_tokens: 800, …" — denominator for Phase2 progress
+    const p2m = line.match(/^\[Phase2\] max_tokens:\s*(\d+)/);
+    if (p2m) {
+      phase2MaxTokens = parseInt(p2m[1], 10) || 800;
+      return;
+    }
+    // Phase2 audio-codes decode: "[Decode] step 50, 1 active, 51 total codes, 20.1 tok/s"
+    const p2d = line.match(/^\[Decode\] step \d+,.*?(\d+) total codes,.*?([\d.]+) tok\/s/);
+    if (p2d) {
+      const codes = parseInt(p2d[1], 10);
+      const rate  = p2d[2];
+      const phase2Range = PROGRESS_LM_PHASE2_END - PROGRESS_LM_PHASE1_MAX;
+      job.progress = PROGRESS_LM_PHASE1_MAX + Math.min(phase2Range, Math.round((codes / phase2MaxTokens) * phase2Range));
+      job.stage    = `LLM: audio codes — ${codes}/${phase2MaxTokens} (${rate} tok/s)`;
+    }
+  };
+}
+
+/**
+ * Returns an onLine callback for dit-vae stderr that updates job.stage and
+ * job.progress in place as the DiT+VAE pipeline progresses (50–98% overall).
+ */
+function makeDitVaeProgressHandler(job: ActiveJob): (line: string) => void {
+  let ditTotalSteps = 8;
+
+  return (line: string) => {
+    // DiT starting — capture step count: "[DiT] Starting: T=3470, S=1735, …, steps=8, …"
+    const ditStart = line.match(/^\[DiT\] Starting:.*?steps=(\d+)/);
+    if (ditStart) {
+      ditTotalSteps = parseInt(ditStart[1], 10) || 8;
+      return;
+    }
+    // DiT step: "[DiT] step 1/8 t=1.000"
+    const ditStep = line.match(/^\[DiT\] step (\d+)\/(\d+)/);
+    if (ditStep) {
+      const step  = parseInt(ditStep[1], 10);
+      // A "0" total would yield NaN/Infinity progress; fall back to the last known step count.
+      ditTotalSteps = parseInt(ditStep[2], 10) || ditTotalSteps;
+      const ditRange = PROGRESS_DIT_END - PROGRESS_LM_PHASE2_END;
+      job.progress = PROGRESS_LM_PHASE2_END + Math.round(Math.min(1, step / ditTotalSteps) * ditRange);
+      job.stage    = `DiT: step ${step}/${ditTotalSteps}`;
+      return;
+    }
+    // DiT complete: "[DiT] Total generation: 16200.0 ms …"
+    if (/^\[DiT\] Total generation/.test(line)) {
+      job.progress = PROGRESS_DIT_END;
+      job.stage    = 'VAE: decoding audio…';
+      return;
+    }
+    // VAE starting: "[VAE] Tiled decode: 28 tiles (chunk=256, overlap=64, stride=128)"
+    const vaeStart = line.match(/^\[VAE\] Tiled decode:\s*(\d+) tiles/);
+    if (vaeStart) {
+      job.progress = PROGRESS_DIT_END;
+      job.stage    = `VAE: decoding ${vaeStart[1]} tiles…`;
+      return;
+    }
+    // VAE done: "[VAE] Tiled decode done: 28 tiles → T_audio=…"
+    if (/^\[VAE\] Tiled decode done/.test(line)) {
+      job.progress = PROGRESS_VAE_END;
+      job.stage    = 'VAE: decode complete — writing audio…';
+    }
+  };
+}
+
 async function runViaSpawn(
   jobId: string,
   params: GenerationParams,
@@ -354,7 +457,7 @@ async function runViaSpawn(
       lmArgs.push(...parseExtraArgs(process.env.ACE_QWEN3_EXTRA_ARGS));
 
       console.log(`[Spawn] Job ${jobId}: ace-qwen3 ${lmArgs.slice(0, 6).join(' ')} …`);
-      await runBinary(lmBin, lmArgs, 'ace-qwen3');
+      await runBinary(lmBin, lmArgs, 'ace-qwen3', undefined, makeLmProgressHandler(job));
 
       // Collect enriched JSON files produced by ace-qwen3:
       // request.json → request0.json [, request1.json, …] (placed alongside request.json)
@@ -405,7 +508,7 @@ async function runViaSpawn(
     ditArgs.push(...parseExtraArgs(process.env.DIT_VAE_EXTRA_ARGS));
 
     console.log(`[Spawn] Job ${jobId}: dit-vae ${ditArgs.slice(0, 6).join(' ')} …`);
-    await runBinary(ditVaeBin, ditArgs, 'dit-vae');
+    await runBinary(ditVaeBin, ditArgs, 'dit-vae', undefined, makeDitVaeProgressHandler(job));
 
     // ── Collect generated WAV files ─────────────────────────────────────────
     // dit-vae places output WAVs alongside each enriched JSON: