From e464b1835a9ecae449f2f7b5c4a57aafbebc1341 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 7 Mar 2026 16:56:10 +0000 Subject: [PATCH 1/3] Initial plan From d13135d6de57c3d90ec6e6cb7a644cb68f53c438 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 7 Mar 2026 17:07:33 +0000 Subject: [PATCH 2/3] Fix dit-vae Metal tensor API crash on pre-M5 Apple Silicon (M3 Pro Max) Co-authored-by: lmangani <1423657+lmangani@users.noreply.github.com> --- .env.example | 14 +++++++ server/src/services/acestep.ts | 77 ++++++++++++++++++++++++++++++---- 2 files changed, 82 insertions(+), 9 deletions(-) diff --git a/.env.example b/.env.example index ab45d3b..0baaa43 100644 --- a/.env.example +++ b/.env.example @@ -45,3 +45,17 @@ JWT_SECRET=ace-step-ui-local-secret # ── Optional ────────────────────────────────────────────────────────────────── # Pexels API key for video backgrounds — https://www.pexels.com/api/ # PEXELS_API_KEY= + +# ── Binary extra arguments ──────────────────────────────────────────────────── +# Append extra CLI flags to the ace-qwen3 or dit-vae spawn invocations. +# Useful for hardware-specific tuning or debugging. +# +# Apple Silicon M1–M4 users: if dit-vae fails with +# "tensor API disabled for pre-M5 and pre-A19 devices" +# the server will automatically retry with -ngl 0 (CPU-only mode). +# You can force CPU-only from the start with: +# DIT_VAE_EXTRA_ARGS=-ngl 0 +# ACE_QWEN3_EXTRA_ARGS=-ngl 0 +# +# DIT_VAE_EXTRA_ARGS= +# ACE_QWEN3_EXTRA_ARGS= diff --git a/server/src/services/acestep.ts b/server/src/services/acestep.ts index 6259582..1737dc1 100644 --- a/server/src/services/acestep.ts +++ b/server/src/services/acestep.ts @@ -199,16 +199,80 @@ function resolveAudioPath(audioUrl: string): string { // /request0.json → dit-vae → /request00.wav // --------------------------------------------------------------------------- +/** + * Parse a space-separated list of extra CLI arguments from an env variable. + * Supports simple quoting: "hello world" is treated as a single argument. + * Example: ACE_QWEN3_EXTRA_ARGS="--threads 4" → ['--threads', '4'] + */ +function parseExtraArgs(envVar: string | undefined): string[] { + if (!envVar?.trim()) return []; + const args: string[] = []; + const re = /(?:[^\s"']+|"[^"]*"|'[^']*')+/g; + let m: RegExpExecArray | null; + while ((m = re.exec(envVar)) !== null) { + args.push(m[0].replace(/^["']|["']$/g, '')); + } + return args; +} + +// Sentinel text emitted by ggml-metal when the device is pre-M5 / pre-A19. +// Seeing this in stderr on a non-zero exit means GPU init failed; we retry +// with -ngl 0 so the binary falls back to CPU-only execution. +const METAL_TENSOR_API_DISABLED_MSG = 'tensor API disabled for pre-M5 and pre-A19 devices'; + +/** Build a human-readable error message from a failed binary run (max 2000 chars). */ +function buildBinaryError(label: string, result: { exitCode: number | null; stdout: string; stderr: string }): Error { + const msg = (result.stderr || result.stdout || `exit code ${result.exitCode}`).slice(0, 2000); + return new Error(`${label} failed: ${msg}`); +} + +/** Returns true when the args already include a GPU-layers disable flag. */ +function hasNglFlag(args: string[]): boolean { + return args.includes('-ngl') || args.includes('--n-gpu-layers') || args.includes('--ngl'); +} + /** Run a binary and return its captured stdout/stderr. Throws on non-zero exit. */ async function runBinary( bin: string, args: string[], label: string, + env?: NodeJS.ProcessEnv, ): Promise<{ stdout: string; stderr: string }> { + const result = await runBinaryOnce(bin, args, label, env); + + // Auto-retry without GPU layers when the Metal tensor API is unavailable + // (affects Apple Silicon M1–M4 with newer ggml builds). + // Note: extra args from DIT_VAE_EXTRA_ARGS / ACE_QWEN3_EXTRA_ARGS are already + // included in `args`, so hasNglFlag() correctly covers user-supplied flags too. + if ( + result.exitCode !== 0 && + result.stderr.includes(METAL_TENSOR_API_DISABLED_MSG) && + !hasNglFlag(args) + ) { + console.warn( + `[Spawn] ${label}: Metal tensor API unavailable on this device — retrying with -ngl 0 (CPU-only)`, + ); + const retry = await runBinaryOnce(bin, ['-ngl', '0', ...args], label, env); + if (retry.exitCode !== 0) throw buildBinaryError(label, retry); + return { stdout: retry.stdout, stderr: retry.stderr }; + } + + if (result.exitCode !== 0) throw buildBinaryError(label, result); + + return { stdout: result.stdout, stderr: result.stderr }; +} + +/** Internal: spawn once and collect output. Never throws on non-zero exit. */ +function runBinaryOnce( + bin: string, + args: string[], + label: string, + env?: NodeJS.ProcessEnv, +): Promise<{ exitCode: number | null; stdout: string; stderr: string }> { return new Promise((resolve, reject) => { const proc = spawn(bin, args, { shell: false, - env: { ...process.env }, + env: { ...process.env, ...env }, stdio: ['ignore', 'pipe', 'pipe'], }); @@ -217,14 +281,7 @@ async function runBinary( proc.stdout.on('data', (chunk: Buffer) => { stdout += chunk.toString(); }); proc.stderr.on('data', (chunk: Buffer) => { stderr += chunk.toString(); }); - proc.on('close', (code) => { - if (code === 0) { - resolve({ stdout, stderr }); - } else { - const msg = (stderr || stdout || `exit code ${code}`).slice(0, 500); - reject(new Error(`${label} failed: ${msg}`)); - } - }); + proc.on('close', (code) => resolve({ exitCode: code, stdout, stderr })); proc.on('error', (err) => reject(new Error(`${label} process error: ${err.message}`))); }); } @@ -294,6 +351,7 @@ async function runViaSpawn( const batchSize = Math.min(Math.max(params.batchSize ?? 1, 1), 8); if (batchSize > 1) lmArgs.push('--batch', String(batchSize)); + lmArgs.push(...parseExtraArgs(process.env.ACE_QWEN3_EXTRA_ARGS)); console.log(`[Spawn] Job ${jobId}: ace-qwen3 ${lmArgs.slice(0, 6).join(' ')} …`); await runBinary(lmBin, lmArgs, 'ace-qwen3'); @@ -344,6 +402,7 @@ async function runViaSpawn( ditArgs.push('--repainting-start', String(params.repaintingStart)); if (params.repaintingEnd && params.repaintingEnd > 0) ditArgs.push('--repainting-end', String(params.repaintingEnd)); + ditArgs.push(...parseExtraArgs(process.env.DIT_VAE_EXTRA_ARGS)); console.log(`[Spawn] Job ${jobId}: dit-vae ${ditArgs.slice(0, 6).join(' ')} …`); await runBinary(ditVaeBin, ditArgs, 'dit-vae'); From 28c4ea9ce837d7270d76758749f83e692684819c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 7 Mar 2026 17:56:08 +0000 Subject: [PATCH 3/3] Remove wrong Metal retry; add live pipeline progress from binary stderr Co-authored-by: lmangani <1423657+lmangani@users.noreply.github.com> --- .env.example | 11 +- server/src/services/acestep.ts | 197 +++++++++++++++++++++++++-------- 2 files changed, 153 insertions(+), 55 deletions(-) diff --git a/.env.example b/.env.example index 0baaa43..7aebdae 100644 --- a/.env.example +++ b/.env.example @@ -48,14 +48,9 @@ JWT_SECRET=ace-step-ui-local-secret # ── Binary extra arguments ──────────────────────────────────────────────────── # Append extra CLI flags to the ace-qwen3 or dit-vae spawn invocations. -# Useful for hardware-specific tuning or debugging. -# -# Apple Silicon M1–M4 users: if dit-vae fails with -# "tensor API disabled for pre-M5 and pre-A19 devices" -# the server will automatically retry with -ngl 0 (CPU-only mode). -# You can force CPU-only from the start with: -# DIT_VAE_EXTRA_ARGS=-ngl 0 -# ACE_QWEN3_EXTRA_ARGS=-ngl 0 +# Useful for hardware-specific tuning or debugging, e.g. limit CPU threads: +# ACE_QWEN3_EXTRA_ARGS=--threads 4 +# DIT_VAE_EXTRA_ARGS=--threads 4 # # DIT_VAE_EXTRA_ARGS= # ACE_QWEN3_EXTRA_ARGS= diff --git a/server/src/services/acestep.ts b/server/src/services/acestep.ts index 1737dc1..13f64dc 100644 --- a/server/src/services/acestep.ts +++ b/server/src/services/acestep.ts @@ -215,60 +215,23 @@ function parseExtraArgs(envVar: string | undefined): string[] { return args; } -// Sentinel text emitted by ggml-metal when the device is pre-M5 / pre-A19. -// Seeing this in stderr on a non-zero exit means GPU init failed; we retry -// with -ngl 0 so the binary falls back to CPU-only execution. -const METAL_TENSOR_API_DISABLED_MSG = 'tensor API disabled for pre-M5 and pre-A19 devices'; - /** Build a human-readable error message from a failed binary run (max 2000 chars). */ function buildBinaryError(label: string, result: { exitCode: number | null; stdout: string; stderr: string }): Error { const msg = (result.stderr || result.stdout || `exit code ${result.exitCode}`).slice(0, 2000); return new Error(`${label} failed: ${msg}`); } -/** Returns true when the args already include a GPU-layers disable flag. */ -function hasNglFlag(args: string[]): boolean { - return args.includes('-ngl') || args.includes('--n-gpu-layers') || args.includes('--ngl'); -} - -/** Run a binary and return its captured stdout/stderr. Throws on non-zero exit. */ -async function runBinary( +/** + * Run a binary, streaming stderr lines to an optional callback, and return + * captured output. Throws with a detailed message on non-zero exit. + */ +function runBinary( bin: string, args: string[], label: string, env?: NodeJS.ProcessEnv, + onLine?: (line: string) => void, ): Promise<{ stdout: string; stderr: string }> { - const result = await runBinaryOnce(bin, args, label, env); - - // Auto-retry without GPU layers when the Metal tensor API is unavailable - // (affects Apple Silicon M1–M4 with newer ggml builds). - // Note: extra args from DIT_VAE_EXTRA_ARGS / ACE_QWEN3_EXTRA_ARGS are already - // included in `args`, so hasNglFlag() correctly covers user-supplied flags too. - if ( - result.exitCode !== 0 && - result.stderr.includes(METAL_TENSOR_API_DISABLED_MSG) && - !hasNglFlag(args) - ) { - console.warn( - `[Spawn] ${label}: Metal tensor API unavailable on this device — retrying with -ngl 0 (CPU-only)`, - ); - const retry = await runBinaryOnce(bin, ['-ngl', '0', ...args], label, env); - if (retry.exitCode !== 0) throw buildBinaryError(label, retry); - return { stdout: retry.stdout, stderr: retry.stderr }; - } - - if (result.exitCode !== 0) throw buildBinaryError(label, result); - - return { stdout: result.stdout, stderr: result.stderr }; -} - -/** Internal: spawn once and collect output. Never throws on non-zero exit. */ -function runBinaryOnce( - bin: string, - args: string[], - label: string, - env?: NodeJS.ProcessEnv, -): Promise<{ exitCode: number | null; stdout: string; stderr: string }> { return new Promise((resolve, reject) => { const proc = spawn(bin, args, { shell: false, @@ -278,14 +241,154 @@ function runBinaryOnce( let stdout = ''; let stderr = ''; + let lineBuf = ''; + proc.stdout.on('data', (chunk: Buffer) => { stdout += chunk.toString(); }); - proc.stderr.on('data', (chunk: Buffer) => { stderr += chunk.toString(); }); + proc.stderr.on('data', (chunk: Buffer) => { + const text = chunk.toString(); + stderr += text; + if (onLine) { + lineBuf += text; + const lines = lineBuf.split('\n'); + lineBuf = lines.pop() ?? ''; + for (const line of lines) { + const trimmed = line.trim(); + if (trimmed) onLine(trimmed); + } + } + }); - proc.on('close', (code) => resolve({ exitCode: code, stdout, stderr })); + proc.on('close', (code) => { + if (code === 0) { + resolve({ stdout, stderr }); + } else { + reject(buildBinaryError(label, { exitCode: code, stdout, stderr })); + } + }); proc.on('error', (err) => reject(new Error(`${label} process error: ${err.message}`))); }); } +// --------------------------------------------------------------------------- +// Live progress parsing — translates binary stderr lines into job.stage / +// job.progress updates that the polling API can return to the frontend. +// +// ace-qwen3 progress lines (all on stderr): +// [Phase1] step 100, 1 active, 19.0 tok/s — lyrics LM decode +// [Phase1] Decode 15871ms — Phase1 complete +// [Phase2] max_tokens: 800, … — captures audio-codes budget +// [Decode] step 50, 1 active, 51 total codes, 20.1 tok/s — audio LM decode +// +// dit-vae progress lines (all on stderr): +// [DiT] Starting: T=…, steps=8, … — captures DiT step count +// [DiT] step 1/8 t=1.000 — DiT diffusion step N/M +// [DiT] Total generation: … — DiT complete +// [VAE] Tiled decode: 28 tiles … — VAE starting +// [VAE] Tiled decode done: 28 tiles → … — VAE complete +// +// Progress scale: 0–50% ace-qwen3 | 50–100% dit-vae +// --------------------------------------------------------------------------- + +// Progress budget across the two-binary pipeline (must sum to 100): +// 0–30% ace-qwen3 Phase1 (lyrics LM decode — step count varies, ~200–400) +// 30–50% ace-qwen3 Phase2 (audio-codes LM decode) +// 50–85% dit-vae DiT (diffusion steps — exact N/M known at runtime) +// 85–100% dit-vae VAE (tiled audio decode) +const PROGRESS_LM_PHASE1_MAX = 30; // % at end of Phase1 +const PROGRESS_LM_PHASE2_END = 50; // % at end of Phase2 (= start of dit-vae) +const PROGRESS_DIT_END = 85; // % at end of DiT diffusion +const PROGRESS_VAE_END = 98; // % at end of VAE decode (100 set on job success) + +/** + * Returns an onLine callback for ace-qwen3 stderr that updates job.stage and + * job.progress as the LM pipeline progresses (contributes 0–50% overall). + */ +function makeLmProgressHandler(job: ActiveJob): (line: string) => void { + let phase2MaxTokens = 800; + // Phase1 step ceiling: ace-qwen3 typically produces 200–350 lyrics tokens. + // 400 is a generous upper bound so the bar reaches ~28% by the end of Phase1. + const PHASE1_STEP_CEIL = 400; + + return (line: string) => { + // Phase1 LM decode: "[Phase1] step 100, 1 active, 19.0 tok/s" + const p1 = line.match(/^\[Phase1\] step (\d+),.*?([\d.]+) tok\/s/); + if (p1) { + const step = parseInt(p1[1], 10); + const rate = p1[2]; + job.progress = Math.min(PROGRESS_LM_PHASE1_MAX - 2, Math.round((step / PHASE1_STEP_CEIL) * (PROGRESS_LM_PHASE1_MAX - 2))); + job.stage = `LLM: generating lyrics — step ${step} (${rate} tok/s)`; + return; + } + // Phase1 done: "[Phase1] Decode 15871ms" + if (/^\[Phase1\] Decode/.test(line)) { + job.progress = PROGRESS_LM_PHASE1_MAX; + job.stage = 'LLM: lyrics complete — generating audio codes…'; + return; + } + // Phase2 max tokens: "[Phase2] max_tokens: 800, …" + const p2m = line.match(/^\[Phase2\] max_tokens:\s*(\d+)/); + if (p2m) { + phase2MaxTokens = parseInt(p2m[1], 10) || 800; + return; + } + // Phase2 audio-codes decode: "[Decode] step 50, 1 active, 51 total codes, 20.1 tok/s" + const p2d = line.match(/^\[Decode\] step \d+,.*?(\d+) total codes,.*?([\d.]+) tok\/s/); + if (p2d) { + const codes = parseInt(p2d[1], 10); + const rate = p2d[2]; + const phase2Range = PROGRESS_LM_PHASE2_END - PROGRESS_LM_PHASE1_MAX; + job.progress = PROGRESS_LM_PHASE1_MAX + Math.min(phase2Range, Math.round((codes / phase2MaxTokens) * phase2Range)); + job.stage = `LLM: audio codes — ${codes}/${phase2MaxTokens} (${rate} tok/s)`; + } + }; +} + +/** + * Returns an onLine callback for dit-vae stderr that updates job.stage and + * job.progress as the DiT+VAE pipeline progresses (contributes 50–100% overall). + */ +function makeDitVaeProgressHandler(job: ActiveJob): (line: string) => void { + let ditTotalSteps = 8; + + return (line: string) => { + // DiT starting — capture step count: "[DiT] Starting: T=3470, S=1735, …, steps=8, …" + const ditStart = line.match(/^\[DiT\] Starting:.*?steps=(\d+)/); + if (ditStart) { + ditTotalSteps = parseInt(ditStart[1], 10) || 8; + return; + } + // DiT step: "[DiT] step 1/8 t=1.000" + const ditStep = line.match(/^\[DiT\] step (\d+)\/(\d+)/); + if (ditStep) { + const step = parseInt(ditStep[1], 10); + const total = parseInt(ditStep[2], 10); + ditTotalSteps = total; + const ditRange = PROGRESS_DIT_END - PROGRESS_LM_PHASE2_END; + job.progress = PROGRESS_LM_PHASE2_END + Math.round((step / total) * ditRange); + job.stage = `DiT: step ${step}/${total}`; + return; + } + // DiT complete: "[DiT] Total generation: 16200.0 ms …" + if (/^\[DiT\] Total generation/.test(line)) { + job.progress = PROGRESS_DIT_END; + job.stage = 'VAE: decoding audio…'; + return; + } + // VAE starting: "[VAE] Tiled decode: 28 tiles (chunk=256, overlap=64, stride=128)" + const vaeStart = line.match(/^\[VAE\] Tiled decode:\s*(\d+) tiles/); + if (vaeStart) { + job.progress = PROGRESS_DIT_END; + job.stage = `VAE: decoding ${vaeStart[1]} tiles…`; + return; + } + // VAE done: "[VAE] Tiled decode done: 28 tiles → T_audio=…" + if (/^\[VAE\] Tiled decode done/.test(line)) { + job.progress = PROGRESS_VAE_END; + job.stage = 'VAE: decode complete — writing audio…'; + } + }; +} + async function runViaSpawn( jobId: string, params: GenerationParams, @@ -354,7 +457,7 @@ async function runViaSpawn( lmArgs.push(...parseExtraArgs(process.env.ACE_QWEN3_EXTRA_ARGS)); console.log(`[Spawn] Job ${jobId}: ace-qwen3 ${lmArgs.slice(0, 6).join(' ')} …`); - await runBinary(lmBin, lmArgs, 'ace-qwen3'); + await runBinary(lmBin, lmArgs, 'ace-qwen3', undefined, makeLmProgressHandler(job)); // Collect enriched JSON files produced by ace-qwen3: // request.json → request0.json [, request1.json, …] (placed alongside request.json) @@ -405,7 +508,7 @@ async function runViaSpawn( ditArgs.push(...parseExtraArgs(process.env.DIT_VAE_EXTRA_ARGS)); console.log(`[Spawn] Job ${jobId}: dit-vae ${ditArgs.slice(0, 6).join(' ')} …`); - await runBinary(ditVaeBin, ditArgs, 'dit-vae'); + await runBinary(ditVaeBin, ditArgs, 'dit-vae', undefined, makeDitVaeProgressHandler(job)); // ── Collect generated WAV files ───────────────────────────────────────── // dit-vae places output WAVs alongside each enriched JSON: