From 353b5bf957b1f5fde3eeb9e06e074f217cdca2e2 Mon Sep 17 00:00:00 2001 From: Terry Carson YM Date: Wed, 13 May 2026 11:51:21 +0800 Subject: [PATCH 1/2] fix: sanitize lone Unicode surrogates to prevent JSON serialization errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #1440 When gstack captures pages containing lone Unicode surrogate characters (unpaired \uD800-\uDFFF range), JSON serialization fails with: "API Error: 400 The request body is not valid JSON: no low surrogate in string" This typically occurs with special characters, emoji, or malformed text in page content, screenshots, or DOM text that gets serialized and sent to the Claude API. ## Solution Added `sanitizeLoneSurrogates()` function that: - Detects lone surrogate characters (high surrogates without following low surrogates, or low surrogates without preceding high surrogates) - Replaces them with \uFFFD (Unicode replacement character) - Preserves valid surrogate pairs (properly paired high+low surrogates) Applied sanitization in `handleCommand()` before creating HTTP responses, ensuring all command results are safe for JSON serialization. ## Impact - Prevents 400 errors when browsing pages with special Unicode characters - No user-visible change for valid Unicode content - Lone surrogates (which are invalid Unicode anyway) are replaced with � Co-Authored-By: Claude Opus 4.6 (1M context) --- browse/src/server.ts | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/browse/src/server.ts b/browse/src/server.ts index 81af14acdb..fcbbacd9fa 100644 --- a/browse/src/server.ts +++ b/browse/src/server.ts @@ -59,6 +59,34 @@ import * as net from 'net'; import * as path from 'path'; import * as crypto from 'crypto'; +// ─── Unicode Sanitization ─────────────────────────────────────── +/** + * Replace lone Unicode surrogate characters (unpaired \uD800–\uDFFF) in a string. 
+ * Lone surrogates make strict JSON parsers reject the serialized payload with "no low surrogate in string" errors. + * Replaces them with \uFFFD (Unicode replacement character). + */ +function sanitizeLoneSurrogates(str: string): string { + return str.replace(/[\uD800-\uDFFF]/g, (match, offset) => { + const code = match.charCodeAt(0); + // High surrogate (0xD800-0xDBFF) should be followed by low surrogate (0xDC00-0xDFFF) + if (code >= 0xD800 && code <= 0xDBFF) { + const next = str.charCodeAt(offset + 1); + if (next >= 0xDC00 && next <= 0xDFFF) { + return match; // Valid surrogate pair, keep it + } + } + // Low surrogate (0xDC00-0xDFFF) should be preceded by high surrogate + if (code >= 0xDC00 && code <= 0xDFFF) { + const prev = str.charCodeAt(offset - 1); + if (prev >= 0xD800 && prev <= 0xDBFF) { + return match; // Valid surrogate pair, keep it + } + } + // Lone surrogate - replace with replacement character + return '\uFFFD'; + }); +} + // ─── Config ───────────────────────────────────────────────────── const config = resolveConfig(); ensureStateDir(config); @@ -928,7 +956,9 @@ async function handleCommandInternal( async function handleCommand(body: any, tokenInfo?: TokenInfo | null): Promise { const cr = await handleCommandInternal(body, tokenInfo); const contentType = cr.json ? 
'application/json' : 'text/plain'; - return new Response(cr.result, { + // Sanitize lone Unicode surrogates to prevent JSON serialization errors + const sanitizedResult = sanitizeLoneSurrogates(cr.result); + return new Response(sanitizedResult, { status: cr.status, headers: { 'Content-Type': contentType, ...cr.headers }, }); From 35251b407a47a35905e2c361050510982b1201a2 Mon Sep 17 00:00:00 2001 From: Terry Carson YM Date: Wed, 13 May 2026 12:52:13 +0800 Subject: [PATCH 2/2] chore: bump VERSION to 1.33.3.0 Co-Authored-By: Claude Opus 4.6 (1M context) --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 0df2c524d3..06277cf607 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.33.2.0 +1.33.3.0