From c69c75508607f35130eb9c7ddbbf05b82340dd3c Mon Sep 17 00:00:00 2001 From: Yufeng He <40085740+he-yufeng@users.noreply.github.com> Date: Thu, 21 May 2026 00:11:34 +0800 Subject: [PATCH] fix(kosong): sanitize surrogates before Kimi requests --- .../kosong/src/kosong/chat_provider/kimi.py | 19 +++++++++-- packages/kosong/tests/test_chat_provider.py | 32 +++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/packages/kosong/src/kosong/chat_provider/kimi.py b/packages/kosong/src/kosong/chat_provider/kimi.py index 82b6e5ac3..acae745b4 100644 --- a/packages/kosong/src/kosong/chat_provider/kimi.py +++ b/packages/kosong/src/kosong/chat_provider/kimi.py @@ -157,7 +157,9 @@ async def generate( ) -> "KimiStreamedMessage": messages: list[ChatCompletionMessageParam] = [] if system_prompt: - messages.append({"role": "system", "content": system_prompt}) + messages.append( + {"role": "system", "content": _sanitize_surrogate_strings(system_prompt)} + ) messages.extend(_convert_message(message) for message in history) generation_kwargs: dict[str, Any] = { @@ -329,7 +331,20 @@ def _convert_message(message: Message) -> ChatCompletionMessageParam: dumped_message.pop("content", None) if reasoning_content: dumped_message["reasoning_content"] = reasoning_content - return cast(ChatCompletionMessageParam, dumped_message) + return cast(ChatCompletionMessageParam, _sanitize_surrogate_strings(dumped_message)) + + +def _sanitize_surrogate_strings(value: Any) -> Any: + if isinstance(value, str): + return value.encode("utf-8", errors="surrogatepass").decode("utf-8", errors="replace") + if isinstance(value, list): + return [_sanitize_surrogate_strings(item) for item in value] + if isinstance(value, dict): + return { + _sanitize_surrogate_strings(key): _sanitize_surrogate_strings(item) + for key, item in value.items() + } + return value def _is_effectively_empty_content_parts(content: Sequence[ContentPart]) -> bool: diff --git 
async def test_kimi_sanitizes_surrogates_before_request():
    """Check that ``Kimi.generate`` strips lone surrogates before calling the client.

    The stand-in client serializes the request to JSON and encodes it as
    UTF-8, which raises if any surrogate is still present in the payload.
    """

    class _RecordingCompletions:
        """Stand-in for ``client.chat.completions`` that keeps the last request."""

        def __init__(self) -> None:
            self.captured: dict[str, Any] = {}

        async def create(self, **kwargs: Any) -> Any:
            # Materialize ``tools`` so the payload is JSON-serializable.
            payload = {**kwargs, "tools": list(kwargs["tools"])}
            # Fails with UnicodeEncodeError if a surrogate survived sanitization.
            json.dumps(payload, ensure_ascii=False).encode("utf-8")
            self.captured = payload
            return SimpleNamespace()

    recorder = _RecordingCompletions()
    fake_client = SimpleNamespace(chat=SimpleNamespace(completions=recorder))

    provider = Kimi(model="dummy", api_key="sk-1234567890")
    provider.client = cast(Any, fake_client)

    user_message = Message(role="user", content=[TextPart(text="hello\udca9")])
    await provider.generate(
        system_prompt="system\udca9",
        tools=[],
        history=[user_message],
    )

    sent = recorder.captured["messages"]
    system_content = sent[0]["content"]
    user_content = sent[1]["content"]
    assert "\ufffd" in system_content
    assert "\ufffd" in user_content