From 950c6b0af1b7f957c78fd1058c7ad5036e62b03e Mon Sep 17 00:00:00 2001 From: RayNa Date: Fri, 3 Apr 2026 21:37:05 +0800 Subject: [PATCH 1/4] fix: filter unsupported content types and add reasoning_key support Fixes #796: Filter out VideoURLPart/AudioURLPart when sending to APIs that don't support them. - Add `supported_content_types` parameter to OpenAILegacy constructor - Auto-detect supported types from model capabilities (image_in) - Filter unsupported content in `_convert_message` Fixes #1155: Add `reasoning_key` config field for OpenAI-compatible APIs. - Add `reasoning_key` field to LLMProvider config - Pass `reasoning_key` to OpenAILegacy to extract thinking content Changes: - packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py: Add supported_content_types param - src/kimi_cli/config.py: Add reasoning_key and supported_content_types fields - src/kimi_cli/llm.py: Pass reasoning_key and auto-detect content types from model caps Co-Authored-By: Claude --- .../contrib/chat_provider/openai_legacy.py | 34 +++++++++++++++++-- src/kimi_cli/config.py | 9 +++++ src/kimi_cli/llm.py | 26 ++++++++++++++ 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py b/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py index ef736360b..864f5820a 100644 --- a/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py +++ b/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py @@ -30,7 +30,15 @@ tool_to_openai, ) from kosong.contrib.chat_provider.common import ToolMessageConversion -from kosong.message import ContentPart, Message, TextPart, ThinkPart, ToolCall, ToolCallPart +from kosong.message import ( + ContentPart, + ImageURLPart, + Message, + TextPart, + ThinkPart, + ToolCall, + ToolCallPart, +) from kosong.tooling import Tool if TYPE_CHECKING: @@ -77,6 +85,7 @@ def __init__( stream: bool = True, reasoning_key: str | None = None, tool_message_conversion: 
ToolMessageConversion | None = None, + supported_content_types: set[type[ContentPart]] | None = None, **client_kwargs: Any, ): """ @@ -84,6 +93,11 @@ def __init__( To support OpenAI-compatible APIs that inject reasoning content in a extra field in the message, such as `{"reasoning": ...}`, `reasoning_key` can be set to the key name. + + Args: + supported_content_types: Set of ContentPart types that the API supports. + If None, defaults to {TextPart, ImageURLPart, ToolCall, ToolCallPart}. + Set to {TextPart, ToolCall, ToolCallPart} for APIs that don't support images. """ self.model = model self.stream = stream @@ -100,6 +114,13 @@ def __init__( self._reasoning_key = reasoning_key self._tool_message_conversion: ToolMessageConversion | None = tool_message_conversion self._generation_kwargs: OpenAILegacy.GenerationKwargs = {} + # Default supported content types: TextPart, ImageURLPart (OpenAI standard), and tool-related + self._supported_content_types: set[type[ContentPart]] = supported_content_types or { + TextPart, + ImageURLPart, + ToolCall, + ToolCallPart, + } @property def model_name(self) -> str: @@ -193,7 +214,12 @@ def model_parameters(self) -> dict[str, Any]: return model_parameters def _convert_message(self, message: Message) -> ChatCompletionMessageParam: - """Convert a Kosong message to OpenAI message.""" + """Convert a Kosong message to OpenAI message. + + Filters out content parts that are not supported by the API (e.g., VideoURLPart, AudioURLPart). + This prevents API errors when sending messages containing multimedia content to APIs that + don't support them. See: https://github.com/MoonshotAI/kimi-cli/issues/796 + """ # Note: for openai, `developer` role is more standard, but `system` is still accepted. # And many openai-compatible models do not accept `developer` role. # So we use `system` role here. OpenAIResponses will use `developer` role. 
@@ -204,8 +230,10 @@ def _convert_message(self, message: Message) -> ChatCompletionMessageParam: for part in message.content: if isinstance(part, ThinkPart): reasoning_content += part.think - else: + elif type(part) in self._supported_content_types: + # Only include content types supported by this API content.append(part) + # else: filter out unsupported content types (e.g., VideoURLPart, AudioURLPart) # if tool message and `tool_result_conversion` is `extract_text`, patch all text parts into # one so that we can make use of the serialization process of `Message` to output string if message.role == "tool" and self._tool_message_conversion == "extract_text": diff --git a/src/kimi_cli/config.py b/src/kimi_cli/config.py index 78019555a..a73ef5caa 100644 --- a/src/kimi_cli/config.py +++ b/src/kimi_cli/config.py @@ -47,6 +47,15 @@ class LLMProvider(BaseModel): """Custom headers to include in API requests""" oauth: OAuthRef | None = None """OAuth credential reference (do not store tokens here).""" + reasoning_key: str | None = None + """Key name for reasoning/thinking content in API response (e.g., 'reasoning_content'). + Used by openai_legacy provider to extract thinking content from OpenAI-compatible APIs. + See: https://github.com/MoonshotAI/kimi-cli/issues/1155""" + supported_content_types: list[str] | None = None + """List of supported content part types for openai_legacy provider. + Valid values: 'text', 'image_url'. If not specified, defaults to ['text', 'image_url']. + Note: 'video_url' and 'audio_url' are filtered out by default as most APIs don't support them. 
+ See: https://github.com/MoonshotAI/kimi-cli/issues/796""" @field_serializer("api_key", when_used="json") def dump_secret(self, v: SecretStr): diff --git a/src/kimi_cli/llm.py b/src/kimi_cli/llm.py index 4e9801965..9a0780c5e 100644 --- a/src/kimi_cli/llm.py +++ b/src/kimi_cli/llm.py @@ -147,12 +147,38 @@ def create_llm( chat_provider = chat_provider.with_generation_kwargs(**gen_kwargs) case "openai_legacy": from kosong.contrib.chat_provider.openai_legacy import OpenAILegacy + from kosong.message import ContentPart, ImageURLPart, TextPart + + # Build supported content types based on config or model capabilities + supported_types: set[type[ContentPart]] | None = None + if provider.supported_content_types is not None: + # User explicitly specified supported types + type_map: dict[str, type[ContentPart]] = { + "text": TextPart, + "image_url": ImageURLPart, + } + supported_types = { + type_map[t] for t in provider.supported_content_types if t in type_map + } + else: + # Auto-detect from model capabilities + model_caps = derive_model_capabilities(model) + supported_types = {TextPart} + if "image_in" in model_caps: + supported_types.add(ImageURLPart) + + # Always include ToolCall and ToolCallPart for tool support + from kosong.message import ToolCall, ToolCallPart + + supported_types.update({ToolCall, ToolCallPart}) chat_provider = OpenAILegacy( model=model.model, base_url=provider.base_url, api_key=resolved_api_key, default_headers=dict(provider.custom_headers) if provider.custom_headers else None, + reasoning_key=provider.reasoning_key, + supported_content_types=supported_types, ) case "openai_responses": from kosong.contrib.chat_provider.openai_responses import OpenAIResponses From c7196cb31512880f1774222641a79331181cb800 Mon Sep 17 00:00:00 2001 From: RayNa Date: Sat, 4 Apr 2026 09:46:56 +0800 Subject: [PATCH 2/4] fix: remove ToolCall/ToolCallPart from supported_content_types ToolCall and ToolCallPart are NOT ContentPart subclasses. 
They should not be in supported_content_types as they are handled separately by message serialization. This fixes a regression that caused "Backend buffer overflow" errors. Co-Authored-By: Claude --- .../contrib/chat_provider/openai_legacy.py | 20 +++++++++++-------- src/kimi_cli/llm.py | 13 +++++------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py b/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py index 864f5820a..38a619200 100644 --- a/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py +++ b/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py @@ -48,6 +48,11 @@ def type_check(openai_legacy: "OpenAILegacy"): _: RetryableChatProvider = openai_legacy +# ContentPart subclasses that can be filtered by supported_content_types +# Note: ToolCall and ToolCallPart are NOT ContentPart subclasses, they are handled separately +_CONTENT_PART_TYPES: set[type] = {TextPart, ImageURLPart} + + class OpenAILegacy: """ A chat provider that uses the OpenAI Chat Completions API. @@ -85,7 +90,7 @@ def __init__( stream: bool = True, reasoning_key: str | None = None, tool_message_conversion: ToolMessageConversion | None = None, - supported_content_types: set[type[ContentPart]] | None = None, + supported_content_types: set[type] | None = None, **client_kwargs: Any, ): """ @@ -95,9 +100,10 @@ def __init__( the message, such as `{"reasoning": ...}`, `reasoning_key` can be set to the key name. Args: - supported_content_types: Set of ContentPart types that the API supports. - If None, defaults to {TextPart, ImageURLPart, ToolCall, ToolCallPart}. - Set to {TextPart, ToolCall, ToolCallPart} for APIs that don't support images. + supported_content_types: Set of ContentPart subclass types that the API supports. + If None, defaults to {TextPart, ImageURLPart}. + Only ContentPart subclasses (TextPart, ImageURLPart, etc.) should be included. 
+ ToolCall and ToolCallPart are handled separately by message serialization. """ self.model = model self.stream = stream @@ -114,12 +120,10 @@ def __init__( self._reasoning_key = reasoning_key self._tool_message_conversion: ToolMessageConversion | None = tool_message_conversion self._generation_kwargs: OpenAILegacy.GenerationKwargs = {} - # Default supported content types: TextPart, ImageURLPart (OpenAI standard), and tool-related - self._supported_content_types: set[type[ContentPart]] = supported_content_types or { + # Default supported content types: TextPart and ImageURLPart (OpenAI standard) + self._supported_content_types: set[type] = supported_content_types or { TextPart, ImageURLPart, - ToolCall, - ToolCallPart, } @property diff --git a/src/kimi_cli/llm.py b/src/kimi_cli/llm.py index 9a0780c5e..3c763e577 100644 --- a/src/kimi_cli/llm.py +++ b/src/kimi_cli/llm.py @@ -147,13 +147,15 @@ def create_llm( chat_provider = chat_provider.with_generation_kwargs(**gen_kwargs) case "openai_legacy": from kosong.contrib.chat_provider.openai_legacy import OpenAILegacy - from kosong.message import ContentPart, ImageURLPart, TextPart + from kosong.message import ImageURLPart, TextPart # Build supported content types based on config or model capabilities - supported_types: set[type[ContentPart]] | None = None + # Note: Only ContentPart subclasses (TextPart, ImageURLPart) should be here. + # ToolCall and ToolCallPart are handled separately by the message serialization. 
+ supported_types: set[type] | None = None if provider.supported_content_types is not None: # User explicitly specified supported types - type_map: dict[str, type[ContentPart]] = { + type_map: dict[str, type] = { "text": TextPart, "image_url": ImageURLPart, } @@ -167,11 +169,6 @@ def create_llm( if "image_in" in model_caps: supported_types.add(ImageURLPart) - # Always include ToolCall and ToolCallPart for tool support - from kosong.message import ToolCall, ToolCallPart - - supported_types.update({ToolCall, ToolCallPart}) - chat_provider = OpenAILegacy( model=model.model, base_url=provider.base_url, From 1a922157d5eaf7bae23603ed9720f37f7bb814db Mon Sep 17 00:00:00 2001 From: RayNa Date: Sun, 5 Apr 2026 06:38:37 +0800 Subject: [PATCH 3/4] fix: address Devin Review feedback for supported_content_types Fixes three issues identified by Devin Review: 1. **Always include TextPart**: TextPart is now always included in supported_content_types to ensure text content is never dropped. Previously, if user configured only ["image_url"], all text would be silently filtered out. 2. **Explicit None check**: Use `is not None` instead of `or` when checking supported_content_types. Empty set is falsy in Python, which would incorrectly trigger the default fallback. 3. **Validate configured values**: Unknown content type names now raise ValueError at startup instead of being silently ignored. This helps users catch typos like "image" instead of "image_url". 
Changes: - src/kimi_cli/llm.py: Validate config, always include TextPart - packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py: Fix None check - src/kimi_cli/config.py: Update documentation Co-Authored-By: Claude --- .../contrib/chat_provider/openai_legacy.py | 12 ++++---- src/kimi_cli/config.py | 6 ++-- src/kimi_cli/llm.py | 29 ++++++++++++------- 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py b/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py index 38a619200..60553f972 100644 --- a/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py +++ b/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py @@ -120,11 +120,13 @@ def __init__( self._reasoning_key = reasoning_key self._tool_message_conversion: ToolMessageConversion | None = tool_message_conversion self._generation_kwargs: OpenAILegacy.GenerationKwargs = {} - # Default supported content types: TextPart and ImageURLPart (OpenAI standard) - self._supported_content_types: set[type] = supported_content_types or { - TextPart, - ImageURLPart, - } + # Use explicit None check instead of `or` to handle empty set correctly. + # An empty set is falsy in Python, which would incorrectly trigger the default. + self._supported_content_types: set[type] = ( + supported_content_types + if supported_content_types is not None + else {TextPart, ImageURLPart} + ) @property def model_name(self) -> str: diff --git a/src/kimi_cli/config.py b/src/kimi_cli/config.py index a73ef5caa..d2bd9a032 100644 --- a/src/kimi_cli/config.py +++ b/src/kimi_cli/config.py @@ -53,8 +53,10 @@ class LLMProvider(BaseModel): See: https://github.com/MoonshotAI/kimi-cli/issues/1155""" supported_content_types: list[str] | None = None """List of supported content part types for openai_legacy provider. - Valid values: 'text', 'image_url'. If not specified, defaults to ['text', 'image_url']. 
- Note: 'video_url' and 'audio_url' are filtered out by default as most APIs don't support them. + Valid values: 'text', 'image_url'. Invalid values will raise ValueError at startup. + Note: 'text' is ALWAYS included to ensure text content is never dropped. + If not specified, auto-detects from model capabilities (image_in). + 'video_url' and 'audio_url' are filtered out as most APIs don't support them. See: https://github.com/MoonshotAI/kimi-cli/issues/796""" @field_serializer("api_key", when_used="json") diff --git a/src/kimi_cli/llm.py b/src/kimi_cli/llm.py index 3c763e577..282b9e757 100644 --- a/src/kimi_cli/llm.py +++ b/src/kimi_cli/llm.py @@ -149,19 +149,26 @@ def create_llm( from kosong.contrib.chat_provider.openai_legacy import OpenAILegacy from kosong.message import ImageURLPart, TextPart - # Build supported content types based on config or model capabilities + # Build supported content types based on config or model capabilities. + # TextPart is ALWAYS included to ensure text content is never dropped. # Note: Only ContentPart subclasses (TextPart, ImageURLPart) should be here. - # ToolCall and ToolCallPart are handled separately by the message serialization. - supported_types: set[type] | None = None + # ToolCall and ToolCallPart are handled separately by message serialization. + + # Valid content type names that users can configure + VALID_CONTENT_TYPES = {"text", "image_url"} + if provider.supported_content_types is not None: - # User explicitly specified supported types - type_map: dict[str, type] = { - "text": TextPart, - "image_url": ImageURLPart, - } - supported_types = { - type_map[t] for t in provider.supported_content_types if t in type_map - } + # Validate configured values + unknown_types = set(provider.supported_content_types) - VALID_CONTENT_TYPES + if unknown_types: + raise ValueError( + f"Invalid supported_content_types in provider config: {unknown_types}. 
" + f"Valid values are: {VALID_CONTENT_TYPES}" + ) + # Build supported types from user config, always including TextPart + supported_types: set[type] = {TextPart} + if "image_url" in provider.supported_content_types: + supported_types.add(ImageURLPart) else: # Auto-detect from model capabilities model_caps = derive_model_capabilities(model) From 7258022c13f332534f16d961b0b1a5432c531455 Mon Sep 17 00:00:00 2001 From: RayNa Date: Sun, 5 Apr 2026 07:09:53 +0800 Subject: [PATCH 4/4] fix: omit tools parameter when empty to avoid API errors When compaction runs with EmptyToolset(), it passes tools=[] to the API. This causes errors with some OpenAI-compatible APIs: - DashScope: "[] is too short - 'tools'" - Xunfei: "EngineInternalError: Bad Request" (code 10012) Solution: When tools list is empty, use `omit` instead of passing an empty array. This removes the tools field from the request entirely, which is accepted by all OpenAI-compatible APIs. Fixes context compaction failures on DashScope and Xunfei APIs. Related: https://github.com/MoonshotAI/kimi-cli/issues/1344 Co-Authored-By: Claude --- .../contrib/chat_provider/openai_legacy.py | 10 +++++++- .../api_snapshot_tests/test_openai_legacy.py | 24 +++++-------------- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py b/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py index 60553f972..1ba804001 100644 --- a/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py +++ b/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py @@ -166,10 +166,18 @@ async def generate( reasoning_effort = "medium" try: + # Build tools parameter: omit if empty to avoid API errors. 
+ # Some APIs (DashScope, Xunfei) reject empty tools array with errors like: + # - "[] is too short - 'tools'" (DashScope) + # - "EngineInternalError: Bad Request" (Xunfei) + # See: https://github.com/MoonshotAI/kimi-cli/issues/1344 + tools_param = ( + [tool_to_openai(tool) for tool in tools] if tools else omit + ) response = await self.client.chat.completions.create( model=self.model, messages=messages, - tools=(tool_to_openai(tool) for tool in tools), + tools=tools_param, stream=self.stream, stream_options={"include_usage": True} if self.stream else omit, reasoning_effort=reasoning_effort, diff --git a/packages/kosong/tests/api_snapshot_tests/test_openai_legacy.py b/packages/kosong/tests/api_snapshot_tests/test_openai_legacy.py index 2e2020d79..c5321d1da 100644 --- a/packages/kosong/tests/api_snapshot_tests/test_openai_legacy.py +++ b/packages/kosong/tests/api_snapshot_tests/test_openai_legacy.py @@ -27,26 +27,20 @@ async def test_openai_legacy_message_conversion(): "messages": [ {"role": "system", "content": "You are helpful."}, {"role": "user", "content": "Hello!"}, - ], - "tools": [], - }, + ]}, "multi_turn_conversation": { "messages": [ {"role": "user", "content": "What is 2+2?"}, {"role": "assistant", "content": "2+2 equals 4."}, {"role": "user", "content": "And 3+3?"}, - ], - "tools": [], - }, + ]}, "multi_turn_with_system": { "messages": [ {"role": "system", "content": "You are a math tutor."}, {"role": "user", "content": "What is 2+2?"}, {"role": "assistant", "content": "2+2 equals 4."}, {"role": "user", "content": "And 3+3?"}, - ], - "tools": [], - }, + ]}, "image_url": { "messages": [ { @@ -62,9 +56,7 @@ async def test_openai_legacy_message_conversion(): }, ], } - ], - "tools": [], - }, + ]}, "tool_definition": { "messages": [{"role": "user", "content": "Add 2 and 3"}], "tools": [ @@ -134,9 +126,7 @@ async def test_openai_legacy_message_conversion(): ], "tool_call_id": "call_abc123", }, - ], - "tools": [], - }, + ]}, "tool_call": { "messages": [ 
{"role": "user", "content": "Add 2 and 3"}, @@ -152,9 +142,7 @@ async def test_openai_legacy_message_conversion(): ], }, {"role": "tool", "content": "5", "tool_call_id": "call_abc123"}, - ], - "tools": [], - }, + ]}, "parallel_tool_calls": { "messages": [ {"role": "user", "content": "Calculate 2+3 and 4*5"},