Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 46 additions & 4 deletions packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,15 @@
tool_to_openai,
)
from kosong.contrib.chat_provider.common import ToolMessageConversion
from kosong.message import ContentPart, Message, TextPart, ThinkPart, ToolCall, ToolCallPart
from kosong.message import (
ContentPart,
ImageURLPart,
Message,
TextPart,
ThinkPart,
ToolCall,
ToolCallPart,
)
from kosong.tooling import Tool

if TYPE_CHECKING:
Expand All @@ -40,6 +48,11 @@ def type_check(openai_legacy: "OpenAILegacy"):
_: RetryableChatProvider = openai_legacy


# ContentPart subclasses that can be filtered by supported_content_types
# Note: ToolCall and ToolCallPart are NOT ContentPart subclasses, they are handled separately
_CONTENT_PART_TYPES: set[type] = {TextPart, ImageURLPart}


class OpenAILegacy:
"""
A chat provider that uses the OpenAI Chat Completions API.
Expand Down Expand Up @@ -77,13 +90,20 @@ def __init__(
stream: bool = True,
reasoning_key: str | None = None,
tool_message_conversion: ToolMessageConversion | None = None,
supported_content_types: set[type] | None = None,
**client_kwargs: Any,
):
"""
Initialize the OpenAILegacy chat provider.

To support OpenAI-compatible APIs that inject reasoning content in an extra field in
the message, such as `{"reasoning": ...}`, `reasoning_key` can be set to the key name.

Args:
supported_content_types: Set of ContentPart subclass types that the API supports.
If None, defaults to {TextPart, ImageURLPart}.
Only ContentPart subclasses (TextPart, ImageURLPart, etc.) should be included.
ToolCall and ToolCallPart are handled separately by message serialization.
"""
self.model = model
self.stream = stream
Expand All @@ -100,6 +120,13 @@ def __init__(
self._reasoning_key = reasoning_key
self._tool_message_conversion: ToolMessageConversion | None = tool_message_conversion
self._generation_kwargs: OpenAILegacy.GenerationKwargs = {}
# Use explicit None check instead of `or` to handle empty set correctly.
# An empty set is falsy in Python, which would incorrectly trigger the default.
self._supported_content_types: set[type] = (
supported_content_types
if supported_content_types is not None
else {TextPart, ImageURLPart}
)

@property
def model_name(self) -> str:
Expand Down Expand Up @@ -139,10 +166,18 @@ async def generate(
reasoning_effort = "medium"

try:
# Build tools parameter: omit if empty to avoid API errors.
# Some APIs (DashScope, Xunfei) reject empty tools array with errors like:
# - "[] is too short - 'tools'" (DashScope)
# - "EngineInternalError: Bad Request" (Xunfei)
# See: https://github.com/MoonshotAI/kimi-cli/issues/1344
tools_param = (
[tool_to_openai(tool) for tool in tools] if tools else omit
)
response = await self.client.chat.completions.create(
model=self.model,
messages=messages,
tools=(tool_to_openai(tool) for tool in tools),
tools=tools_param,
stream=self.stream,
stream_options={"include_usage": True} if self.stream else omit,
reasoning_effort=reasoning_effort,
Expand Down Expand Up @@ -193,7 +228,12 @@ def model_parameters(self) -> dict[str, Any]:
return model_parameters

def _convert_message(self, message: Message) -> ChatCompletionMessageParam:
"""Convert a Kosong message to OpenAI message."""
"""Convert a Kosong message to OpenAI message.

Filters out content parts that are not supported by the API (e.g., VideoURLPart, AudioURLPart).
This prevents API errors when sending messages containing multimedia content to APIs that
don't support them. See: https://github.com/MoonshotAI/kimi-cli/issues/796
"""
# Note: for openai, `developer` role is more standard, but `system` is still accepted.
# And many openai-compatible models do not accept `developer` role.
# So we use `system` role here. OpenAIResponses will use `developer` role.
Expand All @@ -204,8 +244,10 @@ def _convert_message(self, message: Message) -> ChatCompletionMessageParam:
for part in message.content:
if isinstance(part, ThinkPart):
reasoning_content += part.think
else:
elif type(part) in self._supported_content_types:
# Only include content types supported by this API
content.append(part)
# else: filter out unsupported content types (e.g., VideoURLPart, AudioURLPart)
# if tool message and `tool_result_conversion` is `extract_text`, patch all text parts into
# one so that we can make use of the serialization process of `Message` to output string
if message.role == "tool" and self._tool_message_conversion == "extract_text":
Expand Down
24 changes: 6 additions & 18 deletions packages/kosong/tests/api_snapshot_tests/test_openai_legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,26 +27,20 @@ async def test_openai_legacy_message_conversion():
"messages": [
{"role": "system", "content": "You are helpful."},
{"role": "user", "content": "Hello!"},
],
"tools": [],
},
]},
"multi_turn_conversation": {
"messages": [
{"role": "user", "content": "What is 2+2?"},
{"role": "assistant", "content": "2+2 equals 4."},
{"role": "user", "content": "And 3+3?"},
],
"tools": [],
},
]},
"multi_turn_with_system": {
"messages": [
{"role": "system", "content": "You are a math tutor."},
{"role": "user", "content": "What is 2+2?"},
{"role": "assistant", "content": "2+2 equals 4."},
{"role": "user", "content": "And 3+3?"},
],
"tools": [],
},
]},
"image_url": {
"messages": [
{
Expand All @@ -62,9 +56,7 @@ async def test_openai_legacy_message_conversion():
},
],
}
],
"tools": [],
},
]},
"tool_definition": {
"messages": [{"role": "user", "content": "Add 2 and 3"}],
"tools": [
Expand Down Expand Up @@ -134,9 +126,7 @@ async def test_openai_legacy_message_conversion():
],
"tool_call_id": "call_abc123",
},
],
"tools": [],
},
]},
"tool_call": {
"messages": [
{"role": "user", "content": "Add 2 and 3"},
Expand All @@ -152,9 +142,7 @@ async def test_openai_legacy_message_conversion():
],
},
{"role": "tool", "content": "5", "tool_call_id": "call_abc123"},
],
"tools": [],
},
]},
"parallel_tool_calls": {
"messages": [
{"role": "user", "content": "Calculate 2+3 and 4*5"},
Expand Down
11 changes: 11 additions & 0 deletions src/kimi_cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,17 @@ class LLMProvider(BaseModel):
"""Custom headers to include in API requests"""
oauth: OAuthRef | None = None
"""OAuth credential reference (do not store tokens here)."""
reasoning_key: str | None = None
"""Key name for reasoning/thinking content in API response (e.g., 'reasoning_content').
Used by openai_legacy provider to extract thinking content from OpenAI-compatible APIs.
See: https://github.com/MoonshotAI/kimi-cli/issues/1155"""
supported_content_types: list[str] | None = None
"""List of supported content part types for openai_legacy provider.
Valid values: 'text', 'image_url'. Invalid values will raise ValueError at startup.
Note: 'text' is ALWAYS included to ensure text content is never dropped.
If not specified, auto-detects from model capabilities (image_in).
'video_url' and 'audio_url' are filtered out as most APIs don't support them.
See: https://github.com/MoonshotAI/kimi-cli/issues/796"""

@field_serializer("api_key", when_used="json")
def dump_secret(self, v: SecretStr):
Expand Down
30 changes: 30 additions & 0 deletions src/kimi_cli/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,12 +147,42 @@ def create_llm(
chat_provider = chat_provider.with_generation_kwargs(**gen_kwargs)
case "openai_legacy":
from kosong.contrib.chat_provider.openai_legacy import OpenAILegacy
from kosong.message import ImageURLPart, TextPart

# Build supported content types based on config or model capabilities.
# TextPart is ALWAYS included to ensure text content is never dropped.
# Note: Only ContentPart subclasses (TextPart, ImageURLPart) should be here.
# ToolCall and ToolCallPart are handled separately by message serialization.

# Valid content type names that users can configure
VALID_CONTENT_TYPES = {"text", "image_url"}

if provider.supported_content_types is not None:
# Validate configured values
unknown_types = set(provider.supported_content_types) - VALID_CONTENT_TYPES
if unknown_types:
raise ValueError(
f"Invalid supported_content_types in provider config: {unknown_types}. "
f"Valid values are: {VALID_CONTENT_TYPES}"
)
# Build supported types from user config, always including TextPart
supported_types: set[type] = {TextPart}
if "image_url" in provider.supported_content_types:
supported_types.add(ImageURLPart)
Comment on lines +169 to +171
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Align reported capabilities with content-type filter

When provider.supported_content_types excludes image_url, this branch narrows supported_types for OpenAILegacy, but create_llm still returns LLM.capabilities from derive_model_capabilities(model) (which can still include image_in). In that configuration, upstream checks (e.g. message capability gating and image tooling) continue to allow image input, then _convert_message silently strips ImageURLPart before the API call. This can drop user-provided image context (or produce empty user content) without an explicit error; intersect the exposed model capabilities with supported_types (or fail fast on contradictory config) to keep behavior consistent.

Useful? React with 👍 / 👎.

else:
# Auto-detect from model capabilities
model_caps = derive_model_capabilities(model)
supported_types = {TextPart}
if "image_in" in model_caps:
supported_types.add(ImageURLPart)

chat_provider = OpenAILegacy(
model=model.model,
base_url=provider.base_url,
api_key=resolved_api_key,
default_headers=dict(provider.custom_headers) if provider.custom_headers else None,
reasoning_key=provider.reasoning_key,
supported_content_types=supported_types,
)
case "openai_responses":
from kosong.contrib.chat_provider.openai_responses import OpenAIResponses
Expand Down