Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 46 additions & 4 deletions packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,15 @@
tool_to_openai,
)
from kosong.contrib.chat_provider.common import ToolMessageConversion
from kosong.message import ContentPart, Message, TextPart, ThinkPart, ToolCall, ToolCallPart
from kosong.message import (
ContentPart,
ImageURLPart,
Message,
TextPart,
ThinkPart,
ToolCall,
ToolCallPart,
)
from kosong.tooling import Tool

if TYPE_CHECKING:
Expand All @@ -40,6 +48,11 @@ def type_check(openai_legacy: "OpenAILegacy"):
_: RetryableChatProvider = openai_legacy


# ContentPart subclasses that can be filtered by supported_content_types
# Note: ToolCall and ToolCallPart are NOT ContentPart subclasses, they are handled separately
_CONTENT_PART_TYPES: set[type] = {TextPart, ImageURLPart}


class OpenAILegacy:
"""
A chat provider that uses the OpenAI Chat Completions API.
Expand Down Expand Up @@ -77,13 +90,20 @@ def __init__(
stream: bool = True,
reasoning_key: str | None = None,
tool_message_conversion: ToolMessageConversion | None = None,
supported_content_types: set[type] | None = None,
**client_kwargs: Any,
):
"""
Initialize the OpenAILegacy chat provider.

To support OpenAI-compatible APIs that inject reasoning content in an extra field in
the message, such as `{"reasoning": ...}`, `reasoning_key` can be set to the key name.

Args:
supported_content_types: Set of ContentPart subclass types that the API supports.
If None, defaults to {TextPart, ImageURLPart}.
Only ContentPart subclasses (TextPart, ImageURLPart, etc.) should be included.
ToolCall and ToolCallPart are handled separately by message serialization.
"""
self.model = model
self.stream = stream
Expand All @@ -100,6 +120,13 @@ def __init__(
self._reasoning_key = reasoning_key
self._tool_message_conversion: ToolMessageConversion | None = tool_message_conversion
self._generation_kwargs: OpenAILegacy.GenerationKwargs = {}
# Use explicit None check instead of `or` to handle empty set correctly.
# An empty set is falsy in Python, which would incorrectly trigger the default.
self._supported_content_types: set[type] = (
supported_content_types
if supported_content_types is not None
else {TextPart, ImageURLPart}
)

@property
def model_name(self) -> str:
Expand Down Expand Up @@ -139,10 +166,18 @@ async def generate(
reasoning_effort = "medium"

try:
# Build tools parameter: omit if empty to avoid API errors.
# Some APIs (DashScope, Xunfei) reject empty tools array with errors like:
# - "[] is too short - 'tools'" (DashScope)
# - "EngineInternalError: Bad Request" (Xunfei)
# See: https://github.com/MoonshotAI/kimi-cli/issues/1344
tools_param = (
[tool_to_openai(tool) for tool in tools] if tools else omit
)
response = await self.client.chat.completions.create(
model=self.model,
messages=messages,
tools=(tool_to_openai(tool) for tool in tools),
tools=tools_param,
stream=self.stream,
stream_options={"include_usage": True} if self.stream else omit,
reasoning_effort=reasoning_effort,
Expand Down Expand Up @@ -193,7 +228,12 @@ def model_parameters(self) -> dict[str, Any]:
return model_parameters

def _convert_message(self, message: Message) -> ChatCompletionMessageParam:
"""Convert a Kosong message to OpenAI message."""
"""Convert a Kosong message to OpenAI message.

Filters out content parts that are not supported by the API (e.g., VideoURLPart, AudioURLPart).
This prevents API errors when sending messages containing multimedia content to APIs that
don't support them. See: https://github.com/MoonshotAI/kimi-cli/issues/796
"""
# Note: for openai, `developer` role is more standard, but `system` is still accepted.
# And many openai-compatible models do not accept `developer` role.
# So we use `system` role here. OpenAIResponses will use `developer` role.
Expand All @@ -204,8 +244,10 @@ def _convert_message(self, message: Message) -> ChatCompletionMessageParam:
for part in message.content:
if isinstance(part, ThinkPart):
reasoning_content += part.think
else:
elif type(part) in self._supported_content_types:
# Only include content types supported by this API
content.append(part)
# else: filter out unsupported content types (e.g., VideoURLPart, AudioURLPart)
# if tool message and `tool_result_conversion` is `extract_text`, patch all text parts into
# one so that we can make use of the serialization process of `Message` to output string
if message.role == "tool" and self._tool_message_conversion == "extract_text":
Expand Down
24 changes: 6 additions & 18 deletions packages/kosong/tests/api_snapshot_tests/test_openai_legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,26 +27,20 @@ async def test_openai_legacy_message_conversion():
"messages": [
{"role": "system", "content": "You are helpful."},
{"role": "user", "content": "Hello!"},
],
"tools": [],
},
]},
"multi_turn_conversation": {
"messages": [
{"role": "user", "content": "What is 2+2?"},
{"role": "assistant", "content": "2+2 equals 4."},
{"role": "user", "content": "And 3+3?"},
],
"tools": [],
},
]},
"multi_turn_with_system": {
"messages": [
{"role": "system", "content": "You are a math tutor."},
{"role": "user", "content": "What is 2+2?"},
{"role": "assistant", "content": "2+2 equals 4."},
{"role": "user", "content": "And 3+3?"},
],
"tools": [],
},
]},
"image_url": {
"messages": [
{
Expand All @@ -62,9 +56,7 @@ async def test_openai_legacy_message_conversion():
},
],
}
],
"tools": [],
},
]},
"tool_definition": {
"messages": [{"role": "user", "content": "Add 2 and 3"}],
"tools": [
Expand Down Expand Up @@ -134,9 +126,7 @@ async def test_openai_legacy_message_conversion():
],
"tool_call_id": "call_abc123",
},
],
"tools": [],
},
]},
"tool_call": {
"messages": [
{"role": "user", "content": "Add 2 and 3"},
Expand All @@ -152,9 +142,7 @@ async def test_openai_legacy_message_conversion():
],
},
{"role": "tool", "content": "5", "tool_call_id": "call_abc123"},
],
"tools": [],
},
]},
"parallel_tool_calls": {
"messages": [
{"role": "user", "content": "Calculate 2+3 and 4*5"},
Expand Down
11 changes: 11 additions & 0 deletions src/kimi_cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,17 @@ class LLMProvider(BaseModel):
"""Custom headers to include in API requests"""
oauth: OAuthRef | None = None
"""OAuth credential reference (do not store tokens here)."""
reasoning_key: str | None = None
"""Key name for reasoning/thinking content in API response (e.g., 'reasoning_content').
Used by openai_legacy provider to extract thinking content from OpenAI-compatible APIs.
See: https://github.com/MoonshotAI/kimi-cli/issues/1155"""
supported_content_types: list[str] | None = None
"""List of supported content part types for openai_legacy provider.
Valid values: 'text', 'image_url'. Invalid values will raise ValueError at startup.
Note: 'text' is ALWAYS included to ensure text content is never dropped.
If not specified, auto-detects from model capabilities (image_in).
'video_url' and 'audio_url' are filtered out as most APIs don't support them.
See: https://github.com/MoonshotAI/kimi-cli/issues/796"""

@field_serializer("api_key", when_used="json")
def dump_secret(self, v: SecretStr):
Expand Down
30 changes: 30 additions & 0 deletions src/kimi_cli/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,12 +147,42 @@ def create_llm(
chat_provider = chat_provider.with_generation_kwargs(**gen_kwargs)
case "openai_legacy":
from kosong.contrib.chat_provider.openai_legacy import OpenAILegacy
from kosong.message import ImageURLPart, TextPart

# Build supported content types based on config or model capabilities.
# TextPart is ALWAYS included to ensure text content is never dropped.
# Note: Only ContentPart subclasses (TextPart, ImageURLPart) should be here.
# ToolCall and ToolCallPart are handled separately by message serialization.

# Valid content type names that users can configure
VALID_CONTENT_TYPES = {"text", "image_url"}

if provider.supported_content_types is not None:
# Validate configured values
unknown_types = set(provider.supported_content_types) - VALID_CONTENT_TYPES
if unknown_types:
raise ValueError(
f"Invalid supported_content_types in provider config: {unknown_types}. "
f"Valid values are: {VALID_CONTENT_TYPES}"
)
# Build supported types from user config, always including TextPart
supported_types: set[type] = {TextPart}
if "image_url" in provider.supported_content_types:
supported_types.add(ImageURLPart)
Comment on lines +169 to +171
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Align reported capabilities with content-type filter

When provider.supported_content_types excludes image_url, this branch narrows supported_types for OpenAILegacy, but create_llm still returns LLM.capabilities from derive_model_capabilities(model) (which can still include image_in). In that configuration, upstream checks (e.g. message capability gating and image tooling) continue to allow image input, then _convert_message silently strips ImageURLPart before the API call. This can drop user-provided image context (or produce empty user content) without an explicit error; intersect the exposed model capabilities with supported_types (or fail fast on contradictory config) to keep behavior consistent.

Useful? React with 👍 / 👎.

else:
# Auto-detect from model capabilities
model_caps = derive_model_capabilities(model)
supported_types = {TextPart}
if "image_in" in model_caps:
supported_types.add(ImageURLPart)

chat_provider = OpenAILegacy(
model=model.model,
base_url=provider.base_url,
api_key=resolved_api_key,
default_headers=dict(provider.custom_headers) if provider.custom_headers else None,
reasoning_key=provider.reasoning_key,
supported_content_types=supported_types,
)
case "openai_responses":
from kosong.contrib.chat_provider.openai_responses import OpenAIResponses
Expand Down