diff --git a/aiprompts/aisdk-streaming.md b/aiprompts/aisdk-streaming.md new file mode 100644 index 0000000000..57ab550ba1 --- /dev/null +++ b/aiprompts/aisdk-streaming.md @@ -0,0 +1,185 @@ +Data Stream Protocol +A data stream follows a special protocol that the AI SDK provides to send information to the frontend. + +The data stream protocol uses Server-Sent Events (SSE) format for improved standardization, keep-alive through ping, reconnect capabilities, and better cache handling. + +When you provide data streams from a custom backend, you need to set the x-vercel-ai-ui-message-stream header to v1. + +The following stream parts are currently supported: + +Message Start Part +Indicates the beginning of a new message with metadata. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"start","messageId":"..."} +Text Parts +Text content is streamed using a start/delta/end pattern with unique IDs for each text block. + +Text Start Part +Indicates the beginning of a text block. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"text-start","id":"msg_68679a454370819ca74c8eb3d04379630dd1afb72306ca5d"} +Text Delta Part +Contains incremental text content for the text block. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"text-delta","id":"msg_68679a454370819ca74c8eb3d04379630dd1afb72306ca5d","delta":"Hello"} +Text End Part +Indicates the completion of a text block. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"text-end","id":"msg_68679a454370819ca74c8eb3d04379630dd1afb72306ca5d"} +Reasoning Parts +Reasoning content is streamed using a start/delta/end pattern with unique IDs for each reasoning block. + +Reasoning Start Part +Indicates the beginning of a reasoning block. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"reasoning-start","id":"reasoning_123"} +Reasoning Delta Part +Contains incremental reasoning content for the reasoning block. 
+ +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"reasoning-delta","id":"reasoning_123","delta":"This is some reasoning"} +Reasoning End Part +Indicates the completion of a reasoning block. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"reasoning-end","id":"reasoning_123"} +Source Parts +Source parts provide references to external content sources. + +Source URL Part +References to external URLs. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"source-url","sourceId":"https://example.com","url":"https://example.com"} +Source Document Part +References to documents or files. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"source-document","sourceId":"https://example.com","mediaType":"file","title":"Title"} +File Part +The file parts contain references to files with their media type. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"file","url":"https://example.com/file.png","mediaType":"image/png"} +Data Parts +Custom data parts allow streaming of arbitrary structured data with type-specific handling. + +Format: Server-Sent Event with JSON object where the type includes a custom suffix + +Example: + +data: {"type":"data-weather","data":{"location":"SF","temperature":100}} +The data-\* type pattern allows you to define custom data types that your frontend can handle specifically. + +Error Part +The error parts are appended to the message as they are received. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"error","errorText":"error message"} +Tool Input Start Part +Indicates the beginning of tool input streaming. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"tool-input-start","toolCallId":"call_fJdQDqnXeGxTmr4E3YPSR7Ar","toolName":"getWeatherInformation"} +Tool Input Delta Part +Incremental chunks of tool input as it's being generated. 
+ +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"tool-input-delta","toolCallId":"call_fJdQDqnXeGxTmr4E3YPSR7Ar","inputTextDelta":"San Francisco"} +Tool Input Available Part +Indicates that tool input is complete and ready for execution. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"tool-input-available","toolCallId":"call_fJdQDqnXeGxTmr4E3YPSR7Ar","toolName":"getWeatherInformation","input":{"city":"San Francisco"}} +Tool Output Available Part +Contains the result of tool execution. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"tool-output-available","toolCallId":"call_fJdQDqnXeGxTmr4E3YPSR7Ar","output":{"city":"San Francisco","weather":"sunny"}} +Start Step Part +A part indicating the start of a step. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"start-step"} +Finish Step Part +A part indicating that a step (i.e., one LLM API call in the backend) has been completed. + +This part is necessary to correctly process multiple stitched assistant calls, e.g. when calling tools in the backend, and using steps in useChat at the same time. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"finish-step"} +Finish Message Part +A part indicating the completion of a message. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"finish"} +Stream Termination +The stream ends with a special [DONE] marker. + +Format: Server-Sent Event with literal [DONE] + +Example: + +data: [DONE] +The data stream protocol is supported by useChat and useCompletion on the frontend and used by default. useCompletion only supports the text and data stream parts. + +On the backend, you can use toUIMessageStreamResponse() from the streamText result object to return a streaming HTTP response. 
diff --git a/aiprompts/anthropic-messages-api.md b/aiprompts/anthropic-messages-api.md new file mode 100644 index 0000000000..3d487891b9 --- /dev/null +++ b/aiprompts/anthropic-messages-api.md @@ -0,0 +1,3746 @@ +# Messages + +> Send a structured list of input messages with text and/or image content, and the model will generate the next message in the conversation. + +The Messages API can be used for either single queries or stateless multi-turn conversations. + +Learn more about the Messages API in our [user guide](/en/docs/initial-setup) + +## OpenAPI + +````yaml post /v1/messages +paths: + path: /v1/messages + method: post + servers: + - url: https://api.anthropic.com + request: + security: [] + parameters: + path: {} + query: {} + header: + anthropic-beta: + schema: + - type: array + items: + allOf: + - type: string + required: false + title: Anthropic-Beta + description: >- + Optional header to specify the beta version(s) you want to use. + + + To use multiple betas, use a comma separated list like + `beta1,beta2` or specify the header multiple times for each + beta. + anthropic-version: + schema: + - type: string + required: true + title: Anthropic-Version + description: >- + The version of the Anthropic API you want to use. + + + Read more about versioning and our version history + [here](https://docs.anthropic.com/en/api/versioning). + x-api-key: + schema: + - type: string + required: true + title: X-Api-Key + description: >- + Your unique API key for authentication. + + + This key is required in the header of all API requests, to + authenticate your account and access Anthropic's services. Get + your API key through the + [Console](https://console.anthropic.com/settings/keys). Each key + is scoped to a Workspace. + cookie: {} + body: + application/json: + schemaArray: + - type: object + properties: + model: + allOf: + - description: >- + The model that will complete your prompt. 
+ + + See + [models](https://docs.anthropic.com/en/docs/models-overview) + for additional details and options. + examples: + - claude-sonnet-4-20250514 + maxLength: 256 + minLength: 1 + title: Model + type: string + messages: + allOf: + - description: >- + Input messages. + + + Our models are trained to operate on alternating `user` + and `assistant` conversational turns. When creating a new + `Message`, you specify the prior conversational turns with + the `messages` parameter, and the model then generates the + next `Message` in the conversation. Consecutive `user` or + `assistant` turns in your request will be combined into a + single turn. + + + Each input message must be an object with a `role` and + `content`. You can specify a single `user`-role message, + or you can include multiple `user` and `assistant` + messages. + + + If the final message uses the `assistant` role, the + response content will continue immediately from the + content in that message. This can be used to constrain + part of the model's response. + + + Example with a single `user` message: + + + ```json + + [{"role": "user", "content": "Hello, Claude"}] + + ``` + + + Example with multiple conversational turns: + + + ```json + + [ + {"role": "user", "content": "Hello there."}, + {"role": "assistant", "content": "Hi, I'm Claude. How can I help you?"}, + {"role": "user", "content": "Can you explain LLMs in plain English?"}, + ] + + ``` + + + Example with a partially-filled response from Claude: + + + ```json + + [ + {"role": "user", "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"}, + {"role": "assistant", "content": "The best answer is ("}, + ] + + ``` + + + Each input message `content` may be either a single + `string` or an array of content blocks, where each block + has a specific `type`. Using a `string` for `content` is + shorthand for an array of one content block of type + `"text"`. 
The following input messages are equivalent: + + + ```json + + {"role": "user", "content": "Hello, Claude"} + + ``` + + + ```json + + {"role": "user", "content": [{"type": "text", "text": + "Hello, Claude"}]} + + ``` + + + See + [examples](https://docs.anthropic.com/en/api/messages-examples) + for more input examples. + + + Note that if you want to include a [system + prompt](https://docs.anthropic.com/en/docs/system-prompts), + you can use the top-level `system` parameter — there is no + `"system"` role for input messages in the Messages API. + + + There is a limit of 100,000 messages in a single request. + items: + $ref: "#/components/schemas/InputMessage" + title: Messages + type: array + container: + allOf: + - anyOf: + - type: string + - type: "null" + description: Container identifier for reuse across requests. + title: Container + max_tokens: + allOf: + - description: >- + The maximum number of tokens to generate before stopping. + + + Note that our models may stop _before_ reaching this + maximum. This parameter only specifies the absolute + maximum number of tokens to generate. + + + Different models have different maximum values for this + parameter. See + [models](https://docs.anthropic.com/en/docs/models-overview) + for details. + examples: + - 1024 + minimum: 1 + title: Max Tokens + type: integer + mcp_servers: + allOf: + - description: MCP servers to be utilized in this request + items: + $ref: "#/components/schemas/RequestMCPServerURLDefinition" + maxItems: 20 + title: Mcp Servers + type: array + metadata: + allOf: + - $ref: "#/components/schemas/Metadata" + description: An object describing metadata about the request. + service_tier: + allOf: + - description: >- + Determines whether to use priority capacity (if available) + or standard capacity for this request. + + + Anthropic offers different levels of service for your API + requests. See + [service-tiers](https://docs.anthropic.com/en/api/service-tiers) + for details. 
+ enum: + - auto + - standard_only + title: Service Tier + type: string + stop_sequences: + allOf: + - description: >- + Custom text sequences that will cause the model to stop + generating. + + + Our models will normally stop when they have naturally + completed their turn, which will result in a response + `stop_reason` of `"end_turn"`. + + + If you want the model to stop generating when it + encounters custom strings of text, you can use the + `stop_sequences` parameter. If the model encounters one of + the custom sequences, the response `stop_reason` value + will be `"stop_sequence"` and the response `stop_sequence` + value will contain the matched stop sequence. + items: + type: string + title: Stop Sequences + type: array + stream: + allOf: + - description: >- + Whether to incrementally stream the response using + server-sent events. + + + See + [streaming](https://docs.anthropic.com/en/api/messages-streaming) + for details. + title: Stream + type: boolean + system: + allOf: + - anyOf: + - type: string + - items: + $ref: "#/components/schemas/RequestTextBlock" + type: array + description: >- + System prompt. + + + A system prompt is a way of providing context and + instructions to Claude, such as specifying a particular + goal or role. See our [guide to system + prompts](https://docs.anthropic.com/en/docs/system-prompts). + examples: + - - text: Today's date is 2024-06-01. + type: text + - Today's date is 2023-01-01. + title: System + temperature: + allOf: + - description: >- + Amount of randomness injected into the response. + + + Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use + `temperature` closer to `0.0` for analytical / multiple + choice, and closer to `1.0` for creative and generative + tasks. + + + Note that even with `temperature` of `0.0`, the results + will not be fully deterministic. 
+ examples: + - 1 + maximum: 1 + minimum: 0 + title: Temperature + type: number + thinking: + allOf: + - description: >- + Configuration for enabling Claude's extended thinking. + + + When enabled, responses include `thinking` content blocks + showing Claude's thinking process before the final answer. + Requires a minimum budget of 1,024 tokens and counts + towards your `max_tokens` limit. + + + See [extended + thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) + for details. + discriminator: + mapping: + disabled: "#/components/schemas/ThinkingConfigDisabled" + enabled: "#/components/schemas/ThinkingConfigEnabled" + propertyName: type + oneOf: + - $ref: "#/components/schemas/ThinkingConfigEnabled" + - $ref: "#/components/schemas/ThinkingConfigDisabled" + tool_choice: + allOf: + - description: >- + How the model should use the provided tools. The model can + use a specific tool, any available tool, decide by itself, + or not use tools at all. + discriminator: + mapping: + any: "#/components/schemas/ToolChoiceAny" + auto: "#/components/schemas/ToolChoiceAuto" + none: "#/components/schemas/ToolChoiceNone" + tool: "#/components/schemas/ToolChoiceTool" + propertyName: type + oneOf: + - $ref: "#/components/schemas/ToolChoiceAuto" + - $ref: "#/components/schemas/ToolChoiceAny" + - $ref: "#/components/schemas/ToolChoiceTool" + - $ref: "#/components/schemas/ToolChoiceNone" + tools: + allOf: + - description: >- + Definitions of tools that the model may use. + + + If you include `tools` in your API request, the model may + return `tool_use` content blocks that represent the + model's use of those tools. You can then run those tools + using the tool input generated by the model and then + optionally return results back to the model using + `tool_result` content blocks. + + + There are two types of tools: **client tools** and + **server tools**. The behavior described below applies to + client tools. 
For [server + tools](https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/overview\#server-tools), + see their individual documentation as each has its own + behavior (e.g., the [web search + tool](https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/web-search-tool)). + + + Each tool definition includes: + + + * `name`: Name of the tool. + + * `description`: Optional, but strongly-recommended + description of the tool. + + * `input_schema`: [JSON + schema](https://json-schema.org/draft/2020-12) for the + tool `input` shape that the model will produce in + `tool_use` output content blocks. + + + For example, if you defined `tools` as: + + + ```json + + [ + { + "name": "get_stock_price", + "description": "Get the current stock price for a given ticker symbol.", + "input_schema": { + "type": "object", + "properties": { + "ticker": { + "type": "string", + "description": "The stock ticker symbol, e.g. AAPL for Apple Inc." + } + }, + "required": ["ticker"] + } + } + ] + + ``` + + + And then asked the model "What's the S&P 500 at today?", + the model might produce `tool_use` content blocks in the + response like this: + + + ```json + + [ + { + "type": "tool_use", + "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "name": "get_stock_price", + "input": { "ticker": "^GSPC" } + } + ] + + ``` + + + You might then run your `get_stock_price` tool with + `{"ticker": "^GSPC"}` as an input, and return the + following back to the model in a subsequent `user` + message: + + + ```json + + [ + { + "type": "tool_result", + "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "content": "259.75 USD" + } + ] + + ``` + + + Tools can be used for workflows that include running + client-side tools and functions, or more generally + whenever you want the model to produce a particular JSON + structure of output. + + + See our + [guide](https://docs.anthropic.com/en/docs/tool-use) for + more details. 
+ examples: + - description: Get the current weather in a given location + input_schema: + properties: + location: + description: The city and state, e.g. San Francisco, CA + type: string + unit: + description: >- + Unit for the output - one of (celsius, + fahrenheit) + type: string + required: + - location + type: object + name: get_weather + items: + oneOf: + - $ref: "#/components/schemas/Tool" + - $ref: "#/components/schemas/BashTool_20241022" + - $ref: "#/components/schemas/BashTool_20250124" + - $ref: "#/components/schemas/CodeExecutionTool_20250522" + - $ref: "#/components/schemas/ComputerUseTool_20241022" + - $ref: "#/components/schemas/ComputerUseTool_20250124" + - $ref: "#/components/schemas/TextEditor_20241022" + - $ref: "#/components/schemas/TextEditor_20250124" + - $ref: "#/components/schemas/TextEditor_20250429" + - $ref: "#/components/schemas/TextEditor_20250728" + - $ref: "#/components/schemas/WebSearchTool_20250305" + title: Tools + type: array + top_k: + allOf: + - description: >- + Only sample from the top K options for each subsequent + token. + + + Used to remove "long tail" low probability responses. + [Learn more technical details + here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277). + + + Recommended for advanced use cases only. You usually only + need to use `temperature`. + examples: + - 5 + minimum: 0 + title: Top K + type: integer + top_p: + allOf: + - description: >- + Use nucleus sampling. + + + In nucleus sampling, we compute the cumulative + distribution over all the options for each subsequent + token in decreasing probability order and cut it off once + it reaches a particular probability specified by `top_p`. + You should either alter `temperature` or `top_p`, but not + both. + + + Recommended for advanced use cases only. You usually only + need to use `temperature`. 
+ examples: + - 0.7 + maximum: 1 + minimum: 0 + title: Top P + type: number + required: true + title: CreateMessageParams + requiredProperties: + - model + - messages + - max_tokens + additionalProperties: false + example: + max_tokens: 1024 + messages: + - content: Hello, world + role: user + model: claude-sonnet-4-20250514 + examples: + example: + value: + max_tokens: 1024 + messages: + - content: Hello, world + role: user + model: claude-sonnet-4-20250514 + codeSamples: + - lang: bash + source: |- + curl https://api.anthropic.com/v1/messages \ + --header "x-api-key: $ANTHROPIC_API_KEY" \ + --header "anthropic-version: 2023-06-01" \ + --header "content-type: application/json" \ + --data \ + '{ + "model": "claude-sonnet-4-20250514", + "max_tokens": 1024, + "messages": [ + {"role": "user", "content": "Hello, world"} + ] + }' + - lang: python + source: |- + import anthropic + + anthropic.Anthropic().messages.create( + model="claude-sonnet-4-20250514", + max_tokens=1024, + messages=[ + {"role": "user", "content": "Hello, world"} + ] + ) + - lang: javascript + source: |- + import { Anthropic } from '@anthropic-ai/sdk'; + + const anthropic = new Anthropic(); + + await anthropic.messages.create({ + model: "claude-sonnet-4-20250514", + max_tokens: 1024, + messages: [ + {"role": "user", "content": "Hello, world"} + ] + }); + response: + "200": + application/json: + schemaArray: + - type: object + properties: + id: + allOf: + - description: |- + Unique object identifier. + + The format and length of IDs may change over time. + examples: + - msg_013Zva2CMHLNnXjNJJKqJ2EF + title: Id + type: string + type: + allOf: + - const: message + default: message + description: |- + Object type. + + For Messages, this is always `"message"`. + enum: + - message + title: Type + type: string + role: + allOf: + - const: assistant + default: assistant + description: |- + Conversational role of the generated message. + + This will always be `"assistant"`. 
+ enum: + - assistant + title: Role + type: string + content: + allOf: + - description: >- + Content generated by the model. + + + This is an array of content blocks, each of which has a + `type` that determines its shape. + + + Example: + + + ```json + + [{"type": "text", "text": "Hi, I'm Claude."}] + + ``` + + + If the request input `messages` ended with an `assistant` + turn, then the response `content` will continue directly + from that last turn. You can use this to constrain the + model's output. + + + For example, if the input `messages` were: + + ```json + + [ + {"role": "user", "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"}, + {"role": "assistant", "content": "The best answer is ("} + ] + + ``` + + + Then the response `content` might be: + + + ```json + + [{"type": "text", "text": "B)"}] + + ``` + examples: + - - text: Hi! My name is Claude. + type: text + items: + discriminator: + mapping: + code_execution_tool_result: >- + #/components/schemas/ResponseCodeExecutionToolResultBlock + container_upload: "#/components/schemas/ResponseContainerUploadBlock" + mcp_tool_result: "#/components/schemas/ResponseMCPToolResultBlock" + mcp_tool_use: "#/components/schemas/ResponseMCPToolUseBlock" + redacted_thinking: "#/components/schemas/ResponseRedactedThinkingBlock" + server_tool_use: "#/components/schemas/ResponseServerToolUseBlock" + text: "#/components/schemas/ResponseTextBlock" + thinking: "#/components/schemas/ResponseThinkingBlock" + tool_use: "#/components/schemas/ResponseToolUseBlock" + web_search_tool_result: >- + #/components/schemas/ResponseWebSearchToolResultBlock + propertyName: type + oneOf: + - $ref: "#/components/schemas/ResponseTextBlock" + - $ref: "#/components/schemas/ResponseThinkingBlock" + - $ref: "#/components/schemas/ResponseRedactedThinkingBlock" + - $ref: "#/components/schemas/ResponseToolUseBlock" + - $ref: "#/components/schemas/ResponseServerToolUseBlock" + - $ref: >- + 
#/components/schemas/ResponseWebSearchToolResultBlock + - $ref: >- + #/components/schemas/ResponseCodeExecutionToolResultBlock + - $ref: "#/components/schemas/ResponseMCPToolUseBlock" + - $ref: "#/components/schemas/ResponseMCPToolResultBlock" + - $ref: "#/components/schemas/ResponseContainerUploadBlock" + title: Content + type: array + model: + allOf: + - description: The model that handled the request. + examples: + - claude-sonnet-4-20250514 + maxLength: 256 + minLength: 1 + title: Model + type: string + stop_reason: + allOf: + - anyOf: + - enum: + - end_turn + - max_tokens + - stop_sequence + - tool_use + - pause_turn + - refusal + type: string + - type: "null" + description: >- + The reason that we stopped. + + + This may be one of the following values: + + * `"end_turn"`: the model reached a natural stopping point + + * `"max_tokens"`: we exceeded the requested `max_tokens` + or the model's maximum + + * `"stop_sequence"`: one of your provided custom + `stop_sequences` was generated + + * `"tool_use"`: the model invoked one or more tools + + * `"pause_turn"`: we paused a long-running turn. You may + provide the response back as-is in a subsequent request to + let the model continue. + + * `"refusal"`: when streaming classifiers intervene to + handle potential policy violations + + + In non-streaming mode this value is always non-null. In + streaming mode, it is null in the `message_start` event + and non-null otherwise. + title: Stop Reason + stop_sequence: + allOf: + - anyOf: + - type: string + - type: "null" + default: null + description: >- + Which custom stop sequence was generated, if any. + + + This value will be a non-null string if one of your custom + stop sequences was generated. + title: Stop Sequence + usage: + allOf: + - $ref: "#/components/schemas/Usage" + description: >- + Billing and rate-limit usage. + + + Anthropic's API bills and rate-limits by token counts, as + tokens represent the underlying cost to our systems. 
+ + + Under the hood, the API transforms requests into a format + suitable for the model. The model's output then goes + through a parsing stage before becoming an API response. + As a result, the token counts in `usage` will not match + one-to-one with the exact visible content of an API + request or response. + + + For example, `output_tokens` will be non-zero, even for an + empty string response from Claude. + + + Total input tokens in a request is the summation of + `input_tokens`, `cache_creation_input_tokens`, and + `cache_read_input_tokens`. + examples: + - input_tokens: 2095 + output_tokens: 503 + container: + allOf: + - anyOf: + - $ref: "#/components/schemas/Container" + - type: "null" + default: null + description: >- + Information about the container used in this request. + + + This will be non-null if a container tool (e.g. code + execution) was used. + title: Message + examples: + - content: &ref_0 + - text: Hi! My name is Claude. + type: text + id: msg_013Zva2CMHLNnXjNJJKqJ2EF + model: claude-sonnet-4-20250514 + role: assistant + stop_reason: end_turn + stop_sequence: null + type: message + usage: &ref_1 + input_tokens: 2095 + output_tokens: 503 + requiredProperties: + - id + - type + - role + - content + - model + - stop_reason + - stop_sequence + - usage + - container + example: + content: *ref_0 + id: msg_013Zva2CMHLNnXjNJJKqJ2EF + model: claude-sonnet-4-20250514 + role: assistant + stop_reason: end_turn + stop_sequence: null + type: message + usage: *ref_1 + examples: + example: + value: + content: + - text: Hi! My name is Claude. + type: text + id: msg_013Zva2CMHLNnXjNJJKqJ2EF + model: claude-sonnet-4-20250514 + role: assistant + stop_reason: end_turn + stop_sequence: null + type: message + usage: + input_tokens: 2095 + output_tokens: 503 + description: Message object. 
+ 4XX: + application/json: + schemaArray: + - type: object + properties: + error: + allOf: + - discriminator: + mapping: + api_error: "#/components/schemas/APIError" + authentication_error: "#/components/schemas/AuthenticationError" + billing_error: "#/components/schemas/BillingError" + invalid_request_error: "#/components/schemas/InvalidRequestError" + not_found_error: "#/components/schemas/NotFoundError" + overloaded_error: "#/components/schemas/OverloadedError" + permission_error: "#/components/schemas/PermissionError" + rate_limit_error: "#/components/schemas/RateLimitError" + timeout_error: "#/components/schemas/GatewayTimeoutError" + propertyName: type + oneOf: + - $ref: "#/components/schemas/InvalidRequestError" + - $ref: "#/components/schemas/AuthenticationError" + - $ref: "#/components/schemas/BillingError" + - $ref: "#/components/schemas/PermissionError" + - $ref: "#/components/schemas/NotFoundError" + - $ref: "#/components/schemas/RateLimitError" + - $ref: "#/components/schemas/GatewayTimeoutError" + - $ref: "#/components/schemas/APIError" + - $ref: "#/components/schemas/OverloadedError" + title: Error + type: + allOf: + - const: error + default: error + enum: + - error + title: Type + type: string + title: ErrorResponse + requiredProperties: + - error + - type + examples: + example: + value: + error: + message: Invalid request + type: invalid_request_error + type: error + description: >- + Error response. + + + See our [errors + documentation](https://docs.anthropic.com/en/api/errors) for more + details. 
+ deprecated: false + type: path +components: + schemas: + APIError: + properties: + message: + default: Internal server error + title: Message + type: string + type: + const: api_error + default: api_error + enum: + - api_error + title: Type + type: string + required: + - message + - type + title: APIError + type: object + AuthenticationError: + properties: + message: + default: Authentication error + title: Message + type: string + type: + const: authentication_error + default: authentication_error + enum: + - authentication_error + title: Type + type: string + required: + - message + - type + title: AuthenticationError + type: object + Base64ImageSource: + additionalProperties: false + properties: + data: + format: byte + title: Data + type: string + media_type: + enum: + - image/jpeg + - image/png + - image/gif + - image/webp + title: Media Type + type: string + type: + const: base64 + enum: + - base64 + title: Type + type: string + required: + - data + - media_type + - type + title: Base64ImageSource + type: object + Base64PDFSource: + additionalProperties: false + properties: + data: + format: byte + title: Data + type: string + media_type: + const: application/pdf + enum: + - application/pdf + title: Media Type + type: string + type: + const: base64 + enum: + - base64 + title: Type + type: string + required: + - data + - media_type + - type + title: PDF (base64) + type: object + BashTool_20241022: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. + title: Cache Control + name: + const: bash + description: >- + Name of the tool. + + + This is how the tool will be called by the model and in `tool_use` + blocks. 
+ enum: + - bash + title: Name + type: string + type: + const: bash_20241022 + enum: + - bash_20241022 + title: Type + type: string + required: + - name + - type + title: Bash tool (2024-10-22) + type: object + BashTool_20250124: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. + title: Cache Control + name: + const: bash + description: >- + Name of the tool. + + + This is how the tool will be called by the model and in `tool_use` + blocks. + enum: + - bash + title: Name + type: string + type: + const: bash_20250124 + enum: + - bash_20250124 + title: Type + type: string + required: + - name + - type + title: Bash tool (2025-01-24) + type: object + BillingError: + properties: + message: + default: Billing error + title: Message + type: string + type: + const: billing_error + default: billing_error + enum: + - billing_error + title: Type + type: string + required: + - message + - type + title: BillingError + type: object + CacheControlEphemeral: + additionalProperties: false + properties: + ttl: + description: |- + The time-to-live for the cache control breakpoint. + + This may be one of the following values: + - `5m`: 5 minutes + - `1h`: 1 hour + + Defaults to `5m`. + enum: + - 5m + - 1h + title: Ttl + type: string + type: + const: ephemeral + enum: + - ephemeral + title: Type + type: string + required: + - type + title: CacheControlEphemeral + type: object + CacheCreation: + properties: + ephemeral_1h_input_tokens: + default: 0 + description: The number of input tokens used to create the 1 hour cache entry. 
+ minimum: 0 + title: Ephemeral 1H Input Tokens + type: integer + ephemeral_5m_input_tokens: + default: 0 + description: The number of input tokens used to create the 5 minute cache entry. + minimum: 0 + title: Ephemeral 5M Input Tokens + type: integer + required: + - ephemeral_1h_input_tokens + - ephemeral_5m_input_tokens + title: CacheCreation + type: object + CodeExecutionToolResultErrorCode: + enum: + - invalid_tool_input + - unavailable + - too_many_requests + - execution_time_exceeded + title: CodeExecutionToolResultErrorCode + type: string + CodeExecutionTool_20250522: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. + title: Cache Control + name: + const: code_execution + description: >- + Name of the tool. + + + This is how the tool will be called by the model and in `tool_use` + blocks. + enum: + - code_execution + title: Name + type: string + type: + const: code_execution_20250522 + enum: + - code_execution_20250522 + title: Type + type: string + required: + - name + - type + title: Code execution tool (2025-05-22) + type: object + ComputerUseTool_20241022: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. + title: Cache Control + display_height_px: + description: The height of the display in pixels. + minimum: 1 + title: Display Height Px + type: integer + display_number: + anyOf: + - minimum: 0 + type: integer + - type: "null" + description: The X11 display number (e.g. 0, 1) for the display. 
+ title: Display Number + display_width_px: + description: The width of the display in pixels. + minimum: 1 + title: Display Width Px + type: integer + name: + const: computer + description: >- + Name of the tool. + + + This is how the tool will be called by the model and in `tool_use` + blocks. + enum: + - computer + title: Name + type: string + type: + const: computer_20241022 + enum: + - computer_20241022 + title: Type + type: string + required: + - display_height_px + - display_width_px + - name + - type + title: Computer use tool (2024-10-22) + type: object + ComputerUseTool_20250124: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. + title: Cache Control + display_height_px: + description: The height of the display in pixels. + minimum: 1 + title: Display Height Px + type: integer + display_number: + anyOf: + - minimum: 0 + type: integer + - type: "null" + description: The X11 display number (e.g. 0, 1) for the display. + title: Display Number + display_width_px: + description: The width of the display in pixels. + minimum: 1 + title: Display Width Px + type: integer + name: + const: computer + description: >- + Name of the tool. + + + This is how the tool will be called by the model and in `tool_use` + blocks. + enum: + - computer + title: Name + type: string + type: + const: computer_20250124 + enum: + - computer_20250124 + title: Type + type: string + required: + - display_height_px + - display_width_px + - name + - type + title: Computer use tool (2025-01-24) + type: object + Container: + description: >- + Information about the container used in the request (for the code + execution tool) + properties: + expires_at: + description: The time at which the container will expire. 
+ format: date-time + title: Expires At + type: string + id: + description: Identifier for the container used in this request + title: Id + type: string + required: + - expires_at + - id + title: Container + type: object + ContentBlockSource: + additionalProperties: false + properties: + content: + anyOf: + - type: string + - items: + discriminator: + mapping: + image: "#/components/schemas/RequestImageBlock" + text: "#/components/schemas/RequestTextBlock" + propertyName: type + oneOf: + - $ref: "#/components/schemas/RequestTextBlock" + - $ref: "#/components/schemas/RequestImageBlock" + type: array + title: Content + type: + const: content + enum: + - content + title: Type + type: string + required: + - content + - type + title: Content block + type: object + FileDocumentSource: + additionalProperties: false + properties: + file_id: + title: File Id + type: string + type: + const: file + enum: + - file + title: Type + type: string + required: + - file_id + - type + title: File document + type: object + FileImageSource: + additionalProperties: false + properties: + file_id: + title: File Id + type: string + type: + const: file + enum: + - file + title: Type + type: string + required: + - file_id + - type + title: FileImageSource + type: object + GatewayTimeoutError: + properties: + message: + default: Request timeout + title: Message + type: string + type: + const: timeout_error + default: timeout_error + enum: + - timeout_error + title: Type + type: string + required: + - message + - type + title: GatewayTimeoutError + type: object + InputMessage: + additionalProperties: false + properties: + content: + anyOf: + - type: string + - items: + discriminator: + mapping: + code_execution_tool_result: "#/components/schemas/RequestCodeExecutionToolResultBlock" + container_upload: "#/components/schemas/RequestContainerUploadBlock" + document: "#/components/schemas/RequestDocumentBlock" + image: "#/components/schemas/RequestImageBlock" + mcp_tool_result: 
"#/components/schemas/RequestMCPToolResultBlock" + mcp_tool_use: "#/components/schemas/RequestMCPToolUseBlock" + redacted_thinking: "#/components/schemas/RequestRedactedThinkingBlock" + search_result: "#/components/schemas/RequestSearchResultBlock" + server_tool_use: "#/components/schemas/RequestServerToolUseBlock" + text: "#/components/schemas/RequestTextBlock" + thinking: "#/components/schemas/RequestThinkingBlock" + tool_result: "#/components/schemas/RequestToolResultBlock" + tool_use: "#/components/schemas/RequestToolUseBlock" + web_search_tool_result: "#/components/schemas/RequestWebSearchToolResultBlock" + propertyName: type + oneOf: + - $ref: "#/components/schemas/RequestTextBlock" + description: Regular text content. + - $ref: "#/components/schemas/RequestImageBlock" + description: >- + Image content specified directly as base64 data or as a + reference via a URL. + - $ref: "#/components/schemas/RequestDocumentBlock" + description: >- + Document content, either specified directly as base64 + data, as text, or as a reference via a URL. + - $ref: "#/components/schemas/RequestSearchResultBlock" + description: >- + A search result block containing source, title, and + content from search operations. + - $ref: "#/components/schemas/RequestThinkingBlock" + description: A block specifying internal thinking by the model. + - $ref: "#/components/schemas/RequestRedactedThinkingBlock" + description: >- + A block specifying internal, redacted thinking by the + model. + - $ref: "#/components/schemas/RequestToolUseBlock" + description: A block indicating a tool use by the model. + - $ref: "#/components/schemas/RequestToolResultBlock" + description: A block specifying the results of a tool use by the model. 
+ - $ref: "#/components/schemas/RequestServerToolUseBlock" + - $ref: "#/components/schemas/RequestWebSearchToolResultBlock" + - $ref: "#/components/schemas/RequestCodeExecutionToolResultBlock" + - $ref: "#/components/schemas/RequestMCPToolUseBlock" + - $ref: "#/components/schemas/RequestMCPToolResultBlock" + - $ref: "#/components/schemas/RequestContainerUploadBlock" + type: array + title: Content + role: + enum: + - user + - assistant + title: Role + type: string + required: + - content + - role + title: InputMessage + type: object + InputSchema: + additionalProperties: true + properties: + properties: + anyOf: + - type: object + - type: "null" + title: Properties + required: + anyOf: + - items: + type: string + type: array + - type: "null" + title: Required + type: + const: object + enum: + - object + title: Type + type: string + required: + - type + title: InputSchema + type: object + InvalidRequestError: + properties: + message: + default: Invalid request + title: Message + type: string + type: + const: invalid_request_error + default: invalid_request_error + enum: + - invalid_request_error + title: Type + type: string + required: + - message + - type + title: InvalidRequestError + type: object + Metadata: + additionalProperties: false + properties: + user_id: + anyOf: + - maxLength: 256 + type: string + - type: "null" + description: >- + An external identifier for the user who is associated with the + request. + + + This should be a uuid, hash value, or other opaque identifier. + Anthropic may use this id to help detect abuse. Do not include any + identifying information such as name, email address, or phone + number. 
+ examples: + - 13803d75-b4b5-4c3e-b2a2-6f21399b021b + title: User Id + title: Metadata + type: object + NotFoundError: + properties: + message: + default: Not found + title: Message + type: string + type: + const: not_found_error + default: not_found_error + enum: + - not_found_error + title: Type + type: string + required: + - message + - type + title: NotFoundError + type: object + OverloadedError: + properties: + message: + default: Overloaded + title: Message + type: string + type: + const: overloaded_error + default: overloaded_error + enum: + - overloaded_error + title: Type + type: string + required: + - message + - type + title: OverloadedError + type: object + PermissionError: + properties: + message: + default: Permission denied + title: Message + type: string + type: + const: permission_error + default: permission_error + enum: + - permission_error + title: Type + type: string + required: + - message + - type + title: PermissionError + type: object + PlainTextSource: + additionalProperties: false + properties: + data: + title: Data + type: string + media_type: + const: text/plain + enum: + - text/plain + title: Media Type + type: string + type: + const: text + enum: + - text + title: Type + type: string + required: + - data + - media_type + - type + title: Plain text + type: object + RateLimitError: + properties: + message: + default: Rate limited + title: Message + type: string + type: + const: rate_limit_error + default: rate_limit_error + enum: + - rate_limit_error + title: Type + type: string + required: + - message + - type + title: RateLimitError + type: object + RequestCharLocationCitation: + additionalProperties: false + properties: + cited_text: + title: Cited Text + type: string + document_index: + minimum: 0 + title: Document Index + type: integer + document_title: + anyOf: + - maxLength: 255 + minLength: 1 + type: string + - type: "null" + title: Document Title + end_char_index: + title: End Char Index + type: integer + start_char_index: + 
minimum: 0 + title: Start Char Index + type: integer + type: + const: char_location + enum: + - char_location + title: Type + type: string + required: + - cited_text + - document_index + - document_title + - end_char_index + - start_char_index + - type + title: Character location + type: object + RequestCitationsConfig: + additionalProperties: false + properties: + enabled: + title: Enabled + type: boolean + title: RequestCitationsConfig + type: object + RequestCodeExecutionOutputBlock: + additionalProperties: false + properties: + file_id: + title: File Id + type: string + type: + const: code_execution_output + enum: + - code_execution_output + title: Type + type: string + required: + - file_id + - type + title: RequestCodeExecutionOutputBlock + type: object + RequestCodeExecutionResultBlock: + additionalProperties: false + properties: + content: + items: + $ref: "#/components/schemas/RequestCodeExecutionOutputBlock" + title: Content + type: array + return_code: + title: Return Code + type: integer + stderr: + title: Stderr + type: string + stdout: + title: Stdout + type: string + type: + const: code_execution_result + enum: + - code_execution_result + title: Type + type: string + required: + - content + - return_code + - stderr + - stdout + - type + title: Code execution result + type: object + RequestCodeExecutionToolResultBlock: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. 
+ title: Cache Control + content: + anyOf: + - $ref: "#/components/schemas/RequestCodeExecutionToolResultError" + - $ref: "#/components/schemas/RequestCodeExecutionResultBlock" + title: Content + tool_use_id: + pattern: ^srvtoolu_[a-zA-Z0-9_]+$ + title: Tool Use Id + type: string + type: + const: code_execution_tool_result + enum: + - code_execution_tool_result + title: Type + type: string + required: + - content + - tool_use_id + - type + title: Code execution tool result + type: object + RequestCodeExecutionToolResultError: + additionalProperties: false + properties: + error_code: + $ref: "#/components/schemas/CodeExecutionToolResultErrorCode" + type: + const: code_execution_tool_result_error + enum: + - code_execution_tool_result_error + title: Type + type: string + required: + - error_code + - type + title: Code execution tool error + type: object + RequestContainerUploadBlock: + additionalProperties: false + description: >- + A content block that represents a file to be uploaded to the container + + Files uploaded via this block will be available in the container's input + directory. + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. 
+ title: Cache Control + file_id: + title: File Id + type: string + type: + const: container_upload + enum: + - container_upload + title: Type + type: string + required: + - file_id + - type + title: Container upload + type: object + RequestContentBlockLocationCitation: + additionalProperties: false + properties: + cited_text: + title: Cited Text + type: string + document_index: + minimum: 0 + title: Document Index + type: integer + document_title: + anyOf: + - maxLength: 255 + minLength: 1 + type: string + - type: "null" + title: Document Title + end_block_index: + title: End Block Index + type: integer + start_block_index: + minimum: 0 + title: Start Block Index + type: integer + type: + const: content_block_location + enum: + - content_block_location + title: Type + type: string + required: + - cited_text + - document_index + - document_title + - end_block_index + - start_block_index + - type + title: Content block location + type: object + RequestDocumentBlock: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. 
+ title: Cache Control + citations: + $ref: "#/components/schemas/RequestCitationsConfig" + context: + anyOf: + - minLength: 1 + type: string + - type: "null" + title: Context + source: + discriminator: + mapping: + base64: "#/components/schemas/Base64PDFSource" + content: "#/components/schemas/ContentBlockSource" + file: "#/components/schemas/FileDocumentSource" + text: "#/components/schemas/PlainTextSource" + url: "#/components/schemas/URLPDFSource" + propertyName: type + oneOf: + - $ref: "#/components/schemas/Base64PDFSource" + - $ref: "#/components/schemas/PlainTextSource" + - $ref: "#/components/schemas/ContentBlockSource" + - $ref: "#/components/schemas/URLPDFSource" + - $ref: "#/components/schemas/FileDocumentSource" + title: + anyOf: + - maxLength: 500 + minLength: 1 + type: string + - type: "null" + title: Title + type: + const: document + enum: + - document + title: Type + type: string + required: + - source + - type + title: Document + type: object + RequestImageBlock: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. 
+ title: Cache Control + source: + discriminator: + mapping: + base64: "#/components/schemas/Base64ImageSource" + file: "#/components/schemas/FileImageSource" + url: "#/components/schemas/URLImageSource" + propertyName: type + oneOf: + - $ref: "#/components/schemas/Base64ImageSource" + - $ref: "#/components/schemas/URLImageSource" + - $ref: "#/components/schemas/FileImageSource" + title: Source + type: + const: image + enum: + - image + title: Type + type: string + required: + - source + - type + title: Image + type: object + RequestMCPServerToolConfiguration: + additionalProperties: false + properties: + allowed_tools: + anyOf: + - items: + type: string + type: array + - type: "null" + title: Allowed Tools + enabled: + anyOf: + - type: boolean + - type: "null" + title: Enabled + title: RequestMCPServerToolConfiguration + type: object + RequestMCPServerURLDefinition: + additionalProperties: false + properties: + authorization_token: + anyOf: + - type: string + - type: "null" + title: Authorization Token + name: + title: Name + type: string + tool_configuration: + anyOf: + - $ref: "#/components/schemas/RequestMCPServerToolConfiguration" + - type: "null" + type: + const: url + enum: + - url + title: Type + type: string + url: + title: Url + type: string + required: + - name + - type + - url + title: RequestMCPServerURLDefinition + type: object + RequestMCPToolResultBlock: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. 
+ title: Cache Control + content: + anyOf: + - type: string + - items: + $ref: "#/components/schemas/RequestTextBlock" + type: array + title: Content + is_error: + title: Is Error + type: boolean + tool_use_id: + pattern: ^[a-zA-Z0-9_-]+$ + title: Tool Use Id + type: string + type: + const: mcp_tool_result + enum: + - mcp_tool_result + title: Type + type: string + required: + - tool_use_id + - type + title: MCP tool result + type: object + RequestMCPToolUseBlock: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. + title: Cache Control + id: + pattern: ^[a-zA-Z0-9_-]+$ + title: Id + type: string + input: + title: Input + type: object + name: + title: Name + type: string + server_name: + description: The name of the MCP server + title: Server Name + type: string + type: + const: mcp_tool_use + enum: + - mcp_tool_use + title: Type + type: string + required: + - id + - input + - name + - server_name + - type + title: MCP tool use + type: object + RequestPageLocationCitation: + additionalProperties: false + properties: + cited_text: + title: Cited Text + type: string + document_index: + minimum: 0 + title: Document Index + type: integer + document_title: + anyOf: + - maxLength: 255 + minLength: 1 + type: string + - type: "null" + title: Document Title + end_page_number: + title: End Page Number + type: integer + start_page_number: + minimum: 1 + title: Start Page Number + type: integer + type: + const: page_location + enum: + - page_location + title: Type + type: string + required: + - cited_text + - document_index + - document_title + - end_page_number + - start_page_number + - type + title: Page location + type: object + RequestRedactedThinkingBlock: + additionalProperties: false + 
properties: + data: + title: Data + type: string + type: + const: redacted_thinking + enum: + - redacted_thinking + title: Type + type: string + required: + - data + - type + title: Redacted thinking + type: object + RequestSearchResultBlock: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. + title: Cache Control + citations: + $ref: "#/components/schemas/RequestCitationsConfig" + content: + items: + $ref: "#/components/schemas/RequestTextBlock" + title: Content + type: array + source: + title: Source + type: string + title: + title: Title + type: string + type: + const: search_result + enum: + - search_result + title: Type + type: string + required: + - content + - source + - title + - type + title: Search result + type: object + RequestSearchResultLocationCitation: + additionalProperties: false + properties: + cited_text: + title: Cited Text + type: string + end_block_index: + title: End Block Index + type: integer + search_result_index: + minimum: 0 + title: Search Result Index + type: integer + source: + title: Source + type: string + start_block_index: + minimum: 0 + title: Start Block Index + type: integer + title: + anyOf: + - type: string + - type: "null" + title: Title + type: + const: search_result_location + enum: + - search_result_location + title: Type + type: string + required: + - cited_text + - end_block_index + - search_result_index + - source + - start_block_index + - title + - type + title: RequestSearchResultLocationCitation + type: object + RequestServerToolUseBlock: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: 
"#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. + title: Cache Control + id: + pattern: ^srvtoolu_[a-zA-Z0-9_]+$ + title: Id + type: string + input: + title: Input + type: object + name: + enum: + - web_search + - code_execution + title: Name + type: string + type: + const: server_tool_use + enum: + - server_tool_use + title: Type + type: string + required: + - id + - input + - name + - type + title: Server tool use + type: object + RequestTextBlock: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. + title: Cache Control + citations: + anyOf: + - items: + discriminator: + mapping: + char_location: "#/components/schemas/RequestCharLocationCitation" + content_block_location: "#/components/schemas/RequestContentBlockLocationCitation" + page_location: "#/components/schemas/RequestPageLocationCitation" + search_result_location: "#/components/schemas/RequestSearchResultLocationCitation" + web_search_result_location: >- + #/components/schemas/RequestWebSearchResultLocationCitation + propertyName: type + oneOf: + - $ref: "#/components/schemas/RequestCharLocationCitation" + - $ref: "#/components/schemas/RequestPageLocationCitation" + - $ref: "#/components/schemas/RequestContentBlockLocationCitation" + - $ref: >- + #/components/schemas/RequestWebSearchResultLocationCitation + - $ref: "#/components/schemas/RequestSearchResultLocationCitation" + type: array + - type: "null" + title: Citations + text: + minLength: 1 + title: Text + type: string + type: + const: text + enum: + - text + title: Type + type: string + required: + - text + - type + title: Text + type: object + RequestThinkingBlock: + additionalProperties: 
false + properties: + signature: + title: Signature + type: string + thinking: + title: Thinking + type: string + type: + const: thinking + enum: + - thinking + title: Type + type: string + required: + - signature + - thinking + - type + title: Thinking + type: object + RequestToolResultBlock: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. + title: Cache Control + content: + anyOf: + - type: string + - items: + discriminator: + mapping: + image: "#/components/schemas/RequestImageBlock" + search_result: "#/components/schemas/RequestSearchResultBlock" + text: "#/components/schemas/RequestTextBlock" + propertyName: type + oneOf: + - $ref: "#/components/schemas/RequestTextBlock" + - $ref: "#/components/schemas/RequestImageBlock" + - $ref: "#/components/schemas/RequestSearchResultBlock" + type: array + title: Content + is_error: + title: Is Error + type: boolean + tool_use_id: + pattern: ^[a-zA-Z0-9_-]+$ + title: Tool Use Id + type: string + type: + const: tool_result + enum: + - tool_result + title: Type + type: string + required: + - tool_use_id + - type + title: Tool result + type: object + RequestToolUseBlock: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. 
+ title: Cache Control + id: + pattern: ^[a-zA-Z0-9_-]+$ + title: Id + type: string + input: + title: Input + type: object + name: + maxLength: 200 + minLength: 1 + title: Name + type: string + type: + const: tool_use + enum: + - tool_use + title: Type + type: string + required: + - id + - input + - name + - type + title: Tool use + type: object + RequestWebSearchResultBlock: + additionalProperties: false + properties: + encrypted_content: + title: Encrypted Content + type: string + page_age: + anyOf: + - type: string + - type: "null" + title: Page Age + title: + title: Title + type: string + type: + const: web_search_result + enum: + - web_search_result + title: Type + type: string + url: + title: Url + type: string + required: + - encrypted_content + - title + - type + - url + title: RequestWebSearchResultBlock + type: object + RequestWebSearchResultLocationCitation: + additionalProperties: false + properties: + cited_text: + title: Cited Text + type: string + encrypted_index: + title: Encrypted Index + type: string + title: + anyOf: + - maxLength: 512 + minLength: 1 + type: string + - type: "null" + title: Title + type: + const: web_search_result_location + enum: + - web_search_result_location + title: Type + type: string + url: + maxLength: 2048 + minLength: 1 + title: Url + type: string + required: + - cited_text + - encrypted_index + - title + - type + - url + title: RequestWebSearchResultLocationCitation + type: object + RequestWebSearchToolResultBlock: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. 
+ title: Cache Control + content: + anyOf: + - items: + $ref: "#/components/schemas/RequestWebSearchResultBlock" + type: array + - $ref: "#/components/schemas/RequestWebSearchToolResultError" + title: Content + tool_use_id: + pattern: ^srvtoolu_[a-zA-Z0-9_]+$ + title: Tool Use Id + type: string + type: + const: web_search_tool_result + enum: + - web_search_tool_result + title: Type + type: string + required: + - content + - tool_use_id + - type + title: Web search tool result + type: object + RequestWebSearchToolResultError: + additionalProperties: false + properties: + error_code: + $ref: "#/components/schemas/WebSearchToolResultErrorCode" + type: + const: web_search_tool_result_error + enum: + - web_search_tool_result_error + title: Type + type: string + required: + - error_code + - type + title: RequestWebSearchToolResultError + type: object + ResponseCharLocationCitation: + properties: + cited_text: + title: Cited Text + type: string + document_index: + minimum: 0 + title: Document Index + type: integer + document_title: + anyOf: + - type: string + - type: "null" + title: Document Title + end_char_index: + title: End Char Index + type: integer + file_id: + anyOf: + - type: string + - type: "null" + default: null + title: File Id + start_char_index: + minimum: 0 + title: Start Char Index + type: integer + type: + const: char_location + default: char_location + enum: + - char_location + title: Type + type: string + required: + - cited_text + - document_index + - document_title + - end_char_index + - file_id + - start_char_index + - type + title: Character location + type: object + ResponseCodeExecutionOutputBlock: + properties: + file_id: + title: File Id + type: string + type: + const: code_execution_output + default: code_execution_output + enum: + - code_execution_output + title: Type + type: string + required: + - file_id + - type + title: ResponseCodeExecutionOutputBlock + type: object + ResponseCodeExecutionResultBlock: + properties: + content: + items: + 
$ref: "#/components/schemas/ResponseCodeExecutionOutputBlock" + title: Content + type: array + return_code: + title: Return Code + type: integer + stderr: + title: Stderr + type: string + stdout: + title: Stdout + type: string + type: + const: code_execution_result + default: code_execution_result + enum: + - code_execution_result + title: Type + type: string + required: + - content + - return_code + - stderr + - stdout + - type + title: Code execution result + type: object + ResponseCodeExecutionToolResultBlock: + properties: + content: + anyOf: + - $ref: "#/components/schemas/ResponseCodeExecutionToolResultError" + - $ref: "#/components/schemas/ResponseCodeExecutionResultBlock" + title: Content + tool_use_id: + pattern: ^srvtoolu_[a-zA-Z0-9_]+$ + title: Tool Use Id + type: string + type: + const: code_execution_tool_result + default: code_execution_tool_result + enum: + - code_execution_tool_result + title: Type + type: string + required: + - content + - tool_use_id + - type + title: Code execution tool result + type: object + ResponseCodeExecutionToolResultError: + properties: + error_code: + $ref: "#/components/schemas/CodeExecutionToolResultErrorCode" + type: + const: code_execution_tool_result_error + default: code_execution_tool_result_error + enum: + - code_execution_tool_result_error + title: Type + type: string + required: + - error_code + - type + title: Code execution tool error + type: object + ResponseContainerUploadBlock: + description: Response model for a file uploaded to the container. 
+ properties: + file_id: + title: File Id + type: string + type: + const: container_upload + default: container_upload + enum: + - container_upload + title: Type + type: string + required: + - file_id + - type + title: Container upload + type: object + ResponseContentBlockLocationCitation: + properties: + cited_text: + title: Cited Text + type: string + document_index: + minimum: 0 + title: Document Index + type: integer + document_title: + anyOf: + - type: string + - type: "null" + title: Document Title + end_block_index: + title: End Block Index + type: integer + file_id: + anyOf: + - type: string + - type: "null" + default: null + title: File Id + start_block_index: + minimum: 0 + title: Start Block Index + type: integer + type: + const: content_block_location + default: content_block_location + enum: + - content_block_location + title: Type + type: string + required: + - cited_text + - document_index + - document_title + - end_block_index + - file_id + - start_block_index + - type + title: Content block location + type: object + ResponseMCPToolResultBlock: + properties: + content: + anyOf: + - type: string + - items: + $ref: "#/components/schemas/ResponseTextBlock" + type: array + title: Content + is_error: + default: false + title: Is Error + type: boolean + tool_use_id: + pattern: ^[a-zA-Z0-9_-]+$ + title: Tool Use Id + type: string + type: + const: mcp_tool_result + default: mcp_tool_result + enum: + - mcp_tool_result + title: Type + type: string + required: + - content + - is_error + - tool_use_id + - type + title: MCP tool result + type: object + ResponseMCPToolUseBlock: + properties: + id: + pattern: ^[a-zA-Z0-9_-]+$ + title: Id + type: string + input: + title: Input + type: object + name: + description: The name of the MCP tool + title: Name + type: string + server_name: + description: The name of the MCP server + title: Server Name + type: string + type: + const: mcp_tool_use + default: mcp_tool_use + enum: + - mcp_tool_use + title: Type + type: string 
+ required: + - id + - input + - name + - server_name + - type + title: MCP tool use + type: object + ResponsePageLocationCitation: + properties: + cited_text: + title: Cited Text + type: string + document_index: + minimum: 0 + title: Document Index + type: integer + document_title: + anyOf: + - type: string + - type: "null" + title: Document Title + end_page_number: + title: End Page Number + type: integer + file_id: + anyOf: + - type: string + - type: "null" + default: null + title: File Id + start_page_number: + minimum: 1 + title: Start Page Number + type: integer + type: + const: page_location + default: page_location + enum: + - page_location + title: Type + type: string + required: + - cited_text + - document_index + - document_title + - end_page_number + - file_id + - start_page_number + - type + title: Page location + type: object + ResponseRedactedThinkingBlock: + properties: + data: + title: Data + type: string + type: + const: redacted_thinking + default: redacted_thinking + enum: + - redacted_thinking + title: Type + type: string + required: + - data + - type + title: Redacted thinking + type: object + ResponseSearchResultLocationCitation: + properties: + cited_text: + title: Cited Text + type: string + end_block_index: + title: End Block Index + type: integer + search_result_index: + minimum: 0 + title: Search Result Index + type: integer + source: + title: Source + type: string + start_block_index: + minimum: 0 + title: Start Block Index + type: integer + title: + anyOf: + - type: string + - type: "null" + title: Title + type: + const: search_result_location + default: search_result_location + enum: + - search_result_location + title: Type + type: string + required: + - cited_text + - end_block_index + - search_result_index + - source + - start_block_index + - title + - type + title: ResponseSearchResultLocationCitation + type: object + ResponseServerToolUseBlock: + properties: + id: + pattern: ^srvtoolu_[a-zA-Z0-9_]+$ + title: Id + type: string + 
input: + title: Input + type: object + name: + enum: + - web_search + - code_execution + title: Name + type: string + type: + const: server_tool_use + default: server_tool_use + enum: + - server_tool_use + title: Type + type: string + required: + - id + - input + - name + - type + title: Server tool use + type: object + ResponseTextBlock: + properties: + citations: + anyOf: + - items: + discriminator: + mapping: + char_location: "#/components/schemas/ResponseCharLocationCitation" + content_block_location: "#/components/schemas/ResponseContentBlockLocationCitation" + page_location: "#/components/schemas/ResponsePageLocationCitation" + search_result_location: "#/components/schemas/ResponseSearchResultLocationCitation" + web_search_result_location: >- + #/components/schemas/ResponseWebSearchResultLocationCitation + propertyName: type + oneOf: + - $ref: "#/components/schemas/ResponseCharLocationCitation" + - $ref: "#/components/schemas/ResponsePageLocationCitation" + - $ref: "#/components/schemas/ResponseContentBlockLocationCitation" + - $ref: >- + #/components/schemas/ResponseWebSearchResultLocationCitation + - $ref: "#/components/schemas/ResponseSearchResultLocationCitation" + type: array + - type: "null" + default: null + description: >- + Citations supporting the text block. + + + The type of citation returned will depend on the type of document + being cited. Citing a PDF results in `page_location`, plain text + results in `char_location`, and content document results in + `content_block_location`. 
+ title: Citations + text: + maxLength: 5000000 + minLength: 0 + title: Text + type: string + type: + const: text + default: text + enum: + - text + title: Type + type: string + required: + - citations + - text + - type + title: Text + type: object + ResponseThinkingBlock: + properties: + signature: + title: Signature + type: string + thinking: + title: Thinking + type: string + type: + const: thinking + default: thinking + enum: + - thinking + title: Type + type: string + required: + - signature + - thinking + - type + title: Thinking + type: object + ResponseToolUseBlock: + properties: + id: + pattern: ^[a-zA-Z0-9_-]+$ + title: Id + type: string + input: + title: Input + type: object + name: + minLength: 1 + title: Name + type: string + type: + const: tool_use + default: tool_use + enum: + - tool_use + title: Type + type: string + required: + - id + - input + - name + - type + title: Tool use + type: object + ResponseWebSearchResultBlock: + properties: + encrypted_content: + title: Encrypted Content + type: string + page_age: + anyOf: + - type: string + - type: "null" + default: null + title: Page Age + title: + title: Title + type: string + type: + const: web_search_result + default: web_search_result + enum: + - web_search_result + title: Type + type: string + url: + title: Url + type: string + required: + - encrypted_content + - page_age + - title + - type + - url + title: ResponseWebSearchResultBlock + type: object + ResponseWebSearchResultLocationCitation: + properties: + cited_text: + title: Cited Text + type: string + encrypted_index: + title: Encrypted Index + type: string + title: + anyOf: + - maxLength: 512 + type: string + - type: "null" + title: Title + type: + const: web_search_result_location + default: web_search_result_location + enum: + - web_search_result_location + title: Type + type: string + url: + title: Url + type: string + required: + - cited_text + - encrypted_index + - title + - type + - url + title: 
ResponseWebSearchResultLocationCitation + type: object + ResponseWebSearchToolResultBlock: + properties: + content: + anyOf: + - $ref: "#/components/schemas/ResponseWebSearchToolResultError" + - items: + $ref: "#/components/schemas/ResponseWebSearchResultBlock" + type: array + title: Content + tool_use_id: + pattern: ^srvtoolu_[a-zA-Z0-9_]+$ + title: Tool Use Id + type: string + type: + const: web_search_tool_result + default: web_search_tool_result + enum: + - web_search_tool_result + title: Type + type: string + required: + - content + - tool_use_id + - type + title: Web search tool result + type: object + ResponseWebSearchToolResultError: + properties: + error_code: + $ref: "#/components/schemas/WebSearchToolResultErrorCode" + type: + const: web_search_tool_result_error + default: web_search_tool_result_error + enum: + - web_search_tool_result_error + title: Type + type: string + required: + - error_code + - type + title: ResponseWebSearchToolResultError + type: object + ServerToolUsage: + properties: + web_search_requests: + default: 0 + description: The number of web search tool requests. + examples: + - 0 + minimum: 0 + title: Web Search Requests + type: integer + required: + - web_search_requests + title: ServerToolUsage + type: object + TextEditor_20241022: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. + title: Cache Control + name: + const: str_replace_editor + description: >- + Name of the tool. + + + This is how the tool will be called by the model and in `tool_use` + blocks. 
+ enum: + - str_replace_editor + title: Name + type: string + type: + const: text_editor_20241022 + enum: + - text_editor_20241022 + title: Type + type: string + required: + - name + - type + title: Text editor tool (2024-10-22) + type: object + TextEditor_20250124: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. + title: Cache Control + name: + const: str_replace_editor + description: >- + Name of the tool. + + + This is how the tool will be called by the model and in `tool_use` + blocks. + enum: + - str_replace_editor + title: Name + type: string + type: + const: text_editor_20250124 + enum: + - text_editor_20250124 + title: Type + type: string + required: + - name + - type + title: Text editor tool (2025-01-24) + type: object + TextEditor_20250429: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. + title: Cache Control + name: + const: str_replace_based_edit_tool + description: >- + Name of the tool. + + + This is how the tool will be called by the model and in `tool_use` + blocks. 
+ enum: + - str_replace_based_edit_tool + title: Name + type: string + type: + const: text_editor_20250429 + enum: + - text_editor_20250429 + title: Type + type: string + required: + - name + - type + title: Text editor tool (2025-04-29) + type: object + TextEditor_20250728: + additionalProperties: false + properties: + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. + title: Cache Control + max_characters: + anyOf: + - minimum: 1 + type: integer + - type: "null" + description: >- + Maximum number of characters to display when viewing a file. If not + specified, defaults to displaying the full file. + title: Max Characters + name: + const: str_replace_based_edit_tool + description: >- + Name of the tool. + + + This is how the tool will be called by the model and in `tool_use` + blocks. + enum: + - str_replace_based_edit_tool + title: Name + type: string + type: + const: text_editor_20250728 + enum: + - text_editor_20250728 + title: Type + type: string + required: + - name + - type + title: TextEditor_20250728 + type: object + ThinkingConfigDisabled: + additionalProperties: false + properties: + type: + const: disabled + enum: + - disabled + title: Type + type: string + required: + - type + title: Disabled + type: object + ThinkingConfigEnabled: + additionalProperties: false + properties: + budget_tokens: + description: >- + Determines how many tokens Claude can use for its internal reasoning + process. Larger budgets can enable more thorough analysis for + complex problems, improving response quality. + + + Must be ≥1024 and less than `max_tokens`. + + + See [extended + thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) + for details. 
+ minimum: 1024 + title: Budget Tokens + type: integer + type: + const: enabled + enum: + - enabled + title: Type + type: string + required: + - budget_tokens + - type + title: Enabled + type: object + Tool: + additionalProperties: false + properties: + type: + anyOf: + - type: "null" + - const: custom + enum: + - custom + type: string + title: Type + description: + description: >- + Description of what this tool does. + + + Tool descriptions should be as detailed as possible. The more + information that the model has about what the tool is and how to use + it, the better it will perform. You can use natural language + descriptions to reinforce important aspects of the tool input JSON + schema. + examples: + - Get the current weather in a given location + title: Description + type: string + name: + description: >- + Name of the tool. + + + This is how the tool will be called by the model and in `tool_use` + blocks. + maxLength: 128 + minLength: 1 + pattern: ^[a-zA-Z0-9_-]{1,128}$ + title: Name + type: string + input_schema: + $ref: "#/components/schemas/InputSchema" + description: >- + [JSON schema](https://json-schema.org/draft/2020-12) for this tool's + input. + + + This defines the shape of the `input` that your tool accepts and + that the model will produce. + examples: + - properties: + location: + description: The city and state, e.g. San Francisco, CA + type: string + unit: + description: Unit for the output - one of (celsius, fahrenheit) + type: string + required: + - location + type: object + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. 
+ title: Cache Control + required: + - name + - input_schema + title: Custom tool + type: object + ToolChoiceAny: + additionalProperties: false + description: The model will use any available tools. + properties: + disable_parallel_tool_use: + description: >- + Whether to disable parallel tool use. + + + Defaults to `false`. If set to `true`, the model will output exactly + one tool use. + title: Disable Parallel Tool Use + type: boolean + type: + const: any + enum: + - any + title: Type + type: string + required: + - type + title: Any + type: object + ToolChoiceAuto: + additionalProperties: false + description: The model will automatically decide whether to use tools. + properties: + disable_parallel_tool_use: + description: >- + Whether to disable parallel tool use. + + + Defaults to `false`. If set to `true`, the model will output at most + one tool use. + title: Disable Parallel Tool Use + type: boolean + type: + const: auto + enum: + - auto + title: Type + type: string + required: + - type + title: Auto + type: object + ToolChoiceNone: + additionalProperties: false + description: The model will not be allowed to use tools. + properties: + type: + const: none + enum: + - none + title: Type + type: string + required: + - type + title: None + type: object + ToolChoiceTool: + additionalProperties: false + description: The model will use the specified tool with `tool_choice.name`. + properties: + disable_parallel_tool_use: + description: >- + Whether to disable parallel tool use. + + + Defaults to `false`. If set to `true`, the model will output exactly + one tool use. + title: Disable Parallel Tool Use + type: boolean + name: + description: The name of the tool to use. 
+ title: Name + type: string + type: + const: tool + enum: + - tool + title: Type + type: string + required: + - name + - type + title: Tool + type: object + URLImageSource: + additionalProperties: false + properties: + type: + const: url + enum: + - url + title: Type + type: string + url: + title: Url + type: string + required: + - type + - url + title: URLImageSource + type: object + URLPDFSource: + additionalProperties: false + properties: + type: + const: url + enum: + - url + title: Type + type: string + url: + title: Url + type: string + required: + - type + - url + title: PDF (URL) + type: object + Usage: + properties: + cache_creation: + anyOf: + - $ref: "#/components/schemas/CacheCreation" + - type: "null" + default: null + description: Breakdown of cached tokens by TTL + cache_creation_input_tokens: + anyOf: + - minimum: 0 + type: integer + - type: "null" + default: null + description: The number of input tokens used to create the cache entry. + examples: + - 2051 + title: Cache Creation Input Tokens + cache_read_input_tokens: + anyOf: + - minimum: 0 + type: integer + - type: "null" + default: null + description: The number of input tokens read from the cache. + examples: + - 2051 + title: Cache Read Input Tokens + input_tokens: + description: The number of input tokens which were used. + examples: + - 2095 + minimum: 0 + title: Input Tokens + type: integer + output_tokens: + description: The number of output tokens which were used. + examples: + - 503 + minimum: 0 + title: Output Tokens + type: integer + server_tool_use: + anyOf: + - $ref: "#/components/schemas/ServerToolUsage" + - type: "null" + default: null + description: The number of server tool requests. + service_tier: + anyOf: + - enum: + - standard + - priority + - batch + type: string + - type: "null" + default: null + description: If the request used the priority, standard, or batch tier. 
+ title: Service Tier + required: + - cache_creation + - cache_creation_input_tokens + - cache_read_input_tokens + - input_tokens + - output_tokens + - server_tool_use + - service_tier + title: Usage + type: object + UserLocation: + additionalProperties: false + properties: + city: + anyOf: + - maxLength: 255 + minLength: 1 + type: string + - type: "null" + description: The city of the user. + examples: + - New York + - Tokyo + - Los Angeles + title: City + country: + anyOf: + - maxLength: 2 + minLength: 2 + type: string + - type: "null" + description: >- + The two letter [ISO country + code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) of the user. + examples: + - US + - JP + - GB + title: Country + region: + anyOf: + - maxLength: 255 + minLength: 1 + type: string + - type: "null" + description: The region of the user. + examples: + - California + - Ontario + - Wales + title: Region + timezone: + anyOf: + - maxLength: 255 + minLength: 1 + type: string + - type: "null" + description: The [IANA timezone](https://nodatime.org/TimeZones) of the user. + examples: + - America/New_York + - Asia/Tokyo + - Europe/London + title: Timezone + type: + const: approximate + enum: + - approximate + title: Type + type: string + required: + - type + title: UserLocation + type: object + WebSearchToolResultErrorCode: + enum: + - invalid_tool_input + - unavailable + - max_uses_exceeded + - too_many_requests + - query_too_long + title: WebSearchToolResultErrorCode + type: string + WebSearchTool_20250305: + additionalProperties: false + properties: + allowed_domains: + anyOf: + - items: + type: string + type: array + - type: "null" + description: >- + If provided, only these domains will be included in results. Cannot + be used alongside `blocked_domains`. + title: Allowed Domains + blocked_domains: + anyOf: + - items: + type: string + type: array + - type: "null" + description: >- + If provided, these domains will never appear in results. 
Cannot be + used alongside `allowed_domains`. + title: Blocked Domains + cache_control: + anyOf: + - discriminator: + mapping: + ephemeral: "#/components/schemas/CacheControlEphemeral" + propertyName: type + oneOf: + - $ref: "#/components/schemas/CacheControlEphemeral" + - type: "null" + description: Create a cache control breakpoint at this content block. + title: Cache Control + max_uses: + anyOf: + - exclusiveMinimum: 0 + type: integer + - type: "null" + description: Maximum number of times the tool can be used in the API request. + title: Max Uses + name: + const: web_search + description: >- + Name of the tool. + + + This is how the tool will be called by the model and in `tool_use` + blocks. + enum: + - web_search + title: Name + type: string + type: + const: web_search_20250305 + enum: + - web_search_20250305 + title: Type + type: string + user_location: + anyOf: + - $ref: "#/components/schemas/UserLocation" + - type: "null" + description: >- + Parameters for the user's location. Used to provide more relevant + search results. + required: + - name + - type + title: Web search tool (2025-03-05) + type: object +```` diff --git a/aiprompts/anthropic-streaming.md b/aiprompts/anthropic-streaming.md new file mode 100644 index 0000000000..8868e7d4d8 --- /dev/null +++ b/aiprompts/anthropic-streaming.md @@ -0,0 +1,631 @@ +# Streaming Messages + +When creating a Message, you can set `"stream": true` to incrementally stream the response using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent%5Fevents/Using%5Fserver-sent%5Fevents) (SSE). + +## Streaming with SDKs + +Our [Python](https://github.com/anthropics/anthropic-sdk-python) and [TypeScript](https://github.com/anthropics/anthropic-sdk-typescript) SDKs offer multiple ways of streaming. The Python SDK allows both sync and async streams. See the documentation in each SDK for details. 
+
+
+ ```Python Python
+ import anthropic
+
+client = anthropic.Anthropic()
+
+with client.messages.stream(
+    max_tokens=1024,
+    messages=[{"role": "user", "content": "Hello"}],
+    model="claude-opus-4-1-20250805",
+) as stream:
+    for text in stream.text_stream:
+        print(text, end="", flush=True)
+
+````
+
+```TypeScript TypeScript
+import Anthropic from '@anthropic-ai/sdk';
+
+const client = new Anthropic();
+
+await client.messages.stream({
+ messages: [{role: 'user', content: "Hello"}],
+ model: 'claude-opus-4-1-20250805',
+ max_tokens: 1024,
+}).on('text', (text) => {
+ console.log(text);
+});
+````
+
+
+
+## Event types
+
+Each server-sent event includes a named event type and associated JSON data. Each event will use an SSE event name (e.g. `event: message_stop`), and include the matching event `type` in its data.
+
+Each stream uses the following event flow:
+
+1. `message_start`: contains a `Message` object with empty `content`.
+2. A series of content blocks, each of which have a `content_block_start`, one or more `content_block_delta` events, and a `content_block_stop` event. Each content block will have an `index` that corresponds to its index in the final Message `content` array.
+3. One or more `message_delta` events, indicating top-level changes to the final `Message` object.
+4. A final `message_stop` event.
+
+
+ The token counts shown in the `usage` field of the `message_delta` event are *cumulative*.
+
+
+### Ping events
+
+Event streams may also include any number of `ping` events.
+
+### Error events
+
+We may occasionally send [errors](/en/api/errors) in the event stream.
For example, during periods of high usage, you may receive an `overloaded_error`, which would normally correspond to an HTTP 529 in a non-streaming context:
+
+```json Example error
+event: error
+data: {"type": "error", "error": {"type": "overloaded_error", "message": "Overloaded"}}
+```
+
+### Other events
+
+In accordance with our [versioning policy](/en/api/versioning), we may add new event types, and your code should handle unknown event types gracefully.
+
+## Content block delta types
+
+Each `content_block_delta` event contains a `delta` of a type that updates the `content` block at a given `index`.
+
+### Text delta
+
+A `text` content block delta looks like:
+
+```JSON Text delta
+event: content_block_delta
+data: {"type": "content_block_delta","index": 0,"delta": {"type": "text_delta", "text": "ello frien"}}
+```
+
+### Input JSON delta
+
+The deltas for `tool_use` content blocks correspond to updates for the `input` field of the block. To support maximum granularity, the deltas are _partial JSON strings_, whereas the final `tool_use.input` is always an _object_.
+
+You can accumulate the string deltas and parse the JSON once you receive a `content_block_stop` event, by using a library like [Pydantic](https://docs.pydantic.dev/latest/concepts/json/#partial-json-parsing) to do partial JSON parsing, or by using our [SDKs](https://docs.anthropic.com/en/api/client-sdks), which provide helpers to access parsed incremental values.
+
+A `tool_use` content block delta looks like:
+
+```JSON Input JSON delta
+event: content_block_delta
+data: {"type": "content_block_delta","index": 1,"delta": {"type": "input_json_delta","partial_json": "{\"location\": \"San Fra"}}
+```
+
+Note: Our current models only support emitting one complete key and value property from `input` at a time. As such, when using tools, there may be delays between streaming events while the model is working.
Once an `input` key and value are accumulated, we emit them as multiple `content_block_delta` events with chunked partial json so that the format can automatically support finer granularity in future models. + +### Thinking delta + +When using [extended thinking](/en/docs/build-with-claude/extended-thinking#streaming-thinking) with streaming enabled, you'll receive thinking content via `thinking_delta` events. These deltas correspond to the `thinking` field of the `thinking` content blocks. + +For thinking content, a special `signature_delta` event is sent just before the `content_block_stop` event. This signature is used to verify the integrity of the thinking block. + +A typical thinking delta looks like: + +```JSON Thinking delta +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "Let me solve this step by step:\n\n1. First break down 27 * 453"}} +``` + +The signature delta looks like: + +```JSON Signature delta +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "signature_delta", "signature": "EqQBCgIYAhIM1gbcDa9GJwZA2b3hGgxBdjrkzLoky3dl1pkiMOYds..."}} +``` + +## Full HTTP Stream response + +We strongly recommend that you use our [client SDKs](/en/api/client-sdks) when using streaming mode. However, if you are building a direct API integration, you will need to handle these events yourself. + +A stream response is comprised of: + +1. A `message_start` event +2. Potentially multiple content blocks, each of which contains: + - A `content_block_start` event + - Potentially multiple `content_block_delta` events + - A `content_block_stop` event +3. A `message_delta` event +4. A `message_stop` event + +There may be `ping` events dispersed throughout the response as well. See [Event types](#event-types) for more details on the format. 
+ +### Basic streaming request + + + ```bash Shell + curl https://api.anthropic.com/v1/messages \ + --header "anthropic-version: 2023-06-01" \ + --header "content-type: application/json" \ + --header "x-api-key: $ANTHROPIC_API_KEY" \ + --data \ + '{ + "model": "claude-opus-4-1-20250805", + "messages": [{"role": "user", "content": "Hello"}], + "max_tokens": 256, + "stream": true + }' + ``` + +```python Python +import anthropic + +client = anthropic.Anthropic() + +with client.messages.stream( + model="claude-opus-4-1-20250805", + messages=[{"role": "user", "content": "Hello"}], + max_tokens=256, +) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +``` + + + +```json Response +event: message_start +data: {"type": "message_start", "message": {"id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY", "type": "message", "role": "assistant", "content": [], "model": "claude-opus-4-1-20250805", "stop_reason": null, "stop_sequence": null, "usage": {"input_tokens": 25, "output_tokens": 1}}} + +event: content_block_start +data: {"type": "content_block_start", "index": 0, "content_block": {"type": "text", "text": ""}} + +event: ping +data: {"type": "ping"} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "Hello"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "!"}} + +event: content_block_stop +data: {"type": "content_block_stop", "index": 0} + +event: message_delta +data: {"type": "message_delta", "delta": {"stop_reason": "end_turn", "stop_sequence":null}, "usage": {"output_tokens": 15}} + +event: message_stop +data: {"type": "message_stop"} + +``` + +### Streaming request with tool use + + + Tool use now supports fine-grained streaming for parameter values as a beta feature. For more details, see [Fine-grained tool streaming](/en/docs/agents-and-tools/tool-use/fine-grained-tool-streaming). 
+ + +In this request, we ask Claude to use a tool to tell us the weather. + + + ```bash Shell + curl https://api.anthropic.com/v1/messages \ + -H "content-type: application/json" \ + -H "x-api-key: $ANTHROPIC_API_KEY" \ + -H "anthropic-version: 2023-06-01" \ + -d '{ + "model": "claude-opus-4-1-20250805", + "max_tokens": 1024, + "tools": [ + { + "name": "get_weather", + "description": "Get the current weather in a given location", + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + } + }, + "required": ["location"] + } + } + ], + "tool_choice": {"type": "any"}, + "messages": [ + { + "role": "user", + "content": "What is the weather like in San Francisco?" + } + ], + "stream": true + }' + ``` + +```python Python +import anthropic + +client = anthropic.Anthropic() + +tools = [ + { + "name": "get_weather", + "description": "Get the current weather in a given location", + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + } + }, + "required": ["location"] + } + } +] + +with client.messages.stream( + model="claude-opus-4-1-20250805", + max_tokens=1024, + tools=tools, + tool_choice={"type": "any"}, + messages=[ + { + "role": "user", + "content": "What is the weather like in San Francisco?" 
+ } + ], +) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +``` + + + +```json Response +event: message_start +data: {"type":"message_start","message":{"id":"msg_014p7gG3wDgGV9EUtLvnow3U","type":"message","role":"assistant","model":"claude-opus-4-1-20250805","stop_sequence":null,"usage":{"input_tokens":472,"output_tokens":2},"content":[],"stop_reason":null}} + +event: content_block_start +data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}} + +event: ping +data: {"type": "ping"} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Okay"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":","}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" let"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"'s"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" check"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" the"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" weather"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" for"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" San"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" Francisco"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":","}} + +event: content_block_delta +data: 
{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" CA"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":":"}} + +event: content_block_stop +data: {"type":"content_block_stop","index":0} + +event: content_block_start +data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01T1x1fJ34qAmk2tNTrN7Up6","name":"get_weather","input":{}}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\"location\":"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":" \"San"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":" Francisc"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"o,"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":" CA\""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":", "}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"\"unit\": \"fah"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"renheit\"}"}} + +event: content_block_stop +data: {"type":"content_block_stop","index":1} + +event: message_delta +data: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":89}} + +event: message_stop +data: {"type":"message_stop"} +``` + +### 
Streaming request with extended thinking + +In this request, we enable extended thinking with streaming to see Claude's step-by-step reasoning. + + + ```bash Shell + curl https://api.anthropic.com/v1/messages \ + --header "x-api-key: $ANTHROPIC_API_KEY" \ + --header "anthropic-version: 2023-06-01" \ + --header "content-type: application/json" \ + --data \ + '{ + "model": "claude-opus-4-1-20250805", + "max_tokens": 20000, + "stream": true, + "thinking": { + "type": "enabled", + "budget_tokens": 16000 + }, + "messages": [ + { + "role": "user", + "content": "What is 27 * 453?" + } + ] + }' + ``` + +```python Python +import anthropic + +client = anthropic.Anthropic() + +with client.messages.stream( + model="claude-opus-4-1-20250805", + max_tokens=20000, + thinking={ + "type": "enabled", + "budget_tokens": 16000 + }, + messages=[ + { + "role": "user", + "content": "What is 27 * 453?" + } + ], +) as stream: + for event in stream: + if event.type == "content_block_delta": + if event.delta.type == "thinking_delta": + print(event.delta.thinking, end="", flush=True) + elif event.delta.type == "text_delta": + print(event.delta.text, end="", flush=True) +``` + + + +```json Response +event: message_start +data: {"type": "message_start", "message": {"id": "msg_01...", "type": "message", "role": "assistant", "content": [], "model": "claude-opus-4-1-20250805", "stop_reason": null, "stop_sequence": null}} + +event: content_block_start +data: {"type": "content_block_start", "index": 0, "content_block": {"type": "thinking", "thinking": ""}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "Let me solve this step by step:\n\n1. First break down 27 * 453"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "\n2. 
453 = 400 + 50 + 3"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "\n3. 27 * 400 = 10,800"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "\n4. 27 * 50 = 1,350"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "\n5. 27 * 3 = 81"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "\n6. 10,800 + 1,350 + 81 = 12,231"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "signature_delta", "signature": "EqQBCgIYAhIM1gbcDa9GJwZA2b3hGgxBdjrkzLoky3dl1pkiMOYds..."}} + +event: content_block_stop +data: {"type": "content_block_stop", "index": 0} + +event: content_block_start +data: {"type": "content_block_start", "index": 1, "content_block": {"type": "text", "text": ""}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 1, "delta": {"type": "text_delta", "text": "27 * 453 = 12,231"}} + +event: content_block_stop +data: {"type": "content_block_stop", "index": 1} + +event: message_delta +data: {"type": "message_delta", "delta": {"stop_reason": "end_turn", "stop_sequence": null}} + +event: message_stop +data: {"type": "message_stop"} +``` + +### Streaming request with web search tool use + +In this request, we ask Claude to search the web for current weather information. 
+ + + ```bash Shell + curl https://api.anthropic.com/v1/messages \ + --header "x-api-key: $ANTHROPIC_API_KEY" \ + --header "anthropic-version: 2023-06-01" \ + --header "content-type: application/json" \ + --data \ + '{ + "model": "claude-opus-4-1-20250805", + "max_tokens": 1024, + "stream": true, + "tools": [ + { + "type": "web_search_20250305", + "name": "web_search", + "max_uses": 5 + } + ], + "messages": [ + { + "role": "user", + "content": "What is the weather like in New York City today?" + } + ] + }' + ``` + +```python Python +import anthropic + +client = anthropic.Anthropic() + +with client.messages.stream( + model="claude-opus-4-1-20250805", + max_tokens=1024, + tools=[ + { + "type": "web_search_20250305", + "name": "web_search", + "max_uses": 5 + } + ], + messages=[ + { + "role": "user", + "content": "What is the weather like in New York City today?" + } + ], +) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +``` + + + +```json Response +event: message_start +data: {"type":"message_start","message":{"id":"msg_01G...","type":"message","role":"assistant","model":"claude-opus-4-1-20250805","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":2679,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":3}}} + +event: content_block_start +data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"I'll check"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" the current weather in New York City for you"}} + +event: ping +data: {"type": "ping"} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"."}} + +event: content_block_stop +data: {"type":"content_block_stop","index":0} + +event: content_block_start +data: 
{"type":"content_block_start","index":1,"content_block":{"type":"server_tool_use","id":"srvtoolu_014hJH82Qum7Td6UV8gDXThB","name":"web_search","input":{}}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\"query"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"\":"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":" \"weather"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":" NY"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"C to"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"day\"}"}} + +event: content_block_stop +data: {"type":"content_block_stop","index":1 } + +event: content_block_start +data: {"type":"content_block_start","index":2,"content_block":{"type":"web_search_tool_result","tool_use_id":"srvtoolu_014hJH82Qum7Td6UV8gDXThB","content":[{"type":"web_search_result","title":"Weather in New York City in May 2025 (New York) - detailed Weather Forecast for a month","url":"https://world-weather.info/forecast/usa/new_york/may-2025/","encrypted_content":"Ev0DCioIAxgCIiQ3NmU4ZmI4OC1k...","page_age":null},...]}} + +event: content_block_stop +data: {"type":"content_block_stop","index":2} + +event: content_block_start +data: {"type":"content_block_start","index":3,"content_block":{"type":"text","text":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":3,"delta":{"type":"text_delta","text":"Here's the current weather information 
for New York"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":3,"delta":{"type":"text_delta","text":" City:\n\n# Weather"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":3,"delta":{"type":"text_delta","text":" in New York City"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":3,"delta":{"type":"text_delta","text":"\n\n"}} + +... + +event: content_block_stop +data: {"type":"content_block_stop","index":17} + +event: message_delta +data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":10682,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":510,"server_tool_use":{"web_search_requests":1}}} + +event: message_stop +data: {"type":"message_stop"} +``` + +## Error recovery + +When a streaming request is interrupted due to network issues, timeouts, or other errors, you can recover by resuming from where the stream was interrupted. This approach saves you from re-processing the entire response. + +The basic recovery strategy involves: + +1. **Capture the partial response**: Save all content that was successfully received before the error occurred +2. **Construct a continuation request**: Create a new API request that includes the partial assistant response as the beginning of a new assistant message +3. **Resume streaming**: Continue receiving the rest of the response from where it was interrupted + +### Error recovery best practices + +1. **Use SDK features**: Leverage the SDK's built-in message accumulation and error handling capabilities +2. **Handle content types**: Be aware that messages can contain multiple content blocks (`text`, `tool_use`, `thinking`). Tool use and extended thinking blocks cannot be partially recovered. You can resume streaming from the most recent text block. 
diff --git a/aiprompts/usechat-backend-design.md b/aiprompts/usechat-backend-design.md new file mode 100644 index 0000000000..f5793718c1 --- /dev/null +++ b/aiprompts/usechat-backend-design.md @@ -0,0 +1,463 @@ +# useChat Compatible Backend Design for Wave Terminal + +## Overview + +This document outlines how to create a `useChat()` compatible backend API using Go and Server-Sent Events (SSE) to replace the current complex RPC-based AI chat system. The goal is to leverage Vercel AI SDK's `useChat()` hook while maintaining all existing AI provider functionality. + +## Current vs Target Architecture + +### Current Architecture +``` +Frontend (React) → Custom RPC → Go Backend → AI Providers +- 10+ Jotai atoms for state management +- Custom WaveAIStreamRequest/WaveAIPacketType +- Complex configuration merging in frontend +- Custom streaming protocol over WebSocket +``` + +### Target Architecture +``` +Frontend (useChat) → HTTP/SSE → Go Backend → AI Providers +- Single useChat() hook manages all state +- Standard HTTP POST + SSE streaming +- Backend-driven configuration resolution +- Standard AI SDK streaming format +``` + +## API Design + +### 1. Endpoint Structure + +**Chat Streaming Endpoint:** +``` +POST /api/ai/chat/{blockId}?preset={presetKey} +``` + +**Conversation Persistence Endpoints:** +``` +POST /api/ai/conversations/{blockId} # Save conversation +GET /api/ai/conversations/{blockId} # Load conversation +``` + +**Why this approach:** +- `blockId`: Identifies the conversation context (existing Wave concept) +- `preset`: URL parameter for AI configuration preset +- **Separate persistence**: Clean separation of streaming vs storage +- **Fast localhost calls**: Frontend can call both endpoints quickly +- **Simple backend**: Each endpoint has single responsibility + +### 2. 
Request Format & Message Flow + +**Simplified Approach:** +- Frontend manages **entire conversation state** (like all modern chat apps) +- Frontend sends **complete message history** with each request +- Backend just processes the messages and streams response +- Frontend handles persistence via existing Wave file system + +**Standard useChat() Request:** +```json +{ + "messages": [ + { + "id": "msg-1", + "role": "user", + "content": "Hello world" + }, + { + "id": "msg-2", + "role": "assistant", + "content": "Hi there!" + }, + { + "id": "msg-3", + "role": "user", + "content": "How are you?" // <- NEW message user just typed + } + ] +} +``` + +**Backend Processing:** +1. **Receive complete conversation** from frontend +2. **Resolve AI configuration** (preset, model, etc.) +3. **Send messages directly** to AI provider +4. **Stream response** back to frontend +5. **Frontend calls separate persistence endpoint** when needed + +**Optional Extensions:** +```json +{ + "messages": [...], + "options": { + "temperature": 0.7, + "maxTokens": 1000, + "model": "gpt-4" // Override preset model + } +} +``` + +### 3. Configuration Resolution + +**Priority Order (backend resolves):** +1. **Request options** (highest priority) +2. **URL preset parameter** +3. **Block metadata** (`block.meta["ai:preset"]`) +4. **Global settings** (`settings["ai:preset"]`) +5. **Default preset** (lowest priority) + +**Backend Logic:** +```go +func resolveAIConfig(blockId, presetKey string, requestOptions map[string]any) (*WaveAIOptsType, error) { + // 1. Load block metadata + block := getBlock(blockId) + blockPreset := block.Meta["ai:preset"] + + // 2. Load global settings + settings := getGlobalSettings() + globalPreset := settings["ai:preset"] + + // 3. Resolve preset hierarchy + finalPreset := presetKey + if finalPreset == "" { + finalPreset = blockPreset + } + if finalPreset == "" { + finalPreset = globalPreset + } + if finalPreset == "" { + finalPreset = "default" + } + + // 4. 
Load and merge preset config + presetConfig := loadPreset(finalPreset) + + // 5. Apply request overrides + return mergeAIConfig(presetConfig, requestOptions), nil +} +``` + +### 4. Response Format (SSE) + +**Key Insight: Minimal Conversion** +Most AI providers (OpenAI, Anthropic) already return SSE streams. Instead of converting to our custom format and back, we can **proxy/transform** their streams directly to useChat format. + +**Headers:** +``` +Content-Type: text/event-stream +Cache-Control: no-cache +Connection: keep-alive +Access-Control-Allow-Origin: * +``` + +**useChat Expected Format:** +``` +data: {"type":"text","text":"Hello"} + +data: {"type":"text","text":" world"} + +data: {"type":"text","text":"!"} + +data: {"type":"finish","finish_reason":"stop","usage":{"prompt_tokens":10,"completion_tokens":3,"total_tokens":13}} + +data: [DONE] +``` + +**Provider Stream Transformation:** +- **OpenAI**: Already SSE → direct proxy (no conversion needed) +- **Anthropic**: Already SSE → direct proxy (minimal field mapping) +- **Google**: Already streaming → direct proxy +- **Perplexity**: OpenAI-compatible → direct proxy +- **Wave Cloud**: WebSocket → **requires conversion** (only one needing transformation) + +**Error Format:** +``` +data: {"type":"error","error":"API key invalid"} + +data: [DONE] +``` + +## Implementation Plan + +### Phase 1: HTTP Handler + +```go +// Simplified approach: Direct provider streaming with minimal transformation +func (s *WshServer) HandleAIChat(w http.ResponseWriter, r *http.Request) { + // 1. Parse URL parameters + blockId := mux.Vars(r)["blockId"] + presetKey := r.URL.Query().Get("preset") + + // 2. Parse request body + var req struct { + Messages []struct { + Role string `json:"role"` + Content string `json:"content"` + } `json:"messages"` + Options map[string]any `json:"options,omitempty"` + } + json.NewDecoder(r.Body).Decode(&req) + + // 3. 
Resolve configuration + aiOpts, err := resolveAIConfig(blockId, presetKey, req.Options) + if err != nil { + http.Error(w, err.Error(), 400) + return + } + + // 4. Set SSE headers + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + + // 5. Route to provider and stream directly + switch aiOpts.APIType { + case "openai", "perplexity": + // Direct proxy - these are already SSE compatible + streamDirectSSE(w, r.Context(), aiOpts, req.Messages) + case "anthropic": + // Direct proxy with minimal field mapping + streamAnthropicSSE(w, r.Context(), aiOpts, req.Messages) + case "google": + // Direct proxy + streamGoogleSSE(w, r.Context(), aiOpts, req.Messages) + default: + // Wave Cloud - only one requiring conversion (WebSocket → SSE) + if isCloudAIRequest(aiOpts) { + streamWaveCloudToUseChat(w, r.Context(), aiOpts, req.Messages) + } else { + http.Error(w, "Unsupported provider", 400) + } + } +} + +// Example: Direct OpenAI streaming (minimal conversion) +func streamOpenAIToUseChat(w http.ResponseWriter, ctx context.Context, opts *WaveAIOptsType, messages []Message) { + client := openai.NewClient(opts.APIToken) + + stream, err := client.CreateChatCompletionStream(ctx, openai.ChatCompletionRequest{ + Model: opts.Model, + Messages: convertToOpenAIMessages(messages), + Stream: true, + }) + if err != nil { + fmt.Fprintf(w, "data: {\"type\":\"error\",\"error\":%q}\n\n", err.Error()) + fmt.Fprintf(w, "data: [DONE]\n\n") + return + } + defer stream.Close() + + for { + response, err := stream.Recv() + if errors.Is(err, io.EOF) { + fmt.Fprintf(w, "data: [DONE]\n\n") + return + } + if err != nil { + fmt.Fprintf(w, "data: {\"type\":\"error\",\"error\":%q}\n\n", err.Error()) + fmt.Fprintf(w, "data: [DONE]\n\n") + return + } + + // Direct transformation: OpenAI format → useChat format + for _, choice := range response.Choices { + if choice.Delta.Content != "" { + fmt.Fprintf(w, "data: 
{\"type\":\"text\",\"text\":%q}\n\n", choice.Delta.Content) + } + if choice.FinishReason != "" { + fmt.Fprintf(w, "data: {\"type\":\"finish\",\"finish_reason\":%q}\n\n", choice.FinishReason) + } + } + + w.(http.Flusher).Flush() + } +} + +// Wave Cloud conversion (only provider needing transformation) +func streamWaveCloudToUseChat(w http.ResponseWriter, ctx context.Context, opts *WaveAIOptsType, messages []Message) { + // Use existing Wave Cloud WebSocket logic + waveReq := wshrpc.WaveAIStreamRequest{ + Opts: opts, + Prompt: convertMessagesToPrompt(messages), + } + + stream := waveai.RunAICommand(ctx, waveReq) // Returns WebSocket stream + + // Convert Wave Cloud packets to useChat SSE format + for packet := range stream { + if packet.Error != nil { + fmt.Fprintf(w, "data: {\"type\":\"error\",\"error\":%q}\n\n", packet.Error.Error()) + break + } + + resp := packet.Response + if resp.Text != "" { + fmt.Fprintf(w, "data: {\"type\":\"text\",\"text\":%q}\n\n", resp.Text) + } + if resp.FinishReason != "" { + usage := "" + if resp.Usage != nil { + usage = fmt.Sprintf(",\"usage\":{\"prompt_tokens\":%d,\"completion_tokens\":%d,\"total_tokens\":%d}", + resp.Usage.PromptTokens, resp.Usage.CompletionTokens, resp.Usage.TotalTokens) + } + fmt.Fprintf(w, "data: {\"type\":\"finish\",\"finish_reason\":%q%s}\n\n", resp.FinishReason, usage) + } + + w.(http.Flusher).Flush() + } + + fmt.Fprintf(w, "data: [DONE]\n\n") +} +``` + +### Phase 2: Frontend Integration + +```typescript +import { useChat } from '@ai-sdk/react'; + +function WaveAI({ blockId }: { blockId: string }) { + // Get current preset from block metadata or settings + const preset = useAtomValue(currentPresetAtom); + + const { messages, input, handleInputChange, handleSubmit, isLoading, error } = useChat({ + api: `/api/ai/chat/${blockId}?preset=${preset}`, + initialMessages: [], // Load from existing aidata file + onFinish: (message) => { + // Save conversation to aidata file + saveConversation(blockId, messages); + } + 
}); + + return ( +
+
+ {messages.map(message => ( +
+ +
+ ))} + {isLoading && } + {error &&
{error.message}
} +
+ +
+ +
+
+ ); +} +``` + +### Phase 3: Advanced Features + +#### Multi-modal Support +```typescript +// useChat supports multi-modal out of the box +const { messages, append } = useChat({ + api: `/api/ai/chat/${blockId}`, +}); + +// Send image + text +await append({ + role: 'user', + content: [ + { type: 'text', text: 'What do you see in this image?' }, + { type: 'image', image: imageFile } + ] +}); +``` + +#### Thinking Models +```go +// Backend detects thinking models and formats appropriately +if isThinkingModel(aiOpts.Model) { + // Send thinking content separately + fmt.Fprintf(w, "data: {\"type\":\"thinking\",\"text\":%q}\n\n", thinkingText) + fmt.Fprintf(w, "data: {\"type\":\"text\",\"text\":%q}\n\n", responseText) +} +``` + +#### Context Injection +```typescript +// Add system messages or context via useChat options +const { messages, append } = useChat({ + api: `/api/ai/chat/${blockId}`, + initialMessages: [ + { + role: 'system', + content: 'You are a helpful terminal assistant...' + } + ] +}); +``` + +## Migration Strategy + +### 1. Parallel Implementation +- Keep existing RPC system running +- Add new HTTP/SSE endpoint alongside +- Feature flag to switch between systems + +### 2. Gradual Migration +- Start with new blocks using useChat +- Migrate existing conversations on first interaction +- Remove RPC system once stable + +### 3. Backward Compatibility +- Existing aidata files work unchanged +- Same provider backends (OpenAI, Anthropic, etc.) 
+- Same configuration system + +## Benefits + +### Complexity Reduction +- **Frontend**: ~900 lines → ~100 lines (90% reduction) +- **State Management**: 10+ atoms → 1 useChat hook +- **Configuration**: Frontend merging → Backend resolution +- **Streaming**: Custom protocol → Standard SSE + +### Modern Features +- **Multi-modal**: Images, files, audio support +- **Thinking Models**: Built-in reasoning trace support +- **Conversation Management**: Edit, retry, branch conversations +- **Error Handling**: Automatic retry and error boundaries +- **Performance**: Optimized streaming and batching + +### Developer Experience +- **Type Safety**: Full TypeScript support +- **Testing**: Standard HTTP endpoints easier to test +- **Debugging**: Standard browser dev tools work +- **Documentation**: Leverage AI SDK docs and community + +## Configuration Examples + +### URL-based Configuration +``` +POST /api/ai/chat/block-123?preset=claude-coding +POST /api/ai/chat/block-456?preset=gpt4-creative +``` + +### Header-based Overrides +``` +POST /api/ai/chat/block-123 +X-AI-Model: gpt-4-turbo +X-AI-Temperature: 0.8 +``` + +### Request Body Options +```json +{ + "messages": [...], + "options": { + "model": "claude-3-sonnet", + "temperature": 0.7, + "maxTokens": 2000 + } +} +``` + +This design maintains all existing functionality while dramatically simplifying the implementation and adding modern AI chat capabilities. \ No newline at end of file diff --git a/cmd/testai/main-testai.go b/cmd/testai/main-testai.go new file mode 100644 index 0000000000..74af5889d5 --- /dev/null +++ b/cmd/testai/main-testai.go @@ -0,0 +1,226 @@ +// Copyright 2025, Command Line Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "context" + _ "embed" + "encoding/json" + "flag" + "fmt" + "log" + "net/http" + "os" + "time" + + "github.com/wavetermdev/waveterm/pkg/waveai" + "github.com/wavetermdev/waveterm/pkg/wshrpc" +) + +//go:embed testschema.json +var testSchemaJSON string + +// TestResponseWriter implements http.ResponseWriter and additional interfaces for testing +type TestResponseWriter struct { + header http.Header +} + +func (w *TestResponseWriter) Header() http.Header { + if w.header == nil { + w.header = make(http.Header) + } + return w.header +} + +func (w *TestResponseWriter) Write(data []byte) (int, error) { + fmt.Printf("SSE: %s", string(data)) + return len(data), nil +} + +func (w *TestResponseWriter) WriteHeader(statusCode int) { + fmt.Printf("Status: %d\n", statusCode) +} + +// Implement http.Flusher interface +func (w *TestResponseWriter) Flush() { + // No-op for testing +} + +// Implement interfaces needed by http.ResponseController +func (w *TestResponseWriter) SetWriteDeadline(deadline time.Time) error { + // No-op for testing + return nil +} + +func (w *TestResponseWriter) SetReadDeadline(deadline time.Time) error { + // No-op for testing + return nil +} + +func getToolDefinitions() []waveai.ToolDefinition { + var schemas map[string]any + if err := json.Unmarshal([]byte(testSchemaJSON), &schemas); err != nil { + log.Printf("Error parsing schema: %v\n", err) + return nil + } + + var configSchema map[string]any + if rawSchema, ok := schemas["config"]; ok && rawSchema != nil { + if schema, ok := rawSchema.(map[string]any); ok { + configSchema = schema + } + } + if configSchema == nil { + configSchema = map[string]any{"type": "object"} + } + + return []waveai.ToolDefinition{ + { + Name: "get_config", + Description: "Get the current GitHub Actions Monitor configuration settings including repository, workflow, polling interval, and max workflow runs", + InputSchema: map[string]any{ + "type": "object", + }, 
+ }, + { + Name: "update_config", + Description: "Update GitHub Actions Monitor configuration settings", + InputSchema: configSchema, + }, + { + Name: "get_data", + Description: "Get the current GitHub Actions workflow run data including workflow runs, loading state, and errors", + InputSchema: map[string]any{ + "type": "object", + }, + }, + } +} + +func testOpenAI(ctx context.Context, model, message string, tools []waveai.ToolDefinition) { + apiKey := os.Getenv("OPENAI_API_KEY") + if apiKey == "" { + fmt.Println("Error: OPENAI_API_KEY environment variable not set") + os.Exit(1) + } + + opts := &wshrpc.WaveAIOptsType{ + APIToken: apiKey, + Model: model, + MaxTokens: 1000, + } + + messages := []waveai.UseChatMessage{ + { + Role: "user", + Content: message, + }, + } + + fmt.Printf("Testing OpenAI streaming with model: %s\n", model) + fmt.Printf("Message: %s\n", message) + fmt.Println("---") + + testWriter := &TestResponseWriter{} + sseHandler := waveai.MakeSSEHandlerCh(testWriter, ctx) + + err := sseHandler.SetupSSE() + if err != nil { + fmt.Printf("Error setting up SSE: %v\n", err) + return + } + defer sseHandler.Close() + + stopReason, err := waveai.StreamOpenAIToUseChat(ctx, sseHandler, opts, messages, tools) + if err != nil { + fmt.Printf("OpenAI streaming error: %v\n", err) + } + if stopReason != nil { + fmt.Printf("Stop reason: %+v\n", stopReason) + } +} + +func testAnthropic(ctx context.Context, model, message string, tools []waveai.ToolDefinition) { + apiKey := os.Getenv("ANTHROPIC_API_KEY") + if apiKey == "" { + fmt.Println("Error: ANTHROPIC_API_KEY environment variable not set") + os.Exit(1) + } + + opts := &wshrpc.WaveAIOptsType{ + APIToken: apiKey, + Model: model, + MaxTokens: 1000, + } + + messages := []waveai.UseChatMessage{ + { + Role: "user", + Content: message, + }, + } + + fmt.Printf("Testing Anthropic streaming with model: %s\n", model) + fmt.Printf("Message: %s\n", message) + fmt.Println("---") + + testWriter := &TestResponseWriter{} + sseHandler 
:= waveai.MakeSSEHandlerCh(testWriter, ctx) + + err := sseHandler.SetupSSE() + if err != nil { + fmt.Printf("Error setting up SSE: %v\n", err) + return + } + defer sseHandler.Close() + + stopReason, err := waveai.StreamAnthropicResponses(ctx, sseHandler, opts, messages, tools) + if err != nil { + fmt.Printf("Anthropic streaming error: %v\n", err) + } + if stopReason != nil { + fmt.Printf("Stop reason: %+v\n", stopReason) + } +} + +func main() { + var anthropic, tools bool + flag.BoolVar(&anthropic, "anthropic", false, "Use Anthropic API instead of OpenAI") + flag.BoolVar(&tools, "tools", false, "Enable GitHub Actions Monitor tools for testing") + flag.Parse() + + args := flag.Args() + if len(args) < 1 { + fmt.Println("Usage: go run main-testai.go [--anthropic] [--tools] [message]") + fmt.Println("Examples:") + fmt.Println(" go run main-testai.go o4-mini 'What is 2+2?'") + fmt.Println(" go run main-testai.go --anthropic claude-3-5-sonnet-20241022 'What is 2+2?'") + fmt.Println(" go run main-testai.go --tools o4-mini 'Help me configure GitHub Actions monitoring'") + fmt.Println("") + fmt.Println("Environment variables:") + fmt.Println(" OPENAI_API_KEY (for OpenAI models)") + fmt.Println(" ANTHROPIC_API_KEY (for Anthropic models)") + os.Exit(1) + } + + model := args[0] + message := "What is 2+2?" 
+ if len(args) > 1 { + message = args[1] + } + + var toolDefs []waveai.ToolDefinition + if tools { + toolDefs = getToolDefinitions() + } + + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + if anthropic { + testAnthropic(ctx, model, message, toolDefs) + } else { + testOpenAI(ctx, model, message, toolDefs) + } +} diff --git a/cmd/testai/testschema.json b/cmd/testai/testschema.json new file mode 100644 index 0000000000..dc9de2b834 --- /dev/null +++ b/cmd/testai/testschema.json @@ -0,0 +1,104 @@ +{ + "config": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "description": "Application configuration settings", + "properties": { + "maxWorkflowRuns": { + "description": "Maximum number of workflow runs to fetch", + "maximum": 100, + "minimum": 1, + "type": "integer" + }, + "pollInterval": { + "description": "Polling interval for GitHub API requests", + "maximum": 300, + "minimum": 1, + "type": "integer", + "units": "s" + }, + "repository": { + "description": "GitHub repository in owner/repo format", + "pattern": "^[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+$", + "type": "string" + }, + "workflow": { + "description": "GitHub Actions workflow file name", + "pattern": "^.+\\.(yml|yaml)$", + "type": "string" + } + }, + "title": "Application Configuration", + "type": "object" + }, + "data": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "definitions": { + "WorkflowRun": { + "properties": { + "conclusion": { + "type": "string" + }, + "created_at": { + "format": "date-time", + "type": "string" + }, + "html_url": { + "type": "string" + }, + "id": { + "type": "integer" + }, + "name": { + "type": "string" + }, + "run_number": { + "type": "integer" + }, + "status": { + "type": "string" + }, + "updated_at": { + "format": "date-time", + "type": "string" + } + }, + "required": [ + "id", + "name", + "status", + "conclusion", + "created_at", + "updated_at", + "html_url", + "run_number" + ], + "type": "object" + } + }, + 
"description": "Application data schema", + "properties": { + "isLoading": { + "description": "Loading state for workflow data fetch", + "type": "boolean" + }, + "lastError": { + "description": "Last error message from GitHub API", + "type": "string" + }, + "lastRefreshTime": { + "description": "Timestamp of last successful data refresh", + "format": "date-time", + "type": "string" + }, + "workflowRuns": { + "description": "List of GitHub Actions workflow runs", + "items": { + "$ref": "#/definitions/WorkflowRun" + }, + "type": "array" + } + }, + "title": "Application Data", + "type": "object" + } +} diff --git a/go.mod b/go.mod index 2d6c621725..08f84a41ff 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,6 @@ require ( github.com/golang-migrate/migrate/v4 v4.19.0 github.com/google/generative-ai-go v0.20.1 github.com/google/uuid v1.6.0 - github.com/gorilla/handlers v1.5.2 github.com/gorilla/mux v1.8.1 github.com/gorilla/websocket v1.5.3 github.com/invopop/jsonschema v0.13.0 @@ -24,6 +23,7 @@ require ( github.com/kevinburke/ssh_config v1.2.0 github.com/mattn/go-sqlite3 v1.14.32 github.com/mitchellh/mapstructure v1.5.0 + github.com/openai/openai-go/v2 v2.1.1 github.com/sashabaranov/go-openai v1.41.1 github.com/sawka/txwrap v0.2.0 github.com/shirou/gopsutil/v4 v4.25.8 @@ -75,6 +75,7 @@ require ( github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/launchdarkly/eventsource v1.10.0 // indirect github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-isatty v0.0.20 // indirect @@ -82,6 +83,10 @@ require ( github.com/rivo/uniseg v0.4.7 // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/spf13/pflag v1.0.9 // indirect + github.com/tidwall/gjson v1.14.4 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.1 // indirect + 
github.com/tidwall/sjson v1.2.5 // indirect github.com/tklauser/go-sysconf v0.3.15 // indirect github.com/tklauser/numcpus v0.10.0 // indirect github.com/ubuntu/decorate v0.0.0-20230125165522-2d5b0a9bb117 // indirect diff --git a/go.sum b/go.sum index 586c011226..3ea41b5b19 100644 --- a/go.sum +++ b/go.sum @@ -96,8 +96,6 @@ github.com/googleapis/enterprise-certificate-proxy v0.3.6 h1:GW/XbdyBFQ8Qe+YAmFU github.com/googleapis/enterprise-certificate-proxy v0.3.6/go.mod h1:MkHOF77EYAE7qfSuSS9PU6g4Nt4e11cnsDUowfwewLA= github.com/googleapis/gax-go/v2 v2.15.0 h1:SyjDc1mGgZU5LncH8gimWo9lW1DtIfPibOG81vgd/bo= github.com/googleapis/gax-go/v2 v2.15.0/go.mod h1:zVVkkxAQHa1RQpg9z2AUCMnKhi0Qld9rcmyfL1OZhoc= -github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE= -github.com/gorilla/handlers v1.5.2/go.mod h1:dX+xVpaxdSw+q0Qek8SSsl3dfMk3jNddUkMzo0GtH0w= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= @@ -120,6 +118,8 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/launchdarkly/eventsource v1.10.0 h1:H9Tp6AfGu/G2qzBJC26iperrvwhzdbiA/gx7qE2nDFI= +github.com/launchdarkly/eventsource v1.10.0/go.mod h1:J3oa50bPvJesZqNAJtb5btSIo5N6roDWhiAS3IpsKck= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= @@ -133,6 +133,8 @@ github.com/mattn/go-sqlite3 v1.14.32 
h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuE github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/openai/openai-go/v2 v2.1.1 h1:/RMA/V3D+yF/Cc4jHXFt6lkqSOWRf5roRi+DvZaDYQI= +github.com/openai/openai-go/v2 v2.1.1/go.mod h1:sIUkR+Cu/PMUVkSKhkk742PRURkQOCFhiwJ7eRSBqmk= github.com/photostorm/pty v1.1.19-0.20230903182454-31354506054b h1:cLGKfKb1uk0hxI0Q8L83UAJPpeJ+gSpn3cCU/tjd3eg= github.com/photostorm/pty v1.1.19-0.20230903182454-31354506054b/go.mod h1:KO+FcPtyLAiRC0hJwreJVvfwc7vnNz77UxBTIGHdPVk= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -165,6 +167,16 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/gjson v1.14.4 h1:uo0p8EbA09J7RQaflQ1aBRffTR7xedD2bcIVSYxLnkM= +github.com/tidwall/gjson v1.14.4/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= +github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= +github.com/tidwall/sjson v1.2.5/go.mod 
h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= github.com/tklauser/go-sysconf v0.3.15 h1:VE89k0criAymJ/Os65CSn1IXaol+1wrsFHEB8Ol49K4= github.com/tklauser/go-sysconf v0.3.15/go.mod h1:Dmjwr6tYFIseJw7a3dRLJfsHAMXZ3nEnL/aZY+0IuI4= github.com/tklauser/numcpus v0.10.0 h1:18njr6LDBk1zuna922MgdjQuJFjrdppsZG60sHGfjso= diff --git a/pkg/waveai/googlebackend.go b/pkg/waveai/googlebackend.go index eeca86410d..9282bc5f87 100644 --- a/pkg/waveai/googlebackend.go +++ b/pkg/waveai/googlebackend.go @@ -1,3 +1,6 @@ +// Copyright 2025, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + package waveai import ( diff --git a/pkg/waveai/ssehandlerch.go b/pkg/waveai/ssehandlerch.go new file mode 100644 index 0000000000..413201fd06 --- /dev/null +++ b/pkg/waveai/ssehandlerch.go @@ -0,0 +1,438 @@ +// Copyright 2025, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +package waveai + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "sync" + "time" +) + +// see /aiprompts/usechat-streamingproto.md for protocol + +const ( + SSEContentType = "text/event-stream" + SSECacheControl = "no-cache" + SSEConnection = "keep-alive" + SSEKeepaliveMsg = ": keepalive\n\n" + SSEStreamStartMsg = ": stream-start\n\n" + SSEKeepaliveInterval = 1 * time.Second +) + +// SSEMessageType represents the type of message to write +type SSEMessageType string + +const ( + SSEMsgData SSEMessageType = "data" + SSEMsgEvent SSEMessageType = "event" + SSEMsgComment SSEMessageType = "comment" + SSEMsgError SSEMessageType = "error" +) + +// AI message type constants +const ( + AiMsgStart = "start" + AiMsgTextStart = "text-start" + AiMsgTextDelta = "text-delta" + AiMsgTextEnd = "text-end" + AiMsgReasoningStart = "reasoning-start" + AiMsgReasoningDelta = "reasoning-delta" + AiMsgReasoningEnd = "reasoning-end" + AiMsgToolInputStart = "tool-input-start" + AiMsgToolInputDelta = "tool-input-delta" + AiMsgToolInputAvailable = "tool-input-available" + AiMsgToolOutputAvailable = "tool-output-available" 
// not used here, but reserved + AiMsgStartStep = "start-step" + AiMsgFinishStep = "finish-step" + AiMsgFinish = "finish" + AiMsgError = "error" +) + +// SSEMessage represents a message to be written to the SSE stream +type SSEMessage struct { + Type SSEMessageType + Data string + EventType string // Only used for SSEMsgEvent +} + +// SSEHandlerCh provides channel-based Server-Sent Events functionality +type SSEHandlerCh struct { + w http.ResponseWriter + rc *http.ResponseController + ctx context.Context + writeCh chan SSEMessage + errCh chan error + + mu sync.RWMutex + closed bool + err error + + wg sync.WaitGroup +} + +// MakeSSEHandlerCh creates a new channel-based SSE handler +func MakeSSEHandlerCh(w http.ResponseWriter, ctx context.Context) *SSEHandlerCh { + return &SSEHandlerCh{ + w: w, + rc: http.NewResponseController(w), + ctx: ctx, + writeCh: make(chan SSEMessage, 10), // Buffered to prevent blocking + errCh: make(chan error, 1), // Buffered for single error + } +} + +// SetupSSE configures the response headers and starts the writer goroutine +func (h *SSEHandlerCh) SetupSSE() error { + h.mu.Lock() + defer h.mu.Unlock() + + if h.closed { + return fmt.Errorf("SSE handler is closed") + } + + // Reset write deadline for streaming + if err := h.rc.SetWriteDeadline(time.Time{}); err != nil { + return fmt.Errorf("failed to reset write deadline: %v", err) + } + + // Set SSE headers + h.w.Header().Set("Content-Type", SSEContentType) + h.w.Header().Set("Cache-Control", "no-cache, no-store, must-revalidate, no-transform") + h.w.Header().Set("Connection", SSEConnection) + h.w.Header().Set("x-vercel-ai-ui-message-stream", "v1") + h.w.Header().Set("X-Accel-Buffering", "no") + + // Send headers and establish streaming + h.w.WriteHeader(http.StatusOK) + fmt.Fprint(h.w, SSEStreamStartMsg) + if err := h.flush(); err != nil { + return err + } + + // Start the writer goroutine + h.wg.Add(1) + go h.writerLoop() + + return nil +} + +// writerLoop handles all writes and 
keepalives in a single goroutine +func (h *SSEHandlerCh) writerLoop() { + defer h.wg.Done() + + keepaliveTicker := time.NewTicker(SSEKeepaliveInterval) + defer keepaliveTicker.Stop() + + for { + select { + case msg, ok := <-h.writeCh: + if !ok { + // Channel closed, send [DONE] and exit + h.writeDirectly("[DONE]", SSEMsgData) + return + } + + if err := h.writeMessage(msg); err != nil { + h.setError(err) + return + } + + case <-keepaliveTicker.C: + if err := h.writeDirectly("keepalive", SSEMsgComment); err != nil { + h.setError(err) + return + } + + case <-h.ctx.Done(): + return + } + } +} + +// writeMessage writes a message to the SSE stream +func (h *SSEHandlerCh) writeMessage(msg SSEMessage) error { + switch msg.Type { + case SSEMsgData: + return h.writeDirectly(msg.Data, SSEMsgData) + case SSEMsgEvent: + return h.writeEvent(msg.EventType, msg.Data) + case SSEMsgComment: + return h.writeDirectly(msg.Data, SSEMsgComment) + case SSEMsgError: + return h.writeDirectly(msg.Data, SSEMsgData) + default: + return fmt.Errorf("unknown message type: %s", msg.Type) + } +} + +// writeDirectly writes data directly to the response writer +func (h *SSEHandlerCh) writeDirectly(data string, msgType SSEMessageType) error { + switch msgType { + case SSEMsgData: + _, err := fmt.Fprintf(h.w, "data: %s\n\n", data) + if err != nil { + return err + } + case SSEMsgComment: + _, err := fmt.Fprintf(h.w, ": %s\n\n", data) + if err != nil { + return err + } + default: + return fmt.Errorf("unsupported direct write type: %s", msgType) + } + return h.flush() +} + +// writeEvent writes an SSE event with optional event type +func (h *SSEHandlerCh) writeEvent(eventType, data string) error { + if eventType != "" { + if _, err := fmt.Fprintf(h.w, "event: %s\n", eventType); err != nil { + return err + } + } + if _, err := fmt.Fprintf(h.w, "data: %s\n\n", data); err != nil { + return err + } + return h.flush() +} + +// flush attempts to flush the response writer +func (h *SSEHandlerCh) flush() error { 
+ return h.rc.Flush() +} + +// setError sets the error state thread-safely +func (h *SSEHandlerCh) setError(err error) { + h.mu.Lock() + defer h.mu.Unlock() + + if h.err == nil { + h.err = err + // Send error to error channel if there's space + select { + case h.errCh <- err: + default: + } + } +} + +// WriteData queues data to be written in SSE format +func (h *SSEHandlerCh) WriteData(data string) error { + h.mu.RLock() + closed := h.closed + h.mu.RUnlock() + + if closed { + return fmt.Errorf("SSE handler is closed") + } + + select { + case h.writeCh <- SSEMessage{Type: SSEMsgData, Data: data}: + return nil + case <-h.ctx.Done(): + return h.ctx.Err() + default: + return fmt.Errorf("write channel is full") + } +} + +// WriteJsonData marshals data to JSON and queues it for writing +func (h *SSEHandlerCh) WriteJsonData(data interface{}) error { + jsonData, err := json.Marshal(data) + if err != nil { + return fmt.Errorf("failed to marshal JSON: %v", err) + } + return h.WriteData(string(jsonData)) +} + +// WriteError queues an error message and closes the handler +func (h *SSEHandlerCh) WriteError(errorMsg string) error { + errorResp := map[string]interface{}{ + "type": AiMsgError, + "errorText": errorMsg, + } + if err := h.WriteJsonData(errorResp); err != nil { + return err + } + h.Close() + return nil +} + +// WriteEvent queues an SSE event with optional event type +func (h *SSEHandlerCh) WriteEvent(eventType, data string) error { + h.mu.RLock() + closed := h.closed + h.mu.RUnlock() + + if closed { + return fmt.Errorf("SSE handler is closed") + } + + select { + case h.writeCh <- SSEMessage{Type: SSEMsgEvent, Data: data, EventType: eventType}: + return nil + case <-h.ctx.Done(): + return h.ctx.Err() + default: + return fmt.Errorf("write channel is full") + } +} + +// WriteComment queues an SSE comment +func (h *SSEHandlerCh) WriteComment(comment string) error { + h.mu.RLock() + closed := h.closed + h.mu.RUnlock() + + if closed { + return fmt.Errorf("SSE handler is 
closed") + } + + select { + case h.writeCh <- SSEMessage{Type: SSEMsgComment, Data: comment}: + return nil + case <-h.ctx.Done(): + return h.ctx.Err() + default: + return fmt.Errorf("write channel is full") + } +} + +// Err returns any error that occurred during writing +func (h *SSEHandlerCh) Err() error { + h.mu.RLock() + defer h.mu.RUnlock() + return h.err +} + +// Close closes the write channel, sends [DONE], and cleans up resources +func (h *SSEHandlerCh) Close() { + h.mu.Lock() + if h.closed { + h.mu.Unlock() + return + } + h.closed = true + + // Close the write channel, which will trigger [DONE] in writerLoop + close(h.writeCh) + h.mu.Unlock() + + // Wait for writer goroutine to finish (without holding the lock) + h.wg.Wait() +} + +// AI message writing methods + +func (h *SSEHandlerCh) AiMsgStart(messageId string) error { + resp := map[string]interface{}{ + "type": AiMsgStart, + "messageId": messageId, + } + return h.WriteJsonData(resp) +} + +func (h *SSEHandlerCh) AiMsgTextStart(textId string) error { + resp := map[string]interface{}{ + "type": AiMsgTextStart, + "id": textId, + } + return h.WriteJsonData(resp) +} + +func (h *SSEHandlerCh) AiMsgTextDelta(textId string, text string) error { + resp := map[string]interface{}{ + "type": AiMsgTextDelta, + "id": textId, + "delta": text, + } + return h.WriteJsonData(resp) +} + +func (h *SSEHandlerCh) AiMsgTextEnd(textId string) error { + resp := map[string]interface{}{ + "type": AiMsgTextEnd, + "id": textId, + } + return h.WriteJsonData(resp) +} + +func (h *SSEHandlerCh) AiMsgFinish(finishReason string, usage interface{}) error { + resp := map[string]interface{}{ + "type": AiMsgFinish, + } + return h.WriteJsonData(resp) +} + +func (h *SSEHandlerCh) AiMsgReasoningStart(reasoningId string) error { + resp := map[string]interface{}{ + "type": AiMsgReasoningStart, + "id": reasoningId, + } + return h.WriteJsonData(resp) +} + +func (h *SSEHandlerCh) AiMsgReasoningDelta(reasoningId string, reasoning string) error { + resp 
:= map[string]interface{}{
+		"type":  AiMsgReasoningDelta,
+		"id":    reasoningId,
+		"delta": reasoning,
+	}
+	return h.WriteJsonData(resp)
+}
+
+// AiMsgReasoningEnd emits a "reasoning-end" part closing the reasoning block
+// with the given id.
+func (h *SSEHandlerCh) AiMsgReasoningEnd(reasoningId string) error {
+	resp := map[string]interface{}{
+		"type": AiMsgReasoningEnd,
+		"id":   reasoningId,
+	}
+	return h.WriteJsonData(resp)
+}
+
+// AiMsgToolInputStart emits a "tool-input-start" part announcing that input
+// for the named tool call will be streamed next.
+func (h *SSEHandlerCh) AiMsgToolInputStart(toolCallId, toolName string) error {
+	resp := map[string]interface{}{
+		"type":       AiMsgToolInputStart,
+		"toolCallId": toolCallId,
+		"toolName":   toolName,
+	}
+	return h.WriteJsonData(resp)
+}
+
+// AiMsgToolInputDelta emits a "tool-input-delta" part carrying an incremental
+// chunk of the tool-call input text.
+func (h *SSEHandlerCh) AiMsgToolInputDelta(toolCallId, inputTextDelta string) error {
+	resp := map[string]interface{}{
+		"type":           AiMsgToolInputDelta,
+		"toolCallId":     toolCallId,
+		"inputTextDelta": inputTextDelta,
+	}
+	return h.WriteJsonData(resp)
+}
+
+// AiMsgToolInputAvailable emits a "tool-input-available" part with the
+// complete tool input; input is raw JSON and is embedded as-is.
+func (h *SSEHandlerCh) AiMsgToolInputAvailable(toolCallId, toolName string, input json.RawMessage) error {
+	resp := map[string]interface{}{
+		"type":       AiMsgToolInputAvailable,
+		"toolCallId": toolCallId,
+		"toolName":   toolName,
+		"input":      json.RawMessage(input),
+	}
+	return h.WriteJsonData(resp)
+}
+
+// AiMsgFinishStep emits a "finish-step" part (end of one step of a
+// multi-step exchange, e.g. before tool execution).
+func (h *SSEHandlerCh) AiMsgFinishStep() error {
+	resp := map[string]interface{}{
+		"type": AiMsgFinishStep,
+	}
+	return h.WriteJsonData(resp)
+}
+
+// AiMsgError emits an "error" part with the given error text.
+func (h *SSEHandlerCh) AiMsgError(errText string) error {
+	resp := map[string]interface{}{
+		"type":      AiMsgError,
+		"errorText": errText,
+	}
+	return h.WriteJsonData(resp)
+}
diff --git a/pkg/waveai/usechat-anthropic.go b/pkg/waveai/usechat-anthropic.go
new file mode 100644
index 0000000000..d2f11ce76d
--- /dev/null
+++ b/pkg/waveai/usechat-anthropic.go
@@ -0,0 +1,627 @@
+// This file streams Anthropic Messages API events and adapts them
+// to our AI-SDK style SSE parts. 
Mapping is based on the AI-SDK data stream
+// protocol (start/text-start/text-delta/text-end, reasoning-*, tool-input-*, finish, finish-step)
+// and Anthropic's Messages + Streaming event schemas (message_start,
+// content_block_start/delta/stop, message_delta, message_stop, error).
+// See /aiprompts/aisdk-streaming.md for the AI-SDK protocol notes.
+//
+// NOTE: options are supplied as *wshrpc.WaveAIOptsType (imported below).
+package waveai
+
+import (
+	"bytes"
+	"context"
+	"crypto/rand"
+	"encoding/hex"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+
+	"github.com/launchdarkly/eventsource"
+	"github.com/wavetermdev/waveterm/pkg/wshrpc"
+)
+
+// StopReasonKind classifies why an Anthropic streaming call ended.
+type StopReasonKind string
+
+const (
+	StopKindDone      StopReasonKind = "done"
+	StopKindToolUse   StopReasonKind = "tool_use"
+	StopKindMaxTokens StopReasonKind = "max_tokens"
+	StopKindContent   StopReasonKind = "content_filter"
+	StopKindCanceled  StopReasonKind = "canceled"
+	StopKindError     StopReasonKind = "error"
+)
+
+// Defaults applied when the corresponding option fields are empty/zero.
+const (
+	AnthropicDefaultBaseURL    = "https://api.anthropic.com"
+	AnthropicDefaultAPIVersion = "2023-06-01"
+	AnthropicDefaultMaxTokens  = 1024
+)
+
+// ---------- Anthropic wire types (subset) ----------
+// Derived from anthropic-messages-api.md and anthropic-streaming.md.
:contentReference[oaicite:6]{index=6} :contentReference[oaicite:7]{index=7} + +type anthropicInputMessage struct { + Role string `json:"role"` + Content json.RawMessage `json:"content"` // string or []blocks +} + +type anthropicStreamRequest struct { + Model string `json:"model"` + Messages []anthropicInputMessage `json:"messages"` + MaxTokens int `json:"max_tokens"` + Stream bool `json:"stream"` + System any `json:"system,omitempty"` + ToolChoice any `json:"tool_choice,omitempty"` + Tools []ToolDefinition `json:"tools,omitempty"` + Thinking any `json:"thinking,omitempty"` +} + +type anthropicMessageObj struct { + ID string `json:"id"` + Model string `json:"model"` + StopReason *string `json:"stop_reason"` + StopSequence *string `json:"stop_sequence"` +} + +type anthropicContentBlockType struct { + Type string `json:"type"` + Text string `json:"text,omitempty"` + Thinking string `json:"thinking,omitempty"` + ID string `json:"id,omitempty"` + Name string `json:"name,omitempty"` + Input json.RawMessage `json:"input,omitempty"` +} + +type anthropicDeltaType struct { + Type string `json:"type"` + Text string `json:"text,omitempty"` // text_delta.text + Thinking string `json:"thinking,omitempty"` // thinking_delta.thinking + PartialJSON string `json:"partial_json,omitempty"` + Signature string `json:"signature,omitempty"` + StopReason *string `json:"stop_reason,omitempty"` // message_delta.delta.stop_reason + StopSeq *string `json:"stop_sequence,omitempty"` // message_delta.delta.stop_sequence +} + +type anthropicUsageType struct { + OutputTokens int `json:"output_tokens,omitempty"` // cumulative +} + +type anthropicErrorType struct { + Type string `json:"type"` + Message string `json:"message"` +} + +type anthropicFullStreamEvent struct { + Type string `json:"type"` + Message *anthropicMessageObj `json:"message,omitempty"` + Index *int `json:"index,omitempty"` + ContentBlock *anthropicContentBlockType `json:"content_block,omitempty"` + Delta *anthropicDeltaType 
`json:"delta,omitempty"` + Usage *anthropicUsageType `json:"usage,omitempty"` + Error *anthropicErrorType `json:"error,omitempty"` +} + +// ---------- per-index content block bookkeeping ---------- +type blockKind int + +const ( + blockText blockKind = iota + blockThinking + blockToolUse +) + +type blockState struct { + kind blockKind + // For text/reasoning: local SSE id + localID string + // For tool_use: + toolCallID string // Anthropic tool_use.id + toolName string + accumJSON *partialJSON // accumulator for input_json_delta +} + +// partialJSON is a minimal, allocation-friendly accumulator for Anthropic +// input_json_delta (concat, then parse once on content_block_stop). :contentReference[oaicite:8]{index=8} +type partialJSON struct { + buf bytes.Buffer +} + +func (p *partialJSON) Write(s string) { + // The stream may send empty "" chunks; ignore if zero-length + if s == "" { + return + } + p.buf.WriteString(s) +} + +func (p *partialJSON) Bytes() []byte { return p.buf.Bytes() } + +func (p *partialJSON) FinalObject() (json.RawMessage, error) { + raw := p.buf.Bytes() + // If empty, treat as "{}" + if len(bytes.TrimSpace(raw)) == 0 { + return json.RawMessage(`{}`), nil + } + // The accumulated content should be a valid JSON object string; parse it. 
+ var v interface{} + if err := json.Unmarshal(raw, &v); err != nil { + return nil, fmt.Errorf("invalid accumulated tool input JSON: %w", err) + } + // Ensure it's an object per Anthropic contract + switch v.(type) { + case map[string]interface{}: + return json.RawMessage(raw), nil + default: + return nil, fmt.Errorf("tool input is not an object") + } +} + +// ---------- Public entrypoint ---------- +// +// Mapping rules recap (Anthropic → AI‑SDK): +// - message_start → AiMsgStart +// - content_block_start(type=text) → AiMsgTextStart; text_delta → AiMsgTextDelta; content_block_stop → AiMsgTextEnd +// - content_block_start(type=thinking) → AiMsgReasoningStart; thinking_delta → AiMsgReasoningDelta; stop → AiMsgReasoningEnd +// - content_block_start(type=tool_use) → AiMsgToolInputStart; input_json_delta → AiMsgToolInputDelta; stop → AiMsgToolInputAvailable +// - If final stop_reason == "tool_use": emit AiMsgFinishStep and return StopReason{Kind:ToolUse, ...} WITHOUT AiMsgFinish +// - If message_stop with stop_reason == "end_turn" or nil: emit AiMsgFinish then [DONE] +// - On Anthropic error event: AiMsgError and return StopKindError. :contentReference[oaicite:9]{index=9} :contentReference[oaicite:10]{index=10} + +// parseAnthropicHTTPError parses Anthropic API HTTP error responses +func parseAnthropicHTTPError(resp *http.Response) error { + var eresp struct { + Type string `json:"type"` + Error struct { + Type string `json:"type"` + Message string `json:"message"` + } `json:"error"` + } + slurp, _ := io.ReadAll(resp.Body) + _ = json.Unmarshal(slurp, &eresp) + + var msg string + if eresp.Error.Message != "" { + msg = eresp.Error.Message + } else { + // Limit raw response to avoid giant messages + rawMsg := strings.TrimSpace(string(slurp)) + if len(rawMsg) > 500 { + rawMsg = rawMsg[:500] + "..." 
+ } + if rawMsg == "" { + msg = "unknown error" + } else { + msg = rawMsg + } + } + return fmt.Errorf("anthropic %s: %s", resp.Status, msg) +} + +func StreamAnthropicResponses( + ctx context.Context, + sse *SSEHandlerCh, + opts *wshrpc.WaveAIOptsType, + messages []UseChatMessage, + tools []ToolDefinition, +) (*StopReason, error) { + if sse == nil { + return nil, errors.New("sse handler is nil") + } + // Context with timeout if provided. + if opts.TimeoutMs > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, time.Duration(opts.TimeoutMs)*time.Millisecond) + defer cancel() + } + + req, err := buildAnthropicHTTPRequest(ctx, opts, messages, tools) + if err != nil { + return nil, err + } + + httpClient := &http.Client{ + Timeout: 0, // rely on ctx; streaming can be long + } + // Proxy support + if opts.ProxyURL != "" { + pURL, perr := url.Parse(opts.ProxyURL) + if perr == nil { + httpClient.Transport = &http.Transport{ + Proxy: http.ProxyURL(pURL), + } + } + } + + resp, err := httpClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + ct := resp.Header.Get("Content-Type") + if resp.StatusCode != http.StatusOK || !strings.HasPrefix(ct, "text/event-stream") { + return nil, parseAnthropicHTTPError(resp) + } + + // Use eventsource decoder for proper SSE parsing + decoder := eventsource.NewDecoder(resp.Body) + + // Per-response state + blockMap := map[int]*blockState{} + var toolCalls []ToolCall + var stopFromDelta string + var msgID string + var model string + + // SSE event processing loop + for { + // Check for context cancellation + if err := ctx.Err(); err != nil { + _ = sse.AiMsgError("request cancelled") + return &StopReason{ + Kind: StopKindCanceled, + ErrorType: "cancelled", + ErrorText: "request cancelled", + }, err + } + + event, err := decoder.Decode() + if err != nil { + if errors.Is(err, io.EOF) { + // Normal end of stream + break + } + // transport error mid-stream + _ = sse.AiMsgError(err.Error()) + 
return &StopReason{ + Kind: StopKindError, + ErrorType: "stream", + ErrorText: err.Error(), + }, err + } + + if stop, ret := handleAnthropicEvent(event, sse, blockMap, &toolCalls, &msgID, &model, stopFromDelta); ret != nil { + // Either error or message_stop triggered return + return ret, nil + } else { + // maybe updated final stop reason (from message_delta) + if stop != nil && *stop != "" { + stopFromDelta = *stop + } + } + } + + // If we got here without a message_stop, close as done. + _ = sse.AiMsgFinish("", nil) + return &StopReason{ + Kind: StopKindDone, + RawReason: stopFromDelta, + MessageID: msgID, + Model: model, + }, nil +} + +// handleAnthropicEvent processes one SSE event block. It may emit SSE parts +// and/or return a StopReason when the stream is complete. +// +// Return tuple: +// - stopFromDelta: a *string with stop reason when message_delta updates stop_reason +// - final: a *StopReason to return immediately (e.g., after message_stop or error) +// +// Event model: anthropic-streaming.md. 
:contentReference[oaicite:16]{index=16} +func handleAnthropicEvent( + event eventsource.Event, + sse *SSEHandlerCh, + blocks map[int]*blockState, + toolCalls *[]ToolCall, + msgID *string, + model *string, + stopFromPreviousDelta string, +) (stopFromDelta *string, final *StopReason) { + eventName := event.Event() + data := event.Data() + switch eventName { + case "ping": + return nil, nil // ignore + + case "error": + // Example: data: {"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}} :contentReference[oaicite:17]{index=17} + var ev anthropicFullStreamEvent + if jerr := json.Unmarshal([]byte(data), &ev); jerr != nil { + err := fmt.Errorf("error event decode: %w", jerr) + _ = sse.AiMsgError(err.Error()) + return nil, &StopReason{Kind: StopKindError, ErrorType: "decode", ErrorText: err.Error()} + } + msg := "unknown error" + etype := "error" + if ev.Error != nil { + msg = ev.Error.Message + etype = ev.Error.Type + } + _ = sse.AiMsgError(msg) + return nil, &StopReason{ + Kind: StopKindError, + ErrorType: etype, + ErrorText: msg, + } + + case "message_start": + var ev anthropicFullStreamEvent + if err := json.Unmarshal([]byte(data), &ev); err != nil { + _ = sse.AiMsgError(err.Error()) + return nil, &StopReason{Kind: StopKindError, ErrorType: "decode", ErrorText: err.Error()} + } + if ev.Message != nil { + *msgID = ev.Message.ID + *model = ev.Message.Model + } + _ = sse.AiMsgStart(*msgID) + return nil, nil + + case "content_block_start": + var ev anthropicFullStreamEvent + if err := json.Unmarshal([]byte(data), &ev); err != nil { + _ = sse.AiMsgError(err.Error()) + return nil, &StopReason{Kind: StopKindError, ErrorType: "decode", ErrorText: err.Error()} + } + if ev.Index == nil || ev.ContentBlock == nil { + return nil, nil + } + idx := *ev.Index + switch ev.ContentBlock.Type { + case "text": + id := genID("text") + blocks[idx] = &blockState{kind: blockText, localID: id} + _ = sse.AiMsgTextStart(id) + case "thinking": + id := genID("reasoning") + 
blocks[idx] = &blockState{kind: blockThinking, localID: id} + _ = sse.AiMsgReasoningStart(id) + case "tool_use": + tcID := ev.ContentBlock.ID + tName := ev.ContentBlock.Name + st := &blockState{ + kind: blockToolUse, + toolCallID: tcID, + toolName: tName, + accumJSON: &partialJSON{}, + } + blocks[idx] = st + _ = sse.AiMsgToolInputStart(tcID, tName) + default: + // ignore other block types gracefully per Anthropic guidance :contentReference[oaicite:18]{index=18} + } + return nil, nil + + case "content_block_delta": + var ev anthropicFullStreamEvent + if err := json.Unmarshal([]byte(data), &ev); err != nil { + _ = sse.AiMsgError(err.Error()) + return nil, &StopReason{Kind: StopKindError, ErrorType: "decode", ErrorText: err.Error()} + } + if ev.Index == nil || ev.Delta == nil { + return nil, nil + } + st := blocks[*ev.Index] + if st == nil { + return nil, nil + } + switch ev.Delta.Type { + case "text_delta": + if st.kind == blockText { + _ = sse.AiMsgTextDelta(st.localID, ev.Delta.Text) + } + case "thinking_delta": + if st.kind == blockThinking { + _ = sse.AiMsgReasoningDelta(st.localID, ev.Delta.Thinking) + } + case "input_json_delta": + if st.kind == blockToolUse { + st.accumJSON.Write(ev.Delta.PartialJSON) + _ = sse.AiMsgToolInputDelta(st.toolCallID, ev.Delta.PartialJSON) + } + case "signature_delta": + // ignore; integrity metadata for thinking blocks. :contentReference[oaicite:19]{index=19} + default: + // ignore unknown deltas gracefully. 
:contentReference[oaicite:20]{index=20} + } + return nil, nil + + case "content_block_stop": + var ev anthropicFullStreamEvent + if err := json.Unmarshal([]byte(data), &ev); err != nil { + _ = sse.AiMsgError(err.Error()) + return nil, &StopReason{Kind: StopKindError, ErrorType: "decode", ErrorText: err.Error()} + } + if ev.Index == nil { + return nil, nil + } + st := blocks[*ev.Index] + if st == nil { + return nil, nil + } + switch st.kind { + case blockText: + _ = sse.AiMsgTextEnd(st.localID) + case blockThinking: + _ = sse.AiMsgReasoningEnd(st.localID) + case blockToolUse: + raw, jerr := st.accumJSON.FinalObject() + if jerr != nil { + _ = sse.AiMsgError(jerr.Error()) + return nil, &StopReason{Kind: StopKindError, ErrorType: "parse", ErrorText: jerr.Error()} + } + var input any + if len(raw) > 0 { + jerr = json.Unmarshal(raw, &input) + if jerr != nil { + _ = sse.AiMsgError(jerr.Error()) + return nil, &StopReason{Kind: StopKindError, ErrorType: "parse", ErrorText: jerr.Error()} + } + } + _ = sse.AiMsgToolInputAvailable(st.toolCallID, st.toolName, raw) + *toolCalls = append(*toolCalls, ToolCall{ + ID: st.toolCallID, + Name: st.toolName, + Input: input, + }) + } + return nil, nil + + case "message_delta": + var ev anthropicFullStreamEvent + if err := json.Unmarshal([]byte(data), &ev); err != nil { + _ = sse.AiMsgError(err.Error()) + return nil, &StopReason{Kind: StopKindError, ErrorType: "decode", ErrorText: err.Error()} + } + if ev.Delta != nil && ev.Delta.StopReason != nil { + stopFromDelta = ev.Delta.StopReason + } + return stopFromDelta, nil + + case "message_stop": + // Decide finalization based on last known stop_reason. + // If we didn't capture it in message_delta, treat as end_turn. + reason := "end_turn" + if stopFromPreviousDelta != "" { + reason = stopFromPreviousDelta + } + switch reason { + case "tool_use": + // Finish step, return tool calls (no finish). 
:contentReference[oaicite:21]{index=21} + _ = sse.AiMsgFinishStep() + return nil, &StopReason{ + Kind: StopKindToolUse, + RawReason: reason, + MessageID: *msgID, + Model: *model, + ToolCalls: *toolCalls, + FinishStep: true, + } + case "max_tokens": + _ = sse.AiMsgFinish(reason, nil) + return nil, &StopReason{ + Kind: StopKindMaxTokens, + RawReason: reason, + MessageID: *msgID, + Model: *model, + } + case "refusal": + _ = sse.AiMsgFinish(reason, nil) + return nil, &StopReason{ + Kind: StopKindContent, + RawReason: reason, + MessageID: *msgID, + Model: *model, + } + default: + // end_turn, stop_sequence, pause_turn (treat as end of this call) + _ = sse.AiMsgFinish(reason, nil) + return nil, &StopReason{ + Kind: StopKindDone, + RawReason: reason, + MessageID: *msgID, + Model: *model, + } + } + + default: + // Unknown event names may appear over time; ignore. :contentReference[oaicite:22]{index=22} + return nil, nil + } +} + +// buildAnthropicHTTPRequest creates a complete HTTP request for the Anthropic API +func buildAnthropicHTTPRequest(ctx context.Context, opts *wshrpc.WaveAIOptsType, msgs []UseChatMessage, tools []ToolDefinition) (*http.Request, error) { + if opts == nil { + return nil, errors.New("opts is nil") + } + if opts.APIToken == "" { + return nil, errors.New("Anthropic API token missing") + } + if opts.Model == "" { + return nil, errors.New("opts.model is required") + } + + // Set defaults + baseURL := opts.BaseURL + if baseURL == "" { + baseURL = AnthropicDefaultBaseURL + } + endpoint := strings.TrimRight(baseURL, "/") + "/v1/messages" + + apiVersion := opts.APIVersion + if apiVersion == "" { + apiVersion = AnthropicDefaultAPIVersion + } + + maxTokens := opts.MaxTokens + if maxTokens <= 0 { + maxTokens = AnthropicDefaultMaxTokens + } + + // Build request body + reqBody := &anthropicStreamRequest{ + Model: opts.Model, + MaxTokens: maxTokens, + Stream: true, + } + if len(tools) > 0 { + reqBody.Tools = tools + } + + for _, m := range msgs { + aim := 
anthropicInputMessage{Role: m.Role} + // Content may be a string or array of blocks; support text only. :contentReference[oaicite:24]{index=24} + if len(m.Parts) > 0 { + var blocks []map[string]string + for _, p := range m.Parts { + if strings.ToLower(p.Type) == "text" || p.Type == "" { + blocks = append(blocks, map[string]string{ + "type": "text", + "text": p.Text, + }) + } + } + bs, _ := json.Marshal(blocks) + aim.Content = bs + } else { + // Shorthand: string becomes a single text block. :contentReference[oaicite:25]{index=25} + if m.Content == "" { + m.Content = "" + } + aim.Content = json.RawMessage(fmt.Sprintf("%q", m.Content)) + } + reqBody.Messages = append(reqBody.Messages, aim) + } + + bodyBytes, err := json.Marshal(reqBody) + if err != nil { + return nil, err + } + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(bodyBytes)) + if err != nil { + return nil, err + } + + req.Header.Set("content-type", "application/json") + req.Header.Set("x-api-key", opts.APIToken) + req.Header.Set("anthropic-version", apiVersion) + req.Header.Set("accept", "text/event-stream") + + return req, nil +} + +func genID(prefix string) string { + var b [8]byte + _, _ = rand.Read(b[:]) + return fmt.Sprintf("%s_%s", prefix, hex.EncodeToString(b[:])) +} diff --git a/pkg/waveai/usechat-openai-completions.go b/pkg/waveai/usechat-openai-completions.go new file mode 100644 index 0000000000..31944d27db --- /dev/null +++ b/pkg/waveai/usechat-openai-completions.go @@ -0,0 +1,162 @@ +// Copyright 2025, Command Line Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +package waveai + +import ( + "context" + "fmt" + "strings" + + "github.com/openai/openai-go/v2" + "github.com/openai/openai-go/v2/option" + "github.com/wavetermdev/waveterm/pkg/wshrpc" +) + +// OpenAI Chat Completion streaming response format +type OpenAIStreamChoice struct { + Index int `json:"index"` + Delta struct { + Content string `json:"content,omitempty"` + Reasoning string `json:"reasoning,omitempty"` + } `json:"delta"` + FinishReason *string `json:"finish_reason"` +} + +type OpenAIStreamResponse struct { + ID string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + Model string `json:"model"` + Choices []OpenAIStreamChoice `json:"choices"` + Usage *OpenAIUsageResponse `json:"usage,omitempty"` +} + +type OpenAIUsageResponse struct { + PromptTokens int `json:"prompt_tokens"` + CompletionTokens int `json:"completion_tokens"` + TotalTokens int `json:"total_tokens"` +} + +func StreamOpenAIChatCompletions(sseHandler *SSEHandlerCh, ctx context.Context, opts *wshrpc.WaveAIOptsType, messages []UseChatMessage) { + // Set up OpenAI client options + clientOpts := []option.RequestOption{ + option.WithAPIKey(opts.APIToken), + } + + if opts.BaseURL != "" { + clientOpts = append(clientOpts, option.WithBaseURL(opts.BaseURL)) + } + if opts.OrgID != "" { + clientOpts = append(clientOpts, option.WithOrganization(opts.OrgID)) + } + + client := openai.NewClient(clientOpts...) 
+ + // Convert messages to ChatCompletionMessageParam, filtering out empty content + var chatMessages []openai.ChatCompletionMessageParamUnion + for _, msg := range messages { + content := msg.GetContent() + // Skip messages with empty content as OpenAI requires non-empty content + if strings.TrimSpace(content) == "" { + continue + } + + // Create appropriate message based on role + switch msg.Role { + case "user": + chatMessages = append(chatMessages, openai.UserMessage(content)) + case "assistant": + chatMessages = append(chatMessages, openai.AssistantMessage(content)) + case "system": + chatMessages = append(chatMessages, openai.SystemMessage(content)) + default: + chatMessages = append(chatMessages, openai.UserMessage(content)) + } + } + + // Create request using Chat Completions API + req := openai.ChatCompletionNewParams{ + Model: opts.Model, + Messages: chatMessages, + } + + if opts.MaxTokens > 0 { + if isReasoningModel(opts.Model) { + req.MaxCompletionTokens = openai.Int(int64(opts.MaxTokens)) + } else { + req.MaxTokens = openai.Int(int64(opts.MaxTokens)) + } + } + + // Create stream using Chat Completions API + stream := client.Chat.Completions.NewStreaming(ctx, req) + defer stream.Close() + + // Generate IDs for the streaming protocol + messageId := generateID() + textId := generateID() + + // Send message start + sseHandler.AiMsgStart(messageId) + + // Track whether we've started text streaming and finished + textStarted := false + textEnded := false + finished := false + + // Stream responses using event-based API + for stream.Next() { + chunk := stream.Current() + + if len(chunk.Choices) > 0 { + choice := chunk.Choices[0] + + // Handle content delta + if choice.Delta.Content != "" { + // Send text start only when we have actual content + if !textStarted { + sseHandler.AiMsgTextStart(textId) + textStarted = true + } + sseHandler.AiMsgTextDelta(textId, choice.Delta.Content) + } + + // Handle finish reason + if choice.FinishReason != "" && !finished { + 
usage := &OpenAIUsageResponse{} + if chunk.Usage.PromptTokens > 0 || chunk.Usage.CompletionTokens > 0 { + usage.PromptTokens = int(chunk.Usage.PromptTokens) + usage.CompletionTokens = int(chunk.Usage.CompletionTokens) + usage.TotalTokens = int(chunk.Usage.TotalTokens) + } + + // End text if it was started but not ended + if textStarted && !textEnded { + sseHandler.AiMsgTextEnd(textId) + textEnded = true + } + + sseHandler.AiMsgFinish(choice.FinishReason, usage) + finished = true + return + } + } + } + + // Handle stream errors + if err := stream.Err(); err != nil { + sseHandler.WriteError(fmt.Sprintf("OpenAI API error: %v", err)) + return + } + + // Cleanup if stream ended without completion event + if !finished { + // End text if it was started but not ended + if textStarted && !textEnded { + sseHandler.AiMsgTextEnd(textId) + textEnded = true + } + sseHandler.AiMsgFinish("stop", nil) + } +} diff --git a/pkg/waveai/usechat-openai-responses.go b/pkg/waveai/usechat-openai-responses.go new file mode 100644 index 0000000000..e9b50e066e --- /dev/null +++ b/pkg/waveai/usechat-openai-responses.go @@ -0,0 +1,235 @@ +// Copyright 2025, Command Line Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +package waveai + +import ( + "context" + "fmt" + "strings" + + "github.com/openai/openai-go/v2" + "github.com/openai/openai-go/v2/option" + "github.com/openai/openai-go/v2/responses" + "github.com/openai/openai-go/v2/shared" + "github.com/wavetermdev/waveterm/pkg/wavebase" + "github.com/wavetermdev/waveterm/pkg/wshrpc" +) + +func createOpenAIRequest(opts *wshrpc.WaveAIOptsType, messages []UseChatMessage, tools []ToolDefinition) (openai.Client, responses.ResponseNewParams) { + // Set up OpenAI client options + clientOpts := []option.RequestOption{ + option.WithAPIKey(opts.APIToken), + } + + if opts.BaseURL != "" { + clientOpts = append(clientOpts, option.WithBaseURL(opts.BaseURL)) + } + if opts.OrgID != "" { + clientOpts = append(clientOpts, option.WithOrganization(opts.OrgID)) + } + + client := openai.NewClient(clientOpts...) + + // Convert messages to input items, filtering out empty content + var inputItems []responses.ResponseInputItemUnionParam + for _, msg := range messages { + content := msg.GetContent() + // Skip messages with empty content as OpenAI requires non-empty content + if strings.TrimSpace(content) == "" { + continue + } + + // Convert role to EasyInputMessageRole + var role responses.EasyInputMessageRole + switch msg.Role { + case "user": + role = responses.EasyInputMessageRoleUser + case "assistant": + role = responses.EasyInputMessageRoleAssistant + case "system": + role = responses.EasyInputMessageRoleSystem + default: + role = responses.EasyInputMessageRoleUser + } + + inputItems = append(inputItems, responses.ResponseInputItemParamOfMessage(content, role)) + } + + // Create request using Responses API for reasoning support + req := responses.ResponseNewParams{ + Model: opts.Model, + Input: responses.ResponseNewParamsInputUnion{ + OfInputItemList: responses.ResponseInputParam(inputItems), + }, + } + + // Convert tools if provided + if len(tools) > 0 { + var responseTools []responses.ToolUnionParam + 
for _, tool := range tools { + responseTool := responses.ToolParamOfFunction(tool.Name, tool.InputSchema, false) + responseTools = append(responseTools, responseTool) + } + req.Tools = responseTools + } + + // Only set reasoning parameter for reasoning models + if isReasoningModel(opts.Model) { + req.Reasoning = shared.ReasoningParam{ + Effort: openai.ReasoningEffortMedium, + Summary: openai.ReasoningSummaryAuto, + } + } + + if opts.MaxTokens > 0 { + req.MaxOutputTokens = openai.Int(int64(opts.MaxTokens)) + } + + return client, req +} + +func StreamOpenAIResponsesAPI(sseHandler *SSEHandlerCh, ctx context.Context, opts *wshrpc.WaveAIOptsType, messages []UseChatMessage, tools []ToolDefinition) { + client, req := createOpenAIRequest(opts, messages, tools) + + // Create stream using Responses API + stream := client.Responses.NewStreaming(ctx, req) + defer stream.Close() + + // Generate IDs for the streaming protocol + messageId := generateID() + textId := generateID() + reasoningId := generateID() + + // Send message start + sseHandler.AiMsgStart(messageId) + + // Track whether we've started text/reasoning streaming and finished + textStarted := false + textEnded := false + reasoningStarted := false + reasoningEnded := false + finished := false + + // Stream responses using event-based API + for stream.Next() { + event := stream.Current() + + fmt.Printf("DEBUG: Received event type: %s\n", event.Type) + + switch event.Type { + case "response.output_item.added": + outputItem := event.AsResponseOutputItemAdded() + // fmt.Printf("DEBUG: output_item.added - Type: %s\n", outputItem.Item.Type) + if outputItem.Item.Type == "reasoning" && !reasoningStarted { + sseHandler.AiMsgReasoningStart(reasoningId) + reasoningStarted = true + } + + case "response.reasoning_summary_part.added": + // Optional; first empty part—no-op + + case "response.reasoning_summary_text.delta": + reasoningDelta := event.AsResponseReasoningSummaryTextDelta() + fmt.Printf("DEBUG: reasoning delta - 
reasoningEnded=%t, delta='%s'\n", reasoningEnded, reasoningDelta.Delta) + if reasoningDelta.Delta != "" && !reasoningEnded { + sseHandler.AiMsgReasoningDelta(reasoningId, reasoningDelta.Delta) + } + + case "response.reasoning_summary_text.done": + fmt.Printf("DEBUG: reasoning summary text done - reasoningStarted=%t, reasoningEnded=%t (not ending here, waiting for output_item.done)\n", reasoningStarted, reasoningEnded) + // Don't end reasoning here - there may be multiple reasoning parts + // Wait for response.output_item.done to end reasoning + + case "response.reasoning_summary_part.done": + // Reasoning summary part done - no action needed + + case "response.content_part.added": + // First output_text part for message—no-op + + case "response.content_part.done": + // Content part done - no action needed + + case "response.output_text.delta": + textDelta := event.AsResponseOutputTextDelta() + if textDelta.Delta != "" && !textEnded { + if !textStarted { + sseHandler.AiMsgTextStart(textId) + textStarted = true + } + sseHandler.AiMsgTextDelta(textId, textDelta.Delta) + } + + case "response.output_text.done": + if textStarted && !textEnded { + sseHandler.AiMsgTextEnd(textId) + textEnded = true + } + + case "response.output_item.done": + // Item-level close (reasoning or message) + // If we had started reasoning but haven't ended it, end it now + if reasoningStarted && !reasoningEnded { + sseHandler.AiMsgReasoningEnd(reasoningId) + reasoningEnded = true + } + + case "response.completed": + responseDone := event.AsResponseCompleted() + if !finished { + usage := &OpenAIUsageResponse{} + responseUsage := responseDone.Response.Usage + usage.PromptTokens = int(responseUsage.InputTokens) + usage.CompletionTokens = int(responseUsage.OutputTokens) + usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens + + // End reasoning if it was started but not ended + if reasoningStarted && !reasoningEnded { + sseHandler.AiMsgReasoningEnd(reasoningId) + reasoningEnded = true + } 
+ // End text if it was started but not ended + if textStarted && !textEnded { + sseHandler.AiMsgTextEnd(textId) + textEnded = true + } + + finishReason := "stop" + if responseDone.Response.Status == "completed" { + finishReason = "stop" + } + + sseHandler.AiMsgFinish(finishReason, usage) + finished = true + } + return + + default: + // Log unhandled event types in dev mode + if wavebase.IsDevMode() { + fmt.Printf("DEBUG: Unhandled event type: %s\n", event.Type) + } + } + } + + // Handle stream errors + if err := stream.Err(); err != nil { + sseHandler.WriteError(fmt.Sprintf("OpenAI API error: %v", err)) + return + } + + // Cleanup if stream ended without completion event + if !finished { + // End reasoning if it was started but not ended + if reasoningStarted && !reasoningEnded { + sseHandler.AiMsgReasoningEnd(reasoningId) + reasoningEnded = true + } + // End text if it was started but not ended + if textStarted && !textEnded { + sseHandler.AiMsgTextEnd(textId) + textEnded = true + } + sseHandler.AiMsgFinish("stop", nil) + } +} diff --git a/pkg/waveai/usechat.go b/pkg/waveai/usechat.go new file mode 100644 index 0000000000..3bfd1747c2 --- /dev/null +++ b/pkg/waveai/usechat.go @@ -0,0 +1,273 @@ +// Copyright 2025, Command Line Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +package waveai + +import ( + "context" + "crypto/rand" + "encoding/hex" + "encoding/json" + "fmt" + "log" + "net/http" + "strings" + + "github.com/wavetermdev/waveterm/pkg/waveobj" + "github.com/wavetermdev/waveterm/pkg/wconfig" + "github.com/wavetermdev/waveterm/pkg/wshrpc" + "github.com/wavetermdev/waveterm/pkg/wstore" +) + +type UseChatMessagePart struct { + Type string `json:"type"` + Text string `json:"text"` +} + +type UseChatMessage struct { + Role string `json:"role"` + Content string `json:"content,omitempty"` + Parts []UseChatMessagePart `json:"parts,omitempty"` +} + +type ToolCall struct { + ID string `json:"id"` // Anthropic tool_use.id + Name string `json:"name,omitempty"` // tool name (if provided) + Input any `json:"input,omitempty"` // accumulated input JSON +} + +type StopReason struct { + Kind StopReasonKind `json:"kind"` + RawReason string `json:"raw_reason,omitempty"` + MessageID string `json:"message_id,omitempty"` + Model string `json:"model,omitempty"` + + ToolCalls []ToolCall `json:"tool_calls,omitempty"` + + ErrorType string `json:"error_type,omitempty"` + ErrorText string `json:"error_text,omitempty"` + + FinishStep bool `json:"finish_step,omitempty"` +} + +// ToolDefinition represents a tool that can be used by the AI model +type ToolDefinition struct { + Name string `json:"name"` + Description string `json:"description"` + InputSchema map[string]any `json:"input_schema"` +} + +// GetContent extracts the text content from either content field or parts array +func (m *UseChatMessage) GetContent() string { + if m.Content != "" { + return m.Content + } + if len(m.Parts) > 0 { + var content strings.Builder + for _, part := range m.Parts { + if part.Type == "text" { + content.WriteString(part.Text) + } + } + return content.String() + } + return "" +} + +type UseChatRequest struct { + Messages []UseChatMessage `json:"messages"` + Options *wconfig.AiSettingsType `json:"options,omitempty"` +} + +func 
resolveAIConfig(ctx context.Context, blockId, presetKey string, requestOptions *wconfig.AiSettingsType) (*wshrpc.WaveAIOptsType, error) { + // Get block metadata + block, err := wstore.DBMustGet[*waveobj.Block](ctx, blockId) + if err != nil { + return nil, fmt.Errorf("failed to get block: %w", err) + } + + // Get global settings + fullConfig := wconfig.GetWatcher().GetFullConfig() + globalAiSettings := fullConfig.Settings.GetAiSettings() + + // Resolve preset hierarchy + finalPreset := presetKey + if finalPreset == "" && block != nil && block.Meta != nil { + if blockPreset, ok := block.Meta["ai:preset"].(string); ok { + finalPreset = blockPreset + } + } + if finalPreset == "" { + finalPreset = globalAiSettings.AiPreset + } + if finalPreset == "" { + finalPreset = "default" + } + + // Load preset configuration + var presetAiSettings *wconfig.AiSettingsType + if finalPreset != "default" { + var presetKey string + if strings.HasPrefix(finalPreset, "ai@") { + presetKey = finalPreset + } else { + presetKey = fmt.Sprintf("ai@%s", finalPreset) + } + if preset, ok := fullConfig.Presets[presetKey]; ok { + presetAiSettings = &wconfig.AiSettingsType{} + if err := json.Unmarshal(mustMarshal(preset), presetAiSettings); err == nil { + // Successfully unmarshaled preset + } else { + presetAiSettings = nil + } + } + } + + // Extract block AI settings from metadata + var blockAiSettings *wconfig.AiSettingsType + if block != nil && block.Meta != nil { + blockAiSettings = &wconfig.AiSettingsType{} + if err := json.Unmarshal(mustMarshal(block.Meta), blockAiSettings); err != nil { + blockAiSettings = nil + } + } + + // Merge settings with hierarchy: global < preset < block < request + finalSettings := wconfig.MergeAiSettings(globalAiSettings, presetAiSettings, blockAiSettings, requestOptions) + + // Convert to WaveAIOptsType + aiOpts := &wshrpc.WaveAIOptsType{ + Model: finalSettings.AiModel, + APIType: finalSettings.AiApiType, + APIToken: finalSettings.AiApiToken, + BaseURL: 
finalSettings.AiBaseURL, + OrgID: finalSettings.AiOrgID, + APIVersion: finalSettings.AIApiVersion, + ProxyURL: finalSettings.AiProxyUrl, + MaxTokens: int(finalSettings.AiMaxTokens), + TimeoutMs: int(finalSettings.AiTimeoutMs), + } + + // Set defaults + if aiOpts.Model == "" { + aiOpts.Model = "gpt-4.1" + } + if aiOpts.APIType == "" { + aiOpts.APIType = APIType_OpenAI + } + if aiOpts.MaxTokens == 0 { + aiOpts.MaxTokens = 4000 + } + + return aiOpts, nil +} + +func mustMarshal(v any) []byte { + data, err := json.Marshal(v) + if err != nil { + return []byte("{}") + } + return data +} + +func shouldUseChatCompletionsAPI(model string) bool { + m := strings.ToLower(model) + // Chat Completions API is required for older models: gpt-3.5-*, gpt-4, gpt-4-turbo, o1-* + return strings.HasPrefix(m, "gpt-3.5") || + strings.HasPrefix(m, "gpt-4-") || + m == "gpt-4" || + strings.HasPrefix(m, "o1-") +} + +func StreamOpenAIToUseChat(ctx context.Context, sseHandler *SSEHandlerCh, opts *wshrpc.WaveAIOptsType, messages []UseChatMessage, tools []ToolDefinition) (*StopReason, error) { + // Route to appropriate API based on model + if shouldUseChatCompletionsAPI(opts.Model) { + // Older models (gpt-3.5, gpt-4, gpt-4-turbo, o1-*) use Chat Completions API + StreamOpenAIChatCompletions(sseHandler, ctx, opts, messages) + } else { + // Newer models (gpt-4.1, gpt-4o, gpt-5, o3, o4, etc.) 
use Responses API for reasoning support + StreamOpenAIResponsesAPI(sseHandler, ctx, opts, messages, tools) + } + + return &StopReason{ + Kind: StopKindDone, + }, nil +} + +func generateID() string { + bytes := make([]byte, 16) + rand.Read(bytes) + return hex.EncodeToString(bytes) +} + +func HandleAIChat(w http.ResponseWriter, r *http.Request) { + // Handle CORS preflight requests + if r.Method == http.MethodOptions { + w.WriteHeader(http.StatusOK) + return + } + + // Parse query parameters first + blockId := r.URL.Query().Get("blockid") + presetKey := r.URL.Query().Get("preset") + + if blockId == "" { + http.Error(w, "blockid query parameter is required", http.StatusBadRequest) + return + } + + // Parse request body completely before sending any response + var req UseChatRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("Invalid request body: %v", err), http.StatusBadRequest) + return + } + + // Resolve AI configuration + aiOpts, err := resolveAIConfig(r.Context(), blockId, presetKey, req.Options) + if err != nil { + http.Error(w, fmt.Sprintf("Configuration error: %v", err), http.StatusInternalServerError) + return + } + + // Validate configuration + if aiOpts.Model == "" { + http.Error(w, "No AI model specified", http.StatusBadRequest) + return + } + log.Printf("using AI model: %s (%s)", aiOpts.Model, aiOpts.BaseURL) + + // Support OpenAI and Anthropic + if aiOpts.APIType != APIType_OpenAI && aiOpts.APIType != APIType_Anthropic && aiOpts.APIType != "" { + http.Error(w, fmt.Sprintf("Unsupported API type: %s (only OpenAI and Anthropic supported)", aiOpts.APIType), http.StatusBadRequest) + return + } + + if aiOpts.APIToken == "" { + http.Error(w, "No API token provided", http.StatusBadRequest) + return + } + + // Create SSE handler and set up streaming + sseHandler := MakeSSEHandlerCh(w, r.Context()) + defer sseHandler.Close() + + if err := sseHandler.SetupSSE(); err != nil { + http.Error(w, fmt.Sprintf("Failed to 
setup SSE: %v", err), http.StatusInternalServerError) + return + } + + // Stream response based on API type + if aiOpts.APIType == APIType_Anthropic { + _, err := StreamAnthropicResponses(r.Context(), sseHandler, aiOpts, req.Messages, nil) + if err != nil { + log.Printf("Anthropic streaming error: %v", err) + } + } else { + // Default to OpenAI + _, err := StreamOpenAIToUseChat(r.Context(), sseHandler, aiOpts, req.Messages, nil) + if err != nil { + log.Printf("OpenAI streaming error: %v", err) + } + } +} diff --git a/pkg/waveai/waveai.go b/pkg/waveai/waveai.go index 89f1afe9a5..4d012e968a 100644 --- a/pkg/waveai/waveai.go +++ b/pkg/waveai/waveai.go @@ -15,8 +15,8 @@ import ( ) const WaveAIPacketstr = "waveai" -const ApiType_Anthropic = "anthropic" -const ApiType_Perplexity = "perplexity" +const APIType_Anthropic = "anthropic" +const APIType_Perplexity = "perplexity" const APIType_Google = "google" const APIType_OpenAI = "openai" @@ -81,12 +81,12 @@ func RunAICommand(ctx context.Context, request wshrpc.WaveAIStreamRequest) chan } var backend AIBackend var backendType string - if request.Opts.APIType == ApiType_Anthropic { + if request.Opts.APIType == APIType_Anthropic { backend = AnthropicBackend{} - backendType = ApiType_Anthropic - } else if request.Opts.APIType == ApiType_Perplexity { + backendType = APIType_Anthropic + } else if request.Opts.APIType == APIType_Perplexity { backend = PerplexityBackend{} - backendType = ApiType_Perplexity + backendType = APIType_Perplexity } else if request.Opts.APIType == APIType_Google { backend = GoogleBackend{} backendType = APIType_Google diff --git a/pkg/web/web.go b/pkg/web/web.go index 8a3e6470b4..07d0625a6e 100644 --- a/pkg/web/web.go +++ b/pkg/web/web.go @@ -19,7 +19,6 @@ import ( "time" "github.com/google/uuid" - "github.com/gorilla/handlers" "github.com/gorilla/mux" "github.com/wavetermdev/waveterm/pkg/authkey" "github.com/wavetermdev/waveterm/pkg/docsite" @@ -29,6 +28,7 @@ import ( 
"github.com/wavetermdev/waveterm/pkg/schema" "github.com/wavetermdev/waveterm/pkg/service" "github.com/wavetermdev/waveterm/pkg/util/utilfn" + "github.com/wavetermdev/waveterm/pkg/waveai" "github.com/wavetermdev/waveterm/pkg/wavebase" "github.com/wavetermdev/waveterm/pkg/wshrpc" "github.com/wavetermdev/waveterm/pkg/wshrpc/wshclient" @@ -404,6 +404,13 @@ func WebFnWrap(opts WebFnOpts, fn WebFnType) WebFnType { w.Header().Set(CacheControlHeaderKey, CacheControlHeaderNoCache) } w.Header().Set("Access-Control-Expose-Headers", "X-ZoneFileInfo") + + // Handle CORS preflight OPTIONS requests without auth validation + if r.Method == http.MethodOptions { + w.WriteHeader(http.StatusOK) + return + } + err := authkey.ValidateIncomingRequest(r) if err != nil { w.WriteHeader(http.StatusUnauthorized) @@ -442,17 +449,49 @@ const schemaPrefix = "/schema/" // blocking func RunWebServer(listener net.Listener) { gr := mux.NewRouter() - gr.HandleFunc("/wave/stream-local-file", WebFnWrap(WebFnOpts{AllowCaching: true}, handleStreamLocalFile)) - gr.HandleFunc("/wave/stream-file", WebFnWrap(WebFnOpts{AllowCaching: true}, handleStreamFile)) - gr.PathPrefix("/wave/stream-file/").HandlerFunc(WebFnWrap(WebFnOpts{AllowCaching: true}, handleStreamFile)) - gr.HandleFunc("/wave/file", WebFnWrap(WebFnOpts{AllowCaching: false}, handleWaveFile)) - gr.HandleFunc("/wave/service", WebFnWrap(WebFnOpts{JsonErrors: true}, handleService)) - gr.HandleFunc("/vdom/{uuid}/{path:.*}", WebFnWrap(WebFnOpts{AllowCaching: true}, handleVDom)) + + // Create separate routers for different timeout requirements + waveRouter := mux.NewRouter() + waveRouter.HandleFunc("/wave/stream-local-file", WebFnWrap(WebFnOpts{AllowCaching: true}, handleStreamLocalFile)) + waveRouter.HandleFunc("/wave/stream-file", WebFnWrap(WebFnOpts{AllowCaching: true}, handleStreamFile)) + waveRouter.PathPrefix("/wave/stream-file/").HandlerFunc(WebFnWrap(WebFnOpts{AllowCaching: true}, handleStreamFile)) + waveRouter.HandleFunc("/wave/file", 
WebFnWrap(WebFnOpts{AllowCaching: false}, handleWaveFile)) + waveRouter.HandleFunc("/wave/service", WebFnWrap(WebFnOpts{JsonErrors: true}, handleService)) + + vdomRouter := mux.NewRouter() + vdomRouter.HandleFunc("/vdom/{uuid}/{path:.*}", WebFnWrap(WebFnOpts{AllowCaching: true}, handleVDom)) + + // Routes that need timeout handling + gr.PathPrefix("/wave/").Handler(http.TimeoutHandler(waveRouter, HttpTimeoutDuration, "Timeout")) + gr.PathPrefix("/vdom/").Handler(http.TimeoutHandler(vdomRouter, HttpTimeoutDuration, "Timeout")) + + // Routes that should NOT have timeout handling (for streaming) + gr.HandleFunc("/api/aichat", WebFnWrap(WebFnOpts{AllowCaching: false}, waveai.HandleAIChat)) + + // Other routes without timeout gr.PathPrefix(docsitePrefix).Handler(http.StripPrefix(docsitePrefix, docsite.GetDocsiteHandler())) gr.PathPrefix(schemaPrefix).Handler(http.StripPrefix(schemaPrefix, schema.GetSchemaHandler())) - handler := http.TimeoutHandler(gr, HttpTimeoutDuration, "Timeout") + + handler := http.Handler(gr) if wavebase.IsDevMode() { - handler = handlers.CORS(handlers.AllowedOrigins([]string{"*"}))(handler) + originalHandler := handler + handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + origin := r.Header.Get("Origin") + if origin != "" { + w.Header().Set("Access-Control-Allow-Origin", origin) + } + w.Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS") + w.Header().Set("Access-Control-Allow-Headers", "Content-Type, X-Session-Id, X-AuthKey, Authorization, X-Requested-With, Accept, x-vercel-ai-ui-message-stream") + w.Header().Set("Access-Control-Expose-Headers", "X-ZoneFileInfo, Content-Length, Content-Type, x-vercel-ai-ui-message-stream") + w.Header().Set("Access-Control-Allow-Credentials", "true") + + if r.Method == "OPTIONS" { + w.WriteHeader(204) + return + } + + originalHandler.ServeHTTP(w, r) + }) } server := &http.Server{ ReadTimeout: HttpReadTimeout,