diff --git a/docs/docs/ai-presets.mdx b/docs/docs/ai-presets.mdx index de117a86c9..cded25ab13 100644 --- a/docs/docs/ai-presets.mdx +++ b/docs/docs/ai-presets.mdx @@ -70,6 +70,22 @@ To use Claude models, create a preset like this: } ``` +### OpenAI + +To use OpenAI's models: + +```json +{ + "ai@openai-gpt41": { + "display:name": "GPT-4.1", + "display:order": 2, + "ai:*": true, + "ai:model": "gpt-4.1", + "ai:apitoken": "" + } +} +``` + ### Local LLMs (Ollama) To connect to a local Ollama instance: @@ -78,7 +94,7 @@ To connect to a local Ollama instance: { "ai@ollama-llama": { "display:name": "Ollama - Llama2", - "display:order": 2, + "display:order": 3, "ai:*": true, "ai:baseurl": "http://localhost:11434/v1", "ai:name": "llama2", @@ -98,7 +114,7 @@ To connect to Azure AI services: { "ai@azure-gpt4": { "display:name": "Azure GPT-4", - "display:order": 3, + "display:order": 4, "ai:*": true, "ai:apitype": "azure", "ai:baseurl": "", @@ -118,7 +134,7 @@ To use Perplexity's models: { "ai@perplexity-sonar": { "display:name": "Perplexity Sonar", - "display:order": 4, + "display:order": 5, "ai:*": true, "ai:apitype": "perplexity", "ai:model": "llama-3.1-sonar-small-128k-online", @@ -135,7 +151,7 @@ To use Google's Gemini models from [Google AI Studio](https://aistudio.google.co { "ai@gemini-2.0": { "display:name": "Gemini 2.0", - "display:order": 5, + "display:order": 6, "ai:*": true, "ai:apitype": "google", "ai:model": "gemini-2.0-flash-exp", @@ -158,9 +174,16 @@ You can define multiple presets in your `ai.json` file: "ai:model": "claude-3-5-sonnet-latest", "ai:apitoken": "" }, + "ai@openai-gpt41": { + "display:name": "GPT-4.1", + "display:order": 2, + "ai:*": true, + "ai:model": "gpt-4.1", + "ai:apitoken": "" + }, "ai@ollama-llama": { "display:name": "Ollama - Llama2", - "display:order": 2, + "display:order": 3, "ai:*": true, "ai:baseurl": "http://localhost:11434/v1", "ai:name": "llama2", @@ -169,7 +192,7 @@ You can define multiple presets in your `ai.json` file: }, 
"ai@perplexity-sonar": { "display:name": "Perplexity Sonar", - "display:order": 3, + "display:order": 4, "ai:*": true, "ai:apitype": "perplexity", "ai:model": "llama-3.1-sonar-small-128k-online", @@ -187,3 +210,23 @@ Remember to set your default preset in `settings.json`: "ai:preset": "ai@claude-sonnet" } ``` + +## Using a Proxy + +If you need to route AI requests through an HTTP proxy, you can add the `ai:proxyurl` setting to any preset: + +```json +{ + "ai@claude-with-proxy": { + "display:name": "Claude 3.5 Sonnet (via Proxy)", + "display:order": 1, + "ai:*": true, + "ai:apitype": "anthropic", + "ai:model": "claude-3-5-sonnet-latest", + "ai:apitoken": "", + "ai:proxyurl": "http://proxy.example.com:8080" + } +} +``` + +The proxy URL should be in the format `http://host:port` or `https://host:port`. This setting works with all AI providers except Wave Cloud AI (the default). diff --git a/docs/docs/config.mdx b/docs/docs/config.mdx index 9a211c3cc4..ed042ffbb7 100644 --- a/docs/docs/config.mdx +++ b/docs/docs/config.mdx @@ -44,8 +44,9 @@ wsh editconfig | ai:model | string | model name to pass to API | | ai:apiversion | string | for Azure AI only (when apitype is "azure", this will default to "2023-05-15") | | ai:orgid | string | | -| ai:maxtokens | int | max tokens to pass to API | -| ai:timeoutms | int | timeout (in milliseconds) for AI calls | +| ai:maxtokens | int | max tokens to pass to API | +| ai:timeoutms | int | timeout (in milliseconds) for AI calls | +| ai:proxyurl | string | HTTP proxy URL for AI API requests (does not apply to Wave Cloud AI) | | conn:askbeforewshinstall | bool | set to false to disable popup asking if you want to install wsh extensions on new machines | | term:fontsize | float | the fontsize for the terminal block | | term:fontfamily | string | font family to use for terminal block | diff --git a/pkg/waveai/openaibackend.go b/pkg/waveai/openaibackend.go index c077afeea4..a698a9df27 100644 --- a/pkg/waveai/openaibackend.go +++ 
b/pkg/waveai/openaibackend.go @@ -29,6 +29,14 @@ func defaultAzureMapperFn(model string) string { return regexp.MustCompile(`[.:]`).ReplaceAllString(model, "") } +func isReasoningModel(model string) bool { + m := strings.ToLower(model) + return strings.HasPrefix(m, "o1") || + strings.HasPrefix(m, "o3") || + strings.HasPrefix(m, "o4") || + strings.HasPrefix(m, "gpt-5") +} + func setApiType(opts *wshrpc.WaveAIOptsType, clientConfig *openaiapi.ClientConfig) error { ourApiType := strings.ToLower(opts.APIType) if ourApiType == "" || ourApiType == APIType_OpenAI || ourApiType == strings.ToLower(string(openaiapi.APITypeOpenAI)) { @@ -123,38 +131,14 @@ func (OpenAIBackend) StreamCompletion(ctx context.Context, request wshrpc.WaveAI Messages: convertPrompt(request.Prompt), } - // Handle o1 models differently - use non-streaming API - if strings.HasPrefix(request.Opts.Model, "o1-") { + // Set MaxCompletionTokens for reasoning models, MaxTokens for others + if isReasoningModel(request.Opts.Model) { req.MaxCompletionTokens = request.Opts.MaxTokens - req.Stream = false - - // Make non-streaming API call - resp, err := client.CreateChatCompletion(ctx, req) - if err != nil { - rtn <- makeAIError(fmt.Errorf("error calling openai API: %v", err)) - return - } - - // Send header packet - headerPk := MakeWaveAIPacket() - headerPk.Model = resp.Model - headerPk.Created = resp.Created - rtn <- wshrpc.RespOrErrorUnion[wshrpc.WaveAIPacketType]{Response: *headerPk} - - // Send content packet(s) - for i, choice := range resp.Choices { - pk := MakeWaveAIPacket() - pk.Index = i - pk.Text = choice.Message.Content - pk.FinishReason = string(choice.FinishReason) - rtn <- wshrpc.RespOrErrorUnion[wshrpc.WaveAIPacketType]{Response: *pk} - } - return + } else { + req.MaxTokens = request.Opts.MaxTokens } - // Original streaming implementation for non-o1 models req.Stream = true - req.MaxTokens = request.Opts.MaxTokens if request.Opts.MaxChoices > 1 { req.N = request.Opts.MaxChoices }