Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
c13aa7e
Initial plan
Copilot Nov 26, 2025
eba8620
Add Google Gemini backend implementation
Copilot Nov 26, 2025
684c1b9
Add documentation for Gemini backend
Copilot Nov 26, 2025
9747f9b
Complete Google Gemini backend implementation - all features working
Copilot Nov 26, 2025
7963afb
Revert tsunami demo go.mod and go.sum files to original state
Copilot Nov 27, 2025
9818d06
Add --gemini flag to main-testai.go for testing Gemini backend
Copilot Nov 28, 2025
6c3dd9c
Remove testai binary and add to .gitignore
Copilot Nov 28, 2025
f6452f1
Merge remote-tracking branch 'origin/main' into copilot/add-google-ge…
sawka Nov 30, 2025
2401379
Fix Gemini schema validation by cleaning unsupported JSON Schema fields
Copilot Nov 30, 2025
2ade244
Preserve non-map property values in schema cleaning
Copilot Nov 30, 2025
9dbe9b9
Addressing PR comments
Copilot Nov 30, 2025
2b26813
fix tool bug, needed thought_signature
sawka Dec 1, 2025
614e4dd
Revert tsunami demo go.mod/go.sum files again
Copilot Dec 1, 2025
6e47388
fix thinking signature, working in Wave AI panel
sawka Dec 2, 2025
da0a37e
more fixes
sawka Dec 2, 2025
aab62e4
thought signature
sawka Dec 2, 2025
3c35382
remove debugging print
sawka Dec 2, 2025
dbf727f
Merge remote-tracking branch 'origin/main' into copilot/add-google-ge…
sawka Dec 5, 2025
0d9b678
updates for google ai provider
sawka Dec 5, 2025
8f965e7
add capabilities automatically for openai provider
sawka Dec 5, 2025
5a23ba8
update docs
sawka Dec 5, 2025
32b4cda
formatting
sawka Dec 5, 2025
b5b9092
use eventsource library to parse sse
sawka Dec 5, 2025
1faff3e
weird thoughtsignature handling
sawka Dec 5, 2025
1fbc09b
remove weird unused thoughtsignature fields
sawka Dec 5, 2025
d02100b
fix gemini image responses, gate screenshot tool appropriately based …
sawka Dec 5, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 76 additions & 4 deletions cmd/testai/main-testai.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ const (
DefaultAnthropicModel = "claude-sonnet-4-5"
DefaultOpenAIModel = "gpt-5.1"
DefaultOpenRouterModel = "mistralai/mistral-small-3.2-24b-instruct"
DefaultGeminiModel = "gemini-3-pro-preview"
)

// TestResponseWriter implements http.ResponseWriter and additional interfaces for testing
Expand Down Expand Up @@ -306,6 +307,57 @@ func testAnthropic(ctx context.Context, model, message string, tools []uctypes.T
}
}

// testGemini exercises the Google Gemini backend end-to-end via
// WaveAIPostMessageWrap, streaming the response through an SSE handler
// backed by a TestResponseWriter. It exits the process if GOOGLE_APIKEY
// is not set in the environment.
func testGemini(ctx context.Context, model, message string, tools []uctypes.ToolDefinition) {
	apiKey := os.Getenv("GOOGLE_APIKEY")
	if apiKey == "" {
		fmt.Println("Error: GOOGLE_APIKEY environment variable not set")
		os.Exit(1)
	}

	chatID := uuid.New().String()

	fmt.Printf("Testing Google Gemini streaming with WaveAIPostMessageWrap, model: %s\n", model)
	fmt.Printf("Message: %s\n", message)
	fmt.Printf("Chat ID: %s\n", chatID)
	fmt.Println("---")

	writer := &TestResponseWriter{}
	handler := sse.MakeSSEHandlerCh(writer, ctx)
	defer handler.Close()

	// Wrap the user-supplied text in an AIMessage for WaveAIPostMessageWrap.
	userMsg := &uctypes.AIMessage{
		MessageId: uuid.New().String(),
		Parts: []uctypes.AIMessagePart{{
			Type: uctypes.AIMessagePartTypeText,
			Text: message,
		}},
	}

	chatOpts := uctypes.WaveChatOpts{
		ChatId:   chatID,
		ClientId: uuid.New().String(),
		Config: uctypes.AIOptsType{
			APIType:      uctypes.APIType_GoogleGemini,
			APIToken:     apiKey,
			Model:        model,
			MaxTokens:    8192,
			Capabilities: []string{uctypes.AICapabilityTools, uctypes.AICapabilityImages, uctypes.AICapabilityPdfs},
		},
		Tools:        tools,
		SystemPrompt: []string{"You are a helpful assistant. Be concise and clear in your responses."},
	}

	if err := aiusechat.WaveAIPostMessageWrap(ctx, handler, userMsg, chatOpts); err != nil {
		fmt.Printf("Google Gemini streaming error: %v\n", err)
	}
}

func testT1(ctx context.Context) {
tool := aiusechat.GetAdderToolDefinition()
tools := []uctypes.ToolDefinition{tool}
Expand All @@ -322,8 +374,14 @@ func testT3(ctx context.Context) {
testOpenAIComp(ctx, "gpt-4o", "what is 2+2? please be brief", nil)
}

// testT4 runs the preset T4 test: the default Gemini model with the adder
// tool attached, prompting the model to use tool calling.
func testT4(ctx context.Context) {
	tools := []uctypes.ToolDefinition{aiusechat.GetAdderToolDefinition()}
	testGemini(ctx, DefaultGeminiModel, "what is 2+2+8, use the provider adder tool", tools)
}

func printUsage() {
fmt.Println("Usage: go run main-testai.go [--anthropic|--openaicomp|--openrouter] [--tools] [--model <model>] [message]")
fmt.Println("Usage: go run main-testai.go [--anthropic|--openaicomp|--openrouter|--gemini] [--tools] [--model <model>] [message]")
fmt.Println("Examples:")
fmt.Println(" go run main-testai.go 'What is 2+2?'")
fmt.Println(" go run main-testai.go --model o4-mini 'What is 2+2?'")
Expand All @@ -332,32 +390,38 @@ func printUsage() {
fmt.Println(" go run main-testai.go --openaicomp --model gpt-4o 'What is 2+2?'")
fmt.Println(" go run main-testai.go --openrouter 'What is 2+2?'")
fmt.Println(" go run main-testai.go --openrouter --model anthropic/claude-3.5-sonnet 'What is 2+2?'")
fmt.Println(" go run main-testai.go --gemini 'What is 2+2?'")
fmt.Println(" go run main-testai.go --gemini --model gemini-1.5-pro 'What is 2+2?'")
fmt.Println(" go run main-testai.go --tools 'Help me configure GitHub Actions monitoring'")
fmt.Println("")
fmt.Println("Default models:")
fmt.Printf(" OpenAI: %s\n", DefaultOpenAIModel)
fmt.Printf(" Anthropic: %s\n", DefaultAnthropicModel)
fmt.Printf(" OpenAI Completions: gpt-4o\n")
fmt.Printf(" OpenRouter: %s\n", DefaultOpenRouterModel)
fmt.Printf(" Google Gemini: %s\n", DefaultGeminiModel)
fmt.Println("")
fmt.Println("Environment variables:")
fmt.Println(" OPENAI_APIKEY (for OpenAI models)")
fmt.Println(" ANTHROPIC_APIKEY (for Anthropic models)")
fmt.Println(" OPENROUTER_APIKEY (for OpenRouter models)")
fmt.Println(" GOOGLE_APIKEY (for Google Gemini models)")
}

func main() {
var anthropic, openaicomp, openrouter, tools, help, t1, t2, t3 bool
var anthropic, openaicomp, openrouter, gemini, tools, help, t1, t2, t3, t4 bool
var model string
flag.BoolVar(&anthropic, "anthropic", false, "Use Anthropic API instead of OpenAI")
flag.BoolVar(&openaicomp, "openaicomp", false, "Use OpenAI Completions API")
flag.BoolVar(&openrouter, "openrouter", false, "Use OpenRouter API")
flag.BoolVar(&gemini, "gemini", false, "Use Google Gemini API")
flag.BoolVar(&tools, "tools", false, "Enable GitHub Actions Monitor tools for testing")
flag.StringVar(&model, "model", "", fmt.Sprintf("AI model to use (defaults: %s for OpenAI, %s for Anthropic, %s for OpenRouter)", DefaultOpenAIModel, DefaultAnthropicModel, DefaultOpenRouterModel))
flag.StringVar(&model, "model", "", fmt.Sprintf("AI model to use (defaults: %s for OpenAI, %s for Anthropic, %s for OpenRouter, %s for Gemini)", DefaultOpenAIModel, DefaultAnthropicModel, DefaultOpenRouterModel, DefaultGeminiModel))
flag.BoolVar(&help, "help", false, "Show usage information")
flag.BoolVar(&t1, "t1", false, fmt.Sprintf("Run preset T1 test (%s with 'what is 2+2')", DefaultAnthropicModel))
flag.BoolVar(&t2, "t2", false, fmt.Sprintf("Run preset T2 test (%s with 'what is 2+2')", DefaultOpenAIModel))
flag.BoolVar(&t3, "t3", false, "Run preset T3 test (OpenAI Completions API with gpt-4o)")
flag.BoolVar(&t3, "t3", false, "Run preset T3 test (OpenAI Completions API with gpt-5.1)")
flag.BoolVar(&t4, "t4", false, "Run preset T4 test (OpenAI Completions API with gemini-3-pro-preview)")
Comment on lines +423 to +424
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Fix incorrect flag descriptions.

The descriptions for t3 and t4 are incorrect:

  • t3 says "gpt-5.1" but testT3 uses "gpt-4o"
  • t4 says "OpenAI Completions API" but it actually tests Google Gemini

Apply this diff to fix the descriptions:

-	flag.BoolVar(&t3, "t3", false, "Run preset T3 test (OpenAI Completions API with gpt-5.1)")
-	flag.BoolVar(&t4, "t4", false, "Run preset T4 test (OpenAI Completions API with gemini-3-pro-preview)")
+	flag.BoolVar(&t3, "t3", false, "Run preset T3 test (OpenAI Completions API with gpt-4o)")
+	flag.BoolVar(&t4, "t4", false, "Run preset T4 test (Google Gemini API with gemini-3-pro-preview)")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
flag.BoolVar(&t3, "t3", false, "Run preset T3 test (OpenAI Completions API with gpt-5.1)")
flag.BoolVar(&t4, "t4", false, "Run preset T4 test (OpenAI Completions API with gemini-3-pro-preview)")
flag.BoolVar(&t3, "t3", false, "Run preset T3 test (OpenAI Completions API with gpt-4o)")
flag.BoolVar(&t4, "t4", false, "Run preset T4 test (Google Gemini API with gemini-3-pro-preview)")
🤖 Prompt for AI Agents
In cmd/testai/main-testai.go around lines 423 to 424, the flag descriptions are
incorrect: t3 references "gpt-5.1" but the test uses "gpt-4o", and t4
incorrectly says "OpenAI Completions API" while it targets Google Gemini. Update
the flag.BoolVar descriptions to accurately reflect each test: change t3
description to indicate it runs preset T3 test with gpt-4o (OpenAI) and change
t4 description to indicate it runs preset T4 test with Google Gemini (e.g.,
gemini-3-pro-preview) so they match the behavior in testT3/testT4.

flag.Parse()

if help {
Expand All @@ -380,6 +444,10 @@ func main() {
testT3(ctx)
return
}
if t4 {
testT4(ctx)
return
}

// Set default model based on API type if not provided
if model == "" {
Expand All @@ -389,6 +457,8 @@ func main() {
model = "gpt-4o"
} else if openrouter {
model = DefaultOpenRouterModel
} else if gemini {
model = DefaultGeminiModel
} else {
model = DefaultOpenAIModel
}
Expand All @@ -411,6 +481,8 @@ func main() {
testOpenAIComp(ctx, model, message, toolDefs)
} else if openrouter {
testOpenRouter(ctx, model, message, toolDefs)
} else if gemini {
testGemini(ctx, model, message, toolDefs)
} else {
testOpenAI(ctx, model, message, toolDefs)
}
Expand Down
3 changes: 2 additions & 1 deletion cmd/wsh/cmd/wshcmd-secret.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ func secretUiRun(cmd *cobra.Command, args []string) (rtnErr error) {
wshCmd := &wshrpc.CommandCreateBlockData{
BlockDef: &waveobj.BlockDef{
Meta: map[string]interface{}{
waveobj.MetaKey_View: "secretstore",
waveobj.MetaKey_View: "waveconfig",
waveobj.MetaKey_File: "secrets",
},
},
Magnified: secretUiMagnified,
Expand Down
91 changes: 77 additions & 14 deletions docs/docs/waveai-modes.mdx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
sidebar_position: 1.6
id: "waveai-modes"
title: "Wave AI (Local Models)"
title: "Wave AI (Local Models + BYOK)"
---

Wave AI supports custom AI modes that allow you to use local models, custom API endpoints, and alternative AI providers. This gives you complete control over which models and providers you use with Wave's AI features.
Expand Down Expand Up @@ -37,10 +37,11 @@ Wave AI now supports provider-based configuration which automatically applies se

### Supported API Types

Wave AI supports two OpenAI-compatible API types:
Wave AI supports the following API types:

- **`openai-chat`**: Uses the `/v1/chat/completions` endpoint (most common)
- **`openai-responses`**: Uses the `/v1/responses` endpoint (modern API for GPT-5+ models)
- **`google-gemini`**: Google's Gemini API format (automatically set when using `ai:provider: "google"`, not typically used directly)

## Configuration Structure

Expand All @@ -49,7 +50,7 @@ Wave AI supports two OpenAI-compatible API types:
```json
{
"mode-key": {
"display:name": "Display Name",
"display:name": "Qwen (OpenRouter)",
"ai:provider": "openrouter",
"ai:model": "qwen/qwen-2.5-coder-32b-instruct"
}
Expand Down Expand Up @@ -89,10 +90,10 @@ Wave AI supports two OpenAI-compatible API types:
| `display:icon` | No | Icon identifier for the mode |
| `display:description` | No | Full description of the mode |
| `ai:provider` | No | Provider preset: `openai`, `openrouter`, `google`, `azure`, `azure-legacy`, `custom` |
| `ai:apitype` | No | API type: `openai-chat` or `openai-responses` (defaults to `openai-chat` if not specified) |
| `ai:apitype` | No | API type: `openai-chat`, `openai-responses`, or `google-gemini` (defaults to `openai-chat` if not specified) |
| `ai:model` | No | Model identifier (required for most providers) |
| `ai:thinkinglevel` | No | Thinking level: `low`, `medium`, or `high` |
| `ai:endpoint` | No | Full API endpoint URL (auto-set by provider when available) |
| `ai:endpoint` | No | *Full* API endpoint URL (auto-set by provider when available) |
| `ai:azureapiversion` | No | Azure API version (for `azure-legacy` provider, defaults to `2025-04-01-preview`) |
| `ai:apitoken` | No | API key/token (not recommended - use secrets instead) |
| `ai:apitokensecretname` | No | Name of secret containing API token (auto-set by provider) |
Expand All @@ -110,6 +111,14 @@ The `ai:capabilities` field specifies what features the AI mode supports:
- **`images`** - Allows image attachments in chat (model can view uploaded images)
- **`pdfs`** - Allows PDF file attachments in chat (model can read PDF content)

**Provider-specific behavior:**
- **OpenAI and Google providers**: Capabilities are automatically configured based on the model. You don't need to specify them.
- **OpenRouter, Azure, Azure-Legacy, and Custom providers**: You must manually specify capabilities based on your model's features.

:::warning
If you don't include `"tools"` in the `ai:capabilities` array, the AI model will not be able to interact with your Wave terminal widgets, read/write files, or execute commands. Most AI modes should include `"tools"` for the best Wave experience.
:::

Most models support `tools` and can benefit from it. Vision-capable models should include `images`. Not all models support PDFs, so only include `pdfs` if your model can process them.

## Local Model Examples
Expand All @@ -127,7 +136,7 @@ Most models support `tools` and can benefit from it. Vision-capable models shoul
"display:description": "Local Llama 3.3 70B model via Ollama",
"ai:apitype": "openai-chat",
"ai:model": "llama3.3:70b",
"ai:thinkinglevel": "normal",
"ai:thinkinglevel": "medium",
"ai:endpoint": "http://localhost:11434/v1/chat/completions",
"ai:apitoken": "ollama"
}
Expand All @@ -151,28 +160,28 @@ The `ai:apitoken` field is required but Ollama ignores it - you can set it to an
"display:description": "Local Qwen model via LM Studio",
"ai:apitype": "openai-chat",
"ai:model": "qwen/qwen-2.5-coder-32b-instruct",
"ai:thinkinglevel": "normal",
"ai:thinkinglevel": "medium",
"ai:endpoint": "http://localhost:1234/v1/chat/completions",
"ai:apitoken": "not-needed"
}
}
```

### Jan
### vLLM

[Jan](https://jan.ai) is another local AI runtime with OpenAI API compatibility:
[vLLM](https://docs.vllm.ai) is a high-performance inference server with OpenAI API compatibility:

```json
{
"jan-local": {
"display:name": "Jan",
"vllm-local": {
"display:name": "vLLM",
"display:order": 3,
"display:icon": "server",
"display:description": "Local model via Jan",
"display:description": "Local model via vLLM",
"ai:apitype": "openai-chat",
"ai:model": "your-model-name",
"ai:thinkinglevel": "normal",
"ai:endpoint": "http://localhost:1337/v1/chat/completions",
"ai:thinkinglevel": "medium",
"ai:endpoint": "http://localhost:8000/v1/chat/completions",
"ai:apitoken": "not-needed"
}
}
Expand All @@ -198,6 +207,7 @@ The provider automatically sets:
- `ai:endpoint` to `https://api.openai.com/v1/chat/completions`
- `ai:apitype` to `openai-chat` (or `openai-responses` for GPT-5+ models)
- `ai:apitokensecretname` to `OPENAI_KEY` (store your OpenAI API key with this name)
- `ai:capabilities` to `["tools", "images", "pdfs"]` (automatically determined based on model)

For newer models like GPT-4.1 or GPT-5, the API type is automatically determined:

Expand Down Expand Up @@ -230,6 +240,40 @@ The provider automatically sets:
- `ai:apitype` to `openai-chat`
- `ai:apitokensecretname` to `OPENROUTER_KEY` (store your OpenRouter API key with this name)

:::note
For OpenRouter, you must manually specify `ai:capabilities` based on your model's features. Example:
```json
{
"openrouter-qwen": {
"display:name": "OpenRouter - Qwen",
"ai:provider": "openrouter",
"ai:model": "qwen/qwen-2.5-coder-32b-instruct",
"ai:capabilities": ["tools"]
}
}
```
:::

### Google AI (Gemini)

[Google AI](https://ai.google.dev) provides the Gemini family of models. Using the `google` provider simplifies configuration:

```json
{
"google-gemini": {
"display:name": "Gemini 3 Pro",
"ai:provider": "google",
"ai:model": "gemini-3-pro-preview"
}
}
```

The provider automatically sets:
- `ai:endpoint` to `https://generativelanguage.googleapis.com/v1beta/models/{model}:streamGenerateContent`
- `ai:apitype` to `google-gemini`
- `ai:apitokensecretname` to `GOOGLE_AI_KEY` (store your Google AI API key with this name)
- `ai:capabilities` to `["tools", "images", "pdfs"]` (automatically configured)

### Azure OpenAI (Modern API)

For the modern Azure OpenAI API, use the `azure` provider:
Expand All @@ -250,6 +294,21 @@ The provider automatically sets:
- `ai:apitype` based on the model
- `ai:apitokensecretname` to `AZURE_OPENAI_KEY` (store your Azure OpenAI key with this name)

:::note
For Azure providers, you must manually specify `ai:capabilities` based on your model's features. Example:
```json
{
"azure-gpt4": {
"display:name": "Azure GPT-4",
"ai:provider": "azure",
"ai:model": "gpt-4",
"ai:azureresourcename": "your-resource-name",
"ai:capabilities": ["tools", "images"]
}
}
```
:::

### Azure OpenAI (Legacy Deployment API)

For legacy Azure deployments, use the `azure-legacy` provider:
Expand All @@ -267,6 +326,10 @@ For legacy Azure deployments, use the `azure-legacy` provider:

The provider automatically constructs the full endpoint URL and sets the API version (defaults to `2025-04-01-preview`). You can override the API version with `ai:azureapiversion` if needed.

:::note
For Azure Legacy provider, you must manually specify `ai:capabilities` based on your model's features.
:::

## Using Secrets for API Keys

Instead of storing API keys directly in the configuration, you should use Wave's secret store to keep your credentials secure. Secrets are stored encrypted using your system's native keychain.
Expand Down
4 changes: 2 additions & 2 deletions docs/docs/waveai.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Controls AI's access to your workspace:

## File Attachments

Drag files onto the AI panel to attach:
Drag files onto the AI panel to attach (not supported with all models):

| Type | Formats | Size Limit | Notes |
|------|---------|------------|-------|
Expand Down Expand Up @@ -68,7 +68,7 @@ Supports text files, images, PDFs, and directories. Use `-n` for new chat, `-s`
- **Navigate Web**: Changes URLs in web browser widgets

### All Widgets
- **Capture Screenshots**: Takes screenshots of any widget for visual analysis
- **Capture Screenshots**: Takes screenshots of any widget for visual analysis (not supported on all models)

:::warning Security
File system operations require explicit approval. You control all file access.
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ require (
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/outrigdev/goid v0.3.0 // indirect
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check if goid appears in go.sum
echo "=== Checking go.sum for goid ==="
grep "goid" go.sum | head -20

echo -e "\n=== Checking go.mod for goid ==="
grep "goid" go.mod

# List all entries in go.mod to see context
echo -e "\n=== All entries in go.mod indirect section (lines 46-107) ==="
sed -n '46,107p' go.mod | head -30

# Check if goid is mentioned anywhere else in the repo
echo -e "\n=== Searching for goid in repository ==="
rg "goid" --type go --type mod -i | head -20

Repository: wavetermdev/waveterm

Length of output: 2224


🏁 Script executed:

#!/bin/bash
# Search for goid in the context of Go dependencies
# Check if we can find which package requires goid by looking at import statements
echo "=== Searching for goid imports in Go source files ==="
rg "goid" --type go -C 3

echo -e "\n=== Check if goid is referenced in any documentation ==="
fd -t f "\.md$|\.txt$" | xargs grep -l "goid" 2>/dev/null || echo "No matches in docs"

# Try to find if goid is a common utility package
echo -e "\n=== Checking go.mod for clues about dependency relationships ==="
# Look at the structure to understand which packages are related to Gemini
grep -E "google|gemini|ai|cloud" go.mod | head -15

Repository: wavetermdev/waveterm

Length of output: 3826


🌐 Web query:

github.com/outrigdev/goid Go package purpose

💡 Result:

The github.com/outrigdev/goid package provides fast retrieval of the current goroutine ID. It uses version- and arch-specific assembly (optimized for Go 1.23–1.25 on amd64/arm64) and falls back to parsing runtime.Stack() when the optimized path isn’t available. It’s intended for debugging/dev-time observability (used by Outrig) and not recommended for production logic since goroutine IDs are an internal runtime detail. [1][2]

Sources:

  • Package README (pkg.go.dev) describing API, implementations, platforms and warnings. [1]
  • Outrig autoinit / project info (context that goid is for Outrig dev-time observability). [2]

Remove the unrelated goid dependency added to the tsunami engine.

The github.com/outrigdev/goid v0.3.0 package is unrelated to Gemini backend integration. It was added directly to tsunami/engine/globalctx.go for goroutine ID tracking—a change orthogonal to the stated PR purpose. Given that you previously reverted similar inappropriate go.mod modifications in the tsunami demo, this dependency should not be included in this PR. Either scope it to a separate change with proper justification, or remove it.

🤖 Prompt for AI Agents
In go.mod around line 82, remove the unrelated dependency entry
"github.com/outrigdev/goid v0.3.0 // indirect" that was added for goroutine ID
tracking; this change is orthogonal to the Gemini backend work. Update go.mod by
deleting that line and run "go mod tidy" (or equivalent) to clean up module
files and ensure no unused dependencies remain. If goroutine ID tracking is
required, add it in a separate PR with justification and code referencing the
package.

github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuE
github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/outrigdev/goid v0.3.0 h1:t/otQD3EXc45cLtQVPUnNgEyRaTQA4cPeu3qVcrsIws=
github.com/outrigdev/goid v0.3.0/go.mod h1:hEH7f27ypN/GHWt/7gvkRoFYR0LZizfUBIAbak4neVE=
github.com/photostorm/pty v1.1.19-0.20230903182454-31354506054b h1:cLGKfKb1uk0hxI0Q8L83UAJPpeJ+gSpn3cCU/tjd3eg=
github.com/photostorm/pty v1.1.19-0.20230903182454-31354506054b/go.mod h1:KO+FcPtyLAiRC0hJwreJVvfwc7vnNz77UxBTIGHdPVk=
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo=
Expand Down
7 changes: 7 additions & 0 deletions pkg/aiusechat/aiutil/aiutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,13 @@ func CheckModelSubPrefix(model string, prefix string) bool {
return false
}

// GeminiSupportsImageToolResults reports whether the model supports
// multimodal function responses (images in tool results). Only the Gemini 3
// and later model families support this; matching is case-insensitive on
// the model identifier.
func GeminiSupportsImageToolResults(model string) bool {
	lowered := strings.ToLower(model)
	for _, family := range []string{"gemini-3", "gemini-4"} {
		if strings.Contains(lowered, family) {
			return true
		}
	}
	return false
}

// CreateToolUseData creates a UIMessageDataToolUse from tool call information
func CreateToolUseData(toolCallID, toolName string, arguments string, chatOpts uctypes.WaveChatOpts) uctypes.UIMessageDataToolUse {
toolUseData := uctypes.UIMessageDataToolUse{
Expand Down
Loading
Loading