diff --git a/aiprompts/usechat-backend-design.md b/aiprompts/usechat-backend-design.md new file mode 100644 index 0000000000..f5793718c1 --- /dev/null +++ b/aiprompts/usechat-backend-design.md @@ -0,0 +1,463 @@ +# useChat Compatible Backend Design for Wave Terminal + +## Overview + +This document outlines how to create a `useChat()` compatible backend API using Go and Server-Sent Events (SSE) to replace the current complex RPC-based AI chat system. The goal is to leverage Vercel AI SDK's `useChat()` hook while maintaining all existing AI provider functionality. + +## Current vs Target Architecture + +### Current Architecture +``` +Frontend (React) → Custom RPC → Go Backend → AI Providers +- 10+ Jotai atoms for state management +- Custom WaveAIStreamRequest/WaveAIPacketType +- Complex configuration merging in frontend +- Custom streaming protocol over WebSocket +``` + +### Target Architecture +``` +Frontend (useChat) → HTTP/SSE → Go Backend → AI Providers +- Single useChat() hook manages all state +- Standard HTTP POST + SSE streaming +- Backend-driven configuration resolution +- Standard AI SDK streaming format +``` + +## API Design + +### 1. Endpoint Structure + +**Chat Streaming Endpoint:** +``` +POST /api/ai/chat/{blockId}?preset={presetKey} +``` + +**Conversation Persistence Endpoints:** +``` +POST /api/ai/conversations/{blockId} # Save conversation +GET /api/ai/conversations/{blockId} # Load conversation +``` + +**Why this approach:** +- `blockId`: Identifies the conversation context (existing Wave concept) +- `preset`: URL parameter for AI configuration preset +- **Separate persistence**: Clean separation of streaming vs storage +- **Fast localhost calls**: Frontend can call both endpoints quickly +- **Simple backend**: Each endpoint has single responsibility + +### 2. 
Request Format & Message Flow + +**Simplified Approach:** +- Frontend manages **entire conversation state** (like all modern chat apps) +- Frontend sends **complete message history** with each request +- Backend just processes the messages and streams response +- Frontend handles persistence via existing Wave file system + +**Standard useChat() Request:** +```json +{ + "messages": [ + { + "id": "msg-1", + "role": "user", + "content": "Hello world" + }, + { + "id": "msg-2", + "role": "assistant", + "content": "Hi there!" + }, + { + "id": "msg-3", + "role": "user", + "content": "How are you?" // <- NEW message user just typed + } + ] +} +``` + +**Backend Processing:** +1. **Receive complete conversation** from frontend +2. **Resolve AI configuration** (preset, model, etc.) +3. **Send messages directly** to AI provider +4. **Stream response** back to frontend +5. **Frontend calls separate persistence endpoint** when needed + +**Optional Extensions:** +```json +{ + "messages": [...], + "options": { + "temperature": 0.7, + "maxTokens": 1000, + "model": "gpt-4" // Override preset model + } +} +``` + +### 3. Configuration Resolution + +**Priority Order (backend resolves):** +1. **Request options** (highest priority) +2. **URL preset parameter** +3. **Block metadata** (`block.meta["ai:preset"]`) +4. **Global settings** (`settings["ai:preset"]`) +5. **Default preset** (lowest priority) + +**Backend Logic:** +```go +func resolveAIConfig(blockId, presetKey string, requestOptions map[string]any) (*WaveAIOptsType, error) { + // 1. Load block metadata + block := getBlock(blockId) + blockPreset := block.Meta["ai:preset"] + + // 2. Load global settings + settings := getGlobalSettings() + globalPreset := settings["ai:preset"] + + // 3. Resolve preset hierarchy + finalPreset := presetKey + if finalPreset == "" { + finalPreset = blockPreset + } + if finalPreset == "" { + finalPreset = globalPreset + } + if finalPreset == "" { + finalPreset = "default" + } + + // 4. 
Load and merge preset config + presetConfig := loadPreset(finalPreset) + + // 5. Apply request overrides + return mergeAIConfig(presetConfig, requestOptions), nil +} +``` + +### 4. Response Format (SSE) + +**Key Insight: Minimal Conversion** +Most AI providers (OpenAI, Anthropic) already return SSE streams. Instead of converting to our custom format and back, we can **proxy/transform** their streams directly to useChat format. + +**Headers:** +``` +Content-Type: text/event-stream +Cache-Control: no-cache +Connection: keep-alive +Access-Control-Allow-Origin: * +``` + +**useChat Expected Format:** +``` +data: {"type":"text","text":"Hello"} + +data: {"type":"text","text":" world"} + +data: {"type":"text","text":"!"} + +data: {"type":"finish","finish_reason":"stop","usage":{"prompt_tokens":10,"completion_tokens":3,"total_tokens":13}} + +data: [DONE] +``` + +**Provider Stream Transformation:** +- **OpenAI**: Already SSE → direct proxy (no conversion needed) +- **Anthropic**: Already SSE → direct proxy (minimal field mapping) +- **Google**: Already streaming → direct proxy +- **Perplexity**: OpenAI-compatible → direct proxy +- **Wave Cloud**: WebSocket → **requires conversion** (only one needing transformation) + +**Error Format:** +``` +data: {"type":"error","error":"API key invalid"} + +data: [DONE] +``` + +## Implementation Plan + +### Phase 1: HTTP Handler + +```go +// Simplified approach: Direct provider streaming with minimal transformation +func (s *WshServer) HandleAIChat(w http.ResponseWriter, r *http.Request) { + // 1. Parse URL parameters + blockId := mux.Vars(r)["blockId"] + presetKey := r.URL.Query().Get("preset") + + // 2. Parse request body + var req struct { + Messages []struct { + Role string `json:"role"` + Content string `json:"content"` + } `json:"messages"` + Options map[string]any `json:"options,omitempty"` + } + json.NewDecoder(r.Body).Decode(&req) + + // 3. 
Resolve configuration + aiOpts, err := resolveAIConfig(blockId, presetKey, req.Options) + if err != nil { + http.Error(w, err.Error(), 400) + return + } + + // 4. Set SSE headers + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + + // 5. Route to provider and stream directly + switch aiOpts.APIType { + case "openai", "perplexity": + // Direct proxy - these are already SSE compatible + streamDirectSSE(w, r.Context(), aiOpts, req.Messages) + case "anthropic": + // Direct proxy with minimal field mapping + streamAnthropicSSE(w, r.Context(), aiOpts, req.Messages) + case "google": + // Direct proxy + streamGoogleSSE(w, r.Context(), aiOpts, req.Messages) + default: + // Wave Cloud - only one requiring conversion (WebSocket → SSE) + if isCloudAIRequest(aiOpts) { + streamWaveCloudToUseChat(w, r.Context(), aiOpts, req.Messages) + } else { + http.Error(w, "Unsupported provider", 400) + } + } +} + +// Example: Direct OpenAI streaming (minimal conversion) +func streamOpenAIToUseChat(w http.ResponseWriter, ctx context.Context, opts *WaveAIOptsType, messages []Message) { + client := openai.NewClient(opts.APIToken) + + stream, err := client.CreateChatCompletionStream(ctx, openai.ChatCompletionRequest{ + Model: opts.Model, + Messages: convertToOpenAIMessages(messages), + Stream: true, + }) + if err != nil { + fmt.Fprintf(w, "data: {\"type\":\"error\",\"error\":%q}\n\n", err.Error()) + fmt.Fprintf(w, "data: [DONE]\n\n") + return + } + defer stream.Close() + + for { + response, err := stream.Recv() + if errors.Is(err, io.EOF) { + fmt.Fprintf(w, "data: [DONE]\n\n") + return + } + if err != nil { + fmt.Fprintf(w, "data: {\"type\":\"error\",\"error\":%q}\n\n", err.Error()) + fmt.Fprintf(w, "data: [DONE]\n\n") + return + } + + // Direct transformation: OpenAI format → useChat format + for _, choice := range response.Choices { + if choice.Delta.Content != "" { + fmt.Fprintf(w, "data: 
{\"type\":\"text\",\"text\":%q}\n\n", choice.Delta.Content) + } + if choice.FinishReason != "" { + fmt.Fprintf(w, "data: {\"type\":\"finish\",\"finish_reason\":%q}\n\n", choice.FinishReason) + } + } + + w.(http.Flusher).Flush() + } +} + +// Wave Cloud conversion (only provider needing transformation) +func streamWaveCloudToUseChat(w http.ResponseWriter, ctx context.Context, opts *WaveAIOptsType, messages []Message) { + // Use existing Wave Cloud WebSocket logic + waveReq := wshrpc.WaveAIStreamRequest{ + Opts: opts, + Prompt: convertMessagesToPrompt(messages), + } + + stream := waveai.RunAICommand(ctx, waveReq) // Returns WebSocket stream + + // Convert Wave Cloud packets to useChat SSE format + for packet := range stream { + if packet.Error != nil { + fmt.Fprintf(w, "data: {\"type\":\"error\",\"error\":%q}\n\n", packet.Error.Error()) + break + } + + resp := packet.Response + if resp.Text != "" { + fmt.Fprintf(w, "data: {\"type\":\"text\",\"text\":%q}\n\n", resp.Text) + } + if resp.FinishReason != "" { + usage := "" + if resp.Usage != nil { + usage = fmt.Sprintf(",\"usage\":{\"prompt_tokens\":%d,\"completion_tokens\":%d,\"total_tokens\":%d}", + resp.Usage.PromptTokens, resp.Usage.CompletionTokens, resp.Usage.TotalTokens) + } + fmt.Fprintf(w, "data: {\"type\":\"finish\",\"finish_reason\":%q%s}\n\n", resp.FinishReason, usage) + } + + w.(http.Flusher).Flush() + } + + fmt.Fprintf(w, "data: [DONE]\n\n") +} +``` + +### Phase 2: Frontend Integration + +```typescript +import { useChat } from '@ai-sdk/react'; + +function WaveAI({ blockId }: { blockId: string }) { + // Get current preset from block metadata or settings + const preset = useAtomValue(currentPresetAtom); + + const { messages, input, handleInputChange, handleSubmit, isLoading, error } = useChat({ + api: `/api/ai/chat/${blockId}?preset=${preset}`, + initialMessages: [], // Load from existing aidata file + onFinish: (message) => { + // Save conversation to aidata file + saveConversation(blockId, messages); + } + 
}); + + return ( +
+
+ {messages.map(message => ( +
+ +
+ ))} + {isLoading && } + {error &&
{error.message}
} +
+ +
+ +
+
+ ); +} +``` + +### Phase 3: Advanced Features + +#### Multi-modal Support +```typescript +// useChat supports multi-modal out of the box +const { messages, append } = useChat({ + api: `/api/ai/chat/${blockId}`, +}); + +// Send image + text +await append({ + role: 'user', + content: [ + { type: 'text', text: 'What do you see in this image?' }, + { type: 'image', image: imageFile } + ] +}); +``` + +#### Thinking Models +```go +// Backend detects thinking models and formats appropriately +if isThinkingModel(aiOpts.Model) { + // Send thinking content separately + fmt.Fprintf(w, "data: {\"type\":\"thinking\",\"text\":%q}\n\n", thinkingText) + fmt.Fprintf(w, "data: {\"type\":\"text\",\"text\":%q}\n\n", responseText) +} +``` + +#### Context Injection +```typescript +// Add system messages or context via useChat options +const { messages, append } = useChat({ + api: `/api/ai/chat/${blockId}`, + initialMessages: [ + { + role: 'system', + content: 'You are a helpful terminal assistant...' + } + ] +}); +``` + +## Migration Strategy + +### 1. Parallel Implementation +- Keep existing RPC system running +- Add new HTTP/SSE endpoint alongside +- Feature flag to switch between systems + +### 2. Gradual Migration +- Start with new blocks using useChat +- Migrate existing conversations on first interaction +- Remove RPC system once stable + +### 3. Backward Compatibility +- Existing aidata files work unchanged +- Same provider backends (OpenAI, Anthropic, etc.) 
+- Same configuration system + +## Benefits + +### Complexity Reduction +- **Frontend**: ~900 lines → ~100 lines (90% reduction) +- **State Management**: 10+ atoms → 1 useChat hook +- **Configuration**: Frontend merging → Backend resolution +- **Streaming**: Custom protocol → Standard SSE + +### Modern Features +- **Multi-modal**: Images, files, audio support +- **Thinking Models**: Built-in reasoning trace support +- **Conversation Management**: Edit, retry, branch conversations +- **Error Handling**: Automatic retry and error boundaries +- **Performance**: Optimized streaming and batching + +### Developer Experience +- **Type Safety**: Full TypeScript support +- **Testing**: Standard HTTP endpoints easier to test +- **Debugging**: Standard browser dev tools work +- **Documentation**: Leverage AI SDK docs and community + +## Configuration Examples + +### URL-based Configuration +``` +POST /api/ai/chat/block-123?preset=claude-coding +POST /api/ai/chat/block-456?preset=gpt4-creative +``` + +### Header-based Overrides +``` +POST /api/ai/chat/block-123 +X-AI-Model: gpt-4-turbo +X-AI-Temperature: 0.8 +``` + +### Request Body Options +```json +{ + "messages": [...], + "options": { + "model": "claude-3-sonnet", + "temperature": 0.7, + "maxTokens": 2000 + } +} +``` + +This design maintains all existing functionality while dramatically simplifying the implementation and adding modern AI chat capabilities. \ No newline at end of file diff --git a/aiprompts/usechat-streamingproto.md b/aiprompts/usechat-streamingproto.md new file mode 100644 index 0000000000..57ab550ba1 --- /dev/null +++ b/aiprompts/usechat-streamingproto.md @@ -0,0 +1,185 @@ +Data Stream Protocol +A data stream follows a special protocol that the AI SDK provides to send information to the frontend. + +The data stream protocol uses Server-Sent Events (SSE) format for improved standardization, keep-alive through ping, reconnect capabilities, and better cache handling. 
+ +When you provide data streams from a custom backend, you need to set the x-vercel-ai-ui-message-stream header to v1. + +The following stream parts are currently supported: + +Message Start Part +Indicates the beginning of a new message with metadata. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"start","messageId":"..."} +Text Parts +Text content is streamed using a start/delta/end pattern with unique IDs for each text block. + +Text Start Part +Indicates the beginning of a text block. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"text-start","id":"msg_68679a454370819ca74c8eb3d04379630dd1afb72306ca5d"} +Text Delta Part +Contains incremental text content for the text block. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"text-delta","id":"msg_68679a454370819ca74c8eb3d04379630dd1afb72306ca5d","delta":"Hello"} +Text End Part +Indicates the completion of a text block. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"text-end","id":"msg_68679a454370819ca74c8eb3d04379630dd1afb72306ca5d"} +Reasoning Parts +Reasoning content is streamed using a start/delta/end pattern with unique IDs for each reasoning block. + +Reasoning Start Part +Indicates the beginning of a reasoning block. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"reasoning-start","id":"reasoning_123"} +Reasoning Delta Part +Contains incremental reasoning content for the reasoning block. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"reasoning-delta","id":"reasoning_123","delta":"This is some reasoning"} +Reasoning End Part +Indicates the completion of a reasoning block. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"reasoning-end","id":"reasoning_123"} +Source Parts +Source parts provide references to external content sources. + +Source URL Part +References to external URLs. 
+ +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"source-url","sourceId":"https://example.com","url":"https://example.com"} +Source Document Part +References to documents or files. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"source-document","sourceId":"https://example.com","mediaType":"file","title":"Title"} +File Part +The file parts contain references to files with their media type. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"file","url":"https://example.com/file.png","mediaType":"image/png"} +Data Parts +Custom data parts allow streaming of arbitrary structured data with type-specific handling. + +Format: Server-Sent Event with JSON object where the type includes a custom suffix + +Example: + +data: {"type":"data-weather","data":{"location":"SF","temperature":100}} +The data-\* type pattern allows you to define custom data types that your frontend can handle specifically. + +Error Part +The error parts are appended to the message as they are received. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"error","errorText":"error message"} +Tool Input Start Part +Indicates the beginning of tool input streaming. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"tool-input-start","toolCallId":"call_fJdQDqnXeGxTmr4E3YPSR7Ar","toolName":"getWeatherInformation"} +Tool Input Delta Part +Incremental chunks of tool input as it's being generated. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"tool-input-delta","toolCallId":"call_fJdQDqnXeGxTmr4E3YPSR7Ar","inputTextDelta":"San Francisco"} +Tool Input Available Part +Indicates that tool input is complete and ready for execution. 
+ +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"tool-input-available","toolCallId":"call_fJdQDqnXeGxTmr4E3YPSR7Ar","toolName":"getWeatherInformation","input":{"city":"San Francisco"}} +Tool Output Available Part +Contains the result of tool execution. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"tool-output-available","toolCallId":"call_fJdQDqnXeGxTmr4E3YPSR7Ar","output":{"city":"San Francisco","weather":"sunny"}} +Start Step Part +A part indicating the start of a step. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"start-step"} +Finish Step Part +A part indicating that a step (i.e., one LLM API call in the backend) has been completed. + +This part is necessary to correctly process multiple stitched assistant calls, e.g. when calling tools in the backend, and using steps in useChat at the same time. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"finish-step"} +Finish Message Part +A part indicating the completion of a message. + +Format: Server-Sent Event with JSON object + +Example: + +data: {"type":"finish"} +Stream Termination +The stream ends with a special [DONE] marker. + +Format: Server-Sent Event with literal [DONE] + +Example: + +data: [DONE] +The data stream protocol is supported by useChat and useCompletion on the frontend and used by default. useCompletion only supports the text and data stream parts. + +On the backend, you can use toUIMessageStreamResponse() from the streamText result object to return a streaming HTTP response. diff --git a/cmd/testai/main-testai.go b/cmd/testai/main-testai.go new file mode 100644 index 0000000000..87b34d4bb0 --- /dev/null +++ b/cmd/testai/main-testai.go @@ -0,0 +1,111 @@ +// Copyright 2025, Command Line Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "context" + "fmt" + "net/http" + "os" + "time" + + "github.com/wavetermdev/waveterm/pkg/waveai" + "github.com/wavetermdev/waveterm/pkg/wshrpc" +) + +// TestResponseWriter implements http.ResponseWriter and additional interfaces for testing +type TestResponseWriter struct { + header http.Header +} + +func (w *TestResponseWriter) Header() http.Header { + if w.header == nil { + w.header = make(http.Header) + } + return w.header +} + +func (w *TestResponseWriter) Write(data []byte) (int, error) { + fmt.Printf("SSE: %s", string(data)) + return len(data), nil +} + +func (w *TestResponseWriter) WriteHeader(statusCode int) { + fmt.Printf("Status: %d\n", statusCode) +} + +// Implement http.Flusher interface +func (w *TestResponseWriter) Flush() { + // No-op for testing +} + +// Implement interfaces needed by http.ResponseController +func (w *TestResponseWriter) SetWriteDeadline(deadline time.Time) error { + // No-op for testing + return nil +} + +func (w *TestResponseWriter) SetReadDeadline(deadline time.Time) error { + // No-op for testing + return nil +} + +func main() { + if len(os.Args) < 2 { + fmt.Println("Usage: go run main-testai.go <model> [message]") + fmt.Println("Example: go run main-testai.go o4-mini 'What is 2+2?'") + fmt.Println("Set OPENAI_API_KEY environment variable") + os.Exit(1) + } + + apiKey := os.Getenv("OPENAI_API_KEY") + if apiKey == "" { + fmt.Println("Error: OPENAI_API_KEY environment variable not set") + os.Exit(1) + } + + model := os.Args[1] + message := "What is 2+2?" 
+ if len(os.Args) > 2 { + message = os.Args[2] + } + + // Create AI options + opts := &wshrpc.WaveAIOptsType{ + APIToken: apiKey, + Model: model, + MaxTokens: 1000, + } + + // Create messages + messages := []waveai.UseChatMessage{ + { + Role: "user", + Content: message, + }, + } + + fmt.Printf("Testing AI streaming with model: %s\n", model) + fmt.Printf("Message: %s\n", message) + fmt.Println("---") + + // Create a test response writer and SSE handler + ctx := context.Background() + testWriter := &TestResponseWriter{} + sseHandler := waveai.MakeSSEHandlerCh(testWriter, ctx) + + // Setup the SSE handler + err := sseHandler.SetupSSE() + if err != nil { + fmt.Printf("Error setting up SSE: %v\n", err) + return + } + defer sseHandler.Close() + + // Call the streaming function + waveai.StreamOpenAIToUseChat(sseHandler, ctx, opts, messages) + + fmt.Println("---") + fmt.Println("Test completed") +} diff --git a/frontend/app/view/waveai/reasoning.tsx b/frontend/app/view/waveai/reasoning.tsx new file mode 100644 index 0000000000..d334a4f80b --- /dev/null +++ b/frontend/app/view/waveai/reasoning.tsx @@ -0,0 +1,143 @@ +// Copyright 2025, Command Line Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +import React, { createContext, memo, useCallback, useContext, useEffect, useState } from "react"; +import { Streamdown } from "streamdown"; + +type ReasoningContextValue = { + isStreaming: boolean; + isOpen: boolean; + setIsOpen: (open: boolean) => void; + duration: number; +}; + +const ReasoningContext = createContext(null); + +const useReasoning = () => { + const context = useContext(ReasoningContext); + if (!context) { + throw new Error("Reasoning components must be used within Reasoning"); + } + return context; +}; + +const AUTO_CLOSE_DELAY = 1000; + +export const Reasoning = memo( + ({ + className, + isStreaming = false, + open, + defaultOpen = false, + onOpenChange, + duration: durationProp = 3, + children, + }: { + className?: string; + isStreaming?: boolean; + open?: boolean; + defaultOpen?: boolean; + onOpenChange?: (open: boolean) => void; + duration?: number; + children: React.ReactNode; + }) => { + const [isOpen, setIsOpenState] = useState(defaultOpen); + const [duration, setDuration] = useState(0); + const [hasAutoClosedRef, setHasAutoClosedRef] = useState(false); + const [startTime, setStartTime] = useState(null); + + const setIsOpen = useCallback( + (newOpen: boolean) => { + setIsOpenState(newOpen); + onOpenChange?.(newOpen); + }, + [onOpenChange] + ); + + // Track duration when streaming starts and ends + useEffect(() => { + if (isStreaming) { + if (startTime === null) { + setStartTime(Date.now()); + } + } else if (startTime !== null) { + setDuration(Math.round((Date.now() - startTime) / 1000)); + setStartTime(null); + } + }, [isStreaming, startTime]); + + // Don't auto-open or auto-close - let user control the state manually + + // Handle controlled open state + useEffect(() => { + if (open !== undefined) { + setIsOpenState(open); + } + }, [open]); + + return ( + +
{children}
+
+ ); + } +); + +export const ReasoningTrigger = memo( + ({ + className, + title = "Reasoning", + children, + onClick, + }: { + className?: string; + title?: string; + children?: React.ReactNode; + onClick?: () => void; + }) => { + const { isStreaming, isOpen, setIsOpen, duration } = useReasoning(); + + const handleClick = useCallback(() => { + setIsOpen(!isOpen); + onClick?.(); + }, [isOpen, setIsOpen, onClick]); + + return ( + + ); + } +); + +export const ReasoningContent = memo(({ className, children }: { className?: string; children: string }) => { + const { isOpen } = useReasoning(); + + if (!isOpen) return null; + + return ( +
+ {children} +
+ ); +}); + +Reasoning.displayName = "Reasoning"; +ReasoningTrigger.displayName = "ReasoningTrigger"; +ReasoningContent.displayName = "ReasoningContent"; diff --git a/frontend/app/view/waveai/waveai.tsx b/frontend/app/view/waveai/waveai.tsx index 048a76b487..fc1a961a48 100644 --- a/frontend/app/view/waveai/waveai.tsx +++ b/frontend/app/view/waveai/waveai.tsx @@ -19,6 +19,7 @@ import { OverlayScrollbarsComponent, OverlayScrollbarsComponentRef } from "overl import { forwardRef, memo, useCallback, useEffect, useImperativeHandle, useMemo, useRef, useState } from "react"; import { debounce, throttle } from "throttle-debounce"; import "./waveai.scss"; +import { WaveAiUseChat, WaveAiUseChatModel } from "./waveaiusechat"; interface ChatMessageType { id: string; @@ -296,7 +297,15 @@ export class WaveAiModel implements ViewModel { } get viewComponent(): ViewComponent { - return WaveAi; + // Check if we should use the new useChat implementation + const useNewImplementation = this.shouldUseNewImplementation(); + return useNewImplementation ? WaveAiUseChat : WaveAi; + } + + private shouldUseNewImplementation(): boolean { + // For now, check for a meta flag to enable the new implementation + const blockMeta = globalStore.get(this.blockAtom)?.meta ?? 
{}; + return blockMeta["ai:usechat"] === "true" || blockMeta["ai:usechat"] === true; } dispose() { @@ -685,7 +694,7 @@ const ChatInput = forwardRef( } ); -const WaveAi = ({ model }: { model: WaveAiModel; blockId: string }) => { +const WaveAiOld = ({ model }: { model: WaveAiModel; blockId: string }) => { const { sendMessage } = model.useWaveAi(); const waveaiRef = useRef(null); const chatWindowRef = useRef(null); @@ -879,4 +888,15 @@ const WaveAi = ({ model }: { model: WaveAiModel; blockId: string }) => { ); }; +const WaveAi = ({ model, blockId }: { model: WaveAiModel; blockId: string }) => { + const useNewImplementation = true; + + if (useNewImplementation) { + const useChatModel = useMemo(() => new WaveAiUseChatModel(blockId), [blockId]); + return ; + } + + return ; +}; + export { WaveAi }; diff --git a/frontend/app/view/waveai/waveaiusechat.tsx b/frontend/app/view/waveai/waveaiusechat.tsx new file mode 100644 index 0000000000..1b334e7a03 --- /dev/null +++ b/frontend/app/view/waveai/waveaiusechat.tsx @@ -0,0 +1,657 @@ +// Copyright 2025, Command Line Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +import { Button } from "@/app/element/button"; +import { TypingIndicator } from "@/app/element/typingindicator"; +import { atoms, fetchWaveFile, WOS } from "@/store/global"; +import { BlockService, ObjectService } from "@/store/services"; +import { getWebServerEndpoint } from "@/util/endpoints"; +import { checkKeyPressed } from "@/util/keyutil"; +import { fireAndForget, isBlank, mergeMeta } from "@/util/util"; +import { useChat } from "@ai-sdk/react"; +import { DefaultChatTransport } from "ai"; +import { atom, Atom, useAtomValue } from "jotai"; +import { OverlayScrollbarsComponent, OverlayScrollbarsComponentRef } from "overlayscrollbars-react"; +import React, { forwardRef, memo, useCallback, useEffect, useImperativeHandle, useMemo, useRef, useState } from "react"; +import { Streamdown } from "streamdown"; +import { debounce, throttle } from "throttle-debounce"; +import { Reasoning, ReasoningContent, ReasoningTrigger } from "./reasoning"; + +interface WaveAiUseChatProps { + blockId: string; + model: WaveAiUseChatModelImpl; +} + +interface ChatMessage { + id: string; + role: "user" | "assistant" | "system"; + content: string; + reasoning?: string; +} + +const slidingWindowSize = 30; + +class WaveAiUseChatModelImpl implements ViewModel { + viewType: string; + blockId: string; + blockAtom: Atom; + presetKey: Atom; + presetMap: Atom<{ [k: string]: MetaType }>; + mergedPresets: Atom; + aiOpts: Atom; + viewIcon?: Atom; + viewName?: Atom; + viewText?: Atom; + endIconButtons?: Atom; + textAreaRef: React.RefObject; + + constructor(blockId: string) { + this.viewType = "waveai"; + this.blockId = blockId; + this.blockAtom = WOS.getWaveObjectAtom(`block:${blockId}`); + this.viewIcon = atom("sparkles"); + this.viewName = atom("Wave AI"); + this.textAreaRef = React.createRef(); + + this.presetKey = atom((get) => { + const metaPresetKey = get(this.blockAtom).meta["ai:preset"]; + const globalPresetKey = get(atoms.settingsAtom)["ai:preset"]; 
+ return metaPresetKey ?? globalPresetKey; + }); + + this.presetMap = atom((get) => { + const fullConfig = get(atoms.fullConfigAtom); + const presets = fullConfig.presets; + const settings = fullConfig.settings; + return Object.fromEntries( + Object.entries(presets) + .filter(([k]) => k.startsWith("ai@")) + .map(([k, v]) => { + const aiPresetKeys = Object.keys(v).filter((k) => k.startsWith("ai:")); + const newV = { ...v }; + newV["display:name"] = + aiPresetKeys.length == 1 && aiPresetKeys.includes("ai:*") + ? `${newV["display:name"] ?? "Default"} (${settings["ai:model"]})` + : newV["display:name"]; + return [k, newV]; + }) + ); + }); + + this.mergedPresets = atom((get) => { + const meta = get(this.blockAtom).meta; + let settings = get(atoms.settingsAtom); + let presetKey = get(this.presetKey); + let presets = get(atoms.fullConfigAtom).presets; + let selectedPresets = presets?.[presetKey] ?? {}; + + let mergedPresets: MetaType = {}; + mergedPresets = mergeMeta(settings, selectedPresets, "ai"); + mergedPresets = mergeMeta(mergedPresets, meta, "ai"); + + return mergedPresets; + }); + + this.aiOpts = atom((get) => { + const mergedPresets = get(this.mergedPresets); + + const opts: WaveAIOptsType = { + model: mergedPresets["ai:model"] ?? null, + apitype: mergedPresets["ai:apitype"] ?? null, + orgid: mergedPresets["ai:orgid"] ?? null, + apitoken: mergedPresets["ai:apitoken"] ?? null, + apiversion: mergedPresets["ai:apiversion"] ?? null, + maxtokens: mergedPresets["ai:maxtokens"] ?? null, + timeoutms: mergedPresets["ai:timeoutms"] ?? 60000, + baseurl: mergedPresets["ai:baseurl"] ?? null, + proxyurl: mergedPresets["ai:proxyurl"] ?? null, + }; + return opts; + }); + + this.viewText = atom((get) => { + const viewTextChildren: HeaderElem[] = []; + const aiOpts = get(this.aiOpts); + const presets = get(this.presetMap); + const presetKey = get(this.presetKey); + const presetName = presets[presetKey]?.["display:name"] ?? 
""; + const isCloud = isBlank(aiOpts.apitoken) && isBlank(aiOpts.baseurl); + + // Handle known API providers + switch (aiOpts?.apitype) { + case "anthropic": + viewTextChildren.push({ + elemtype: "iconbutton", + icon: "globe", + title: `Using Remote Anthropic API (${aiOpts.model})`, + noAction: true, + }); + break; + case "perplexity": + viewTextChildren.push({ + elemtype: "iconbutton", + icon: "globe", + title: `Using Remote Perplexity API (${aiOpts.model})`, + noAction: true, + }); + break; + default: + if (isCloud) { + viewTextChildren.push({ + elemtype: "iconbutton", + icon: "cloud", + title: "Using Wave's AI Proxy (gpt-4o-mini)", + noAction: true, + }); + } else { + const baseUrl = aiOpts.baseurl ?? "OpenAI Default Endpoint"; + const modelName = aiOpts.model; + if (baseUrl.startsWith("http://localhost") || baseUrl.startsWith("http://127.0.0.1")) { + viewTextChildren.push({ + elemtype: "iconbutton", + icon: "location-dot", + title: `Using Local Model @ ${baseUrl} (${modelName})`, + noAction: true, + }); + } else { + viewTextChildren.push({ + elemtype: "iconbutton", + icon: "globe", + title: `Using Remote Model @ ${baseUrl} (${modelName})`, + noAction: true, + }); + } + } + } + + const dropdownItems = Object.entries(presets) + .sort((a, b) => ((a[1]["display:order"] ?? 0) > (b[1]["display:order"] ?? 0) ? 
1 : -1)) + .map( + (preset) => + ({ + label: preset[1]["display:name"], + onClick: () => + fireAndForget(() => + ObjectService.UpdateObjectMeta(WOS.makeORef("block", this.blockId), { + "ai:preset": preset[0], + }) + ), + }) as MenuItem + ); + + viewTextChildren.push({ + elemtype: "menubutton", + text: presetName, + title: "Select AI Configuration", + items: dropdownItems, + }); + return viewTextChildren; + }); + + this.endIconButtons = atom((_) => { + let clearButton: IconButtonDecl = { + elemtype: "iconbutton", + icon: "delete-left", + title: "Clear Chat History", + click: this.clearMessages.bind(this), + }; + return [clearButton]; + }); + } + + get viewComponent(): ViewComponent { + return WaveAiUseChat; + } + + dispose() { + // No cleanup needed for useChat version + } + + async populateMessages(): Promise { + const history = await this.fetchAiData(); + return history.map((msg) => ({ + id: crypto.randomUUID(), + role: msg.role as "user" | "assistant" | "system", + content: msg.content, + })); + } + + async fetchAiData(): Promise> { + const { data } = await fetchWaveFile(this.blockId, "aidata"); + if (!data) { + return []; + } + const history: Array = JSON.parse(new TextDecoder().decode(data)); + return history.slice(Math.max(history.length - slidingWindowSize, 0)); + } + + async saveMessages(messages: ChatMessage[]): Promise { + const history: WaveAIPromptMessageType[] = messages.map((msg) => ({ + role: msg.role, + content: msg.content, + })); + await BlockService.SaveWaveAiData(this.blockId, history); + } + + giveFocus(): boolean { + if (this?.textAreaRef?.current) { + this.textAreaRef.current?.focus(); + return true; + } + return false; + } + + async clearMessages() { + await BlockService.SaveWaveAiData(this.blockId, []); + } + + keyDownHandler(waveEvent: WaveKeyboardEvent): boolean { + if (checkKeyPressed(waveEvent, "Cmd:l")) { + fireAndForget(this.clearMessages.bind(this)); + return true; + } + return false; + } +} + +const ChatWindow = memo( + forwardRef< + 
OverlayScrollbarsComponentRef, + { messages: ChatMessage[]; isLoading: boolean; error: Error | null; fontSize?: string; fixedFontSize?: string } + >(({ messages, isLoading, error, fontSize, fixedFontSize }, ref) => { + const osRef = useRef(null); + const [userHasScrolled, setUserHasScrolled] = useState(false); + const [shouldAutoScroll, setShouldAutoScroll] = useState(true); + + /* Hand the inner osRef instance to the forwarded ref. */ useImperativeHandle(ref, () => osRef.current!, []); + + /* Moves the viewport to its bottom edge, but only while shouldAutoScroll is true. */ const scrollToBottom = useCallback(() => { + if (osRef.current && shouldAutoScroll) { + const viewport = osRef.current.osInstance()?.elements().viewport; + if (viewport) { + viewport.scrollTop = viewport.scrollHeight; + } + } + }, [shouldAutoScroll]); + + /* Throttled (100) scroll handler: auto-scroll stays enabled only while the viewport is within 100px of the bottom, and userHasScrolled is set once the user is away from the bottom. The handler is rebuilt whenever userHasScrolled changes. */ const handleScroll = useMemo( + () => + throttle(100, () => { + if (osRef.current) { + const viewport = osRef.current.osInstance()?.elements().viewport; + if (viewport) { + const { scrollTop, scrollHeight, clientHeight } = viewport; + const isNearBottom = scrollHeight - scrollTop - clientHeight < 100; + setShouldAutoScroll(isNearBottom); + if (!isNearBottom && !userHasScrolled) { + setUserHasScrolled(true); + } + } + } + }), + [userHasScrolled] + ); + + /* Debounced (300) reset of the userHasScrolled flag; created once. */ const resetUserScroll = useMemo( + () => + debounce(300, () => { + setUserHasScrolled(false); + }), + [] + ); + + /* Re-run scrollToBottom whenever messages, isLoading, or the callback itself changes. */ useEffect(() => { + scrollToBottom(); + }, [messages, isLoading, scrollToBottom]); + + /* When auto-scroll is enabled again after a manual scroll, schedule the flag reset. */ useEffect(() => { + if (shouldAutoScroll && userHasScrolled) { + resetUserScroll(); + } + }, [shouldAutoScroll, userHasScrolled, resetUserScroll]); + + return ( +
+ +
+ {messages.map((message, index) => { + // Only the last assistant message should be streaming when isLoading is true + const isLastAssistantMessage = + message.role === "assistant" && index === messages.length - 1; + const isCurrentlyStreaming = isLoading && isLastAssistantMessage; + + return ( + + ); + })} + {error && ( +
+
+ +
+
+
+ Error: {error.message} +
+
+
+ )} +
+
+
+ ); + }) +); +ChatWindow.displayName = "ChatWindow"; + +/** Memoized renderer for a single chat message. Messages with role "user" and role "assistant" each return their own markup; any other role falls through to `return null`. isStreaming defaults to false. */ const ChatItem = memo( + ({ + message, + fontSize, + fixedFontSize, + isStreaming = false, + }: { + message: ChatMessage; + fontSize?: string; + fixedFontSize?: string; + isStreaming?: boolean; + }) => { + const { role, content, reasoning } = message; + + /* User branch: renders the message content. */ if (role === "user") { + return ( +
+
+ + {content} + +
+
+ +
+
+ ); + } + + /* Assistant branch: an optional section shown only when reasoning is truthy, followed by the content when it is truthy or an alternate element when it is empty. */ if (role === "assistant") { + return ( +
+
+ +
+
+ {reasoning && ( +
+ + + {reasoning || ""} + +
+ )} + {content ? ( +
+ + {content} + +
+ ) : ( +
+ +
+ )} +
+
+ ); + } + + return null; + } +); +ChatItem.displayName = "ChatItem"; + +const ChatInput = memo( + ({ + input, + handleInputChange, + handleSubmit, + isLoading, + textAreaRef, + }: { + input: string; + handleInputChange: (e: React.ChangeEvent) => void; + handleSubmit: (e: React.FormEvent) => void; + isLoading: boolean; + textAreaRef: React.RefObject; + }) => { + const [textAreaHeight, setTextAreaHeight] = useState(25); + const maxLines = 5; + const lineHeight = 17; + const minHeight = 25; + const maxHeight = minHeight + (maxLines - 1) * lineHeight; + + const adjustTextAreaHeight = useCallback(() => { + if (textAreaRef.current) { + const textArea = textAreaRef.current; + textArea.style.height = `${minHeight}px`; + const scrollHeight = textArea.scrollHeight; + const newHeight = Math.min(Math.max(scrollHeight, minHeight), maxHeight); + setTextAreaHeight(newHeight); + textArea.style.height = `${newHeight}px`; + } + }, [textAreaRef, minHeight, maxHeight]); + + useEffect(() => { + adjustTextAreaHeight(); + }, [input, adjustTextAreaHeight]); + + const handleKeyDown = useCallback( + (event: React.KeyboardEvent) => { + if (event.key === "Enter" && !event.shiftKey) { + event.preventDefault(); + handleSubmit(event as any); + return; + } + }, + [handleSubmit] + ); + + return ( +
+
+
+