diff --git a/cmd/testsummarize/main-testsummarize.go b/cmd/testsummarize/main-testsummarize.go new file mode 100644 index 0000000000..fc16e59e04 --- /dev/null +++ b/cmd/testsummarize/main-testsummarize.go @@ -0,0 +1,104 @@ +// Copyright 2025, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "context" + "flag" + "fmt" + "os" + "time" + + "github.com/wavetermdev/waveterm/pkg/aiusechat/google" +) + +func printUsage() { + fmt.Println("Usage: go run main-testsummarize.go [--help] [--mode MODE] ") + fmt.Println("Examples:") + fmt.Println(" go run main-testsummarize.go README.md") + fmt.Println(" go run main-testsummarize.go --mode useful /path/to/image.png") + fmt.Println(" go run main-testsummarize.go -m publiccode document.pdf") + fmt.Println("") + fmt.Println("Supported file types:") + fmt.Println(" - Text files (up to 200KB)") + fmt.Println(" - Images (up to 7MB)") + fmt.Println(" - PDFs (up to 5MB)") + fmt.Println("") + fmt.Println("Flags:") + fmt.Println(" --mode, -m Summarization mode (default: quick)") + fmt.Println(" Options: quick, useful, publiccode, htmlcontent, htmlfull") + fmt.Println("") + fmt.Println("Environment variables:") + fmt.Println(" GOOGLE_APIKEY (required)") +} + +func main() { + var showHelp bool + var mode string + flag.BoolVar(&showHelp, "help", false, "Show usage information") + flag.StringVar(&mode, "mode", "quick", "Summarization mode") + flag.StringVar(&mode, "m", "quick", "Summarization mode (shorthand)") + flag.Parse() + + if showHelp { + printUsage() + os.Exit(0) + } + + apiKey := os.Getenv("GOOGLE_APIKEY") + if apiKey == "" { + fmt.Println("Error: GOOGLE_APIKEY environment variable not set") + printUsage() + os.Exit(1) + } + + args := flag.Args() + if len(args) == 0 { + fmt.Println("Error: filename required") + printUsage() + os.Exit(1) + } + + filename := args[0] + + // Check if file exists + if _, err := os.Stat(filename); os.IsNotExist(err) { + fmt.Printf("Error: file '%s' does not exist\n", 
filename) + os.Exit(1) + } + + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + fmt.Printf("Summarizing file: %s\n", filename) + fmt.Printf("Model: %s\n", google.SummarizeModel) + fmt.Printf("Mode: %s\n", mode) + + startTime := time.Now() + summary, usage, err := google.SummarizeFile(ctx, filename, google.SummarizeOpts{ + APIKey: apiKey, + Mode: mode, + }) + latency := time.Since(startTime) + + fmt.Printf("Latency: %d ms\n", latency.Milliseconds()) + fmt.Println("===") + if err != nil { + fmt.Printf("Error: %v\n", err) + os.Exit(1) + } + + fmt.Println("\nSummary:") + fmt.Println("---") + fmt.Println(summary) + fmt.Println("---") + + if usage != nil { + fmt.Println("\nUsage Statistics:") + fmt.Printf(" Prompt tokens: %d\n", usage.PromptTokenCount) + fmt.Printf(" Cached tokens: %d\n", usage.CachedContentTokenCount) + fmt.Printf(" Response tokens: %d\n", usage.CandidatesTokenCount) + fmt.Printf(" Total tokens: %d\n", usage.TotalTokenCount) + } +} \ No newline at end of file diff --git a/pkg/aiusechat/google/doc.go b/pkg/aiusechat/google/doc.go new file mode 100644 index 0000000000..caab8a4ecd --- /dev/null +++ b/pkg/aiusechat/google/doc.go @@ -0,0 +1,41 @@ +// Copyright 2025, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +// Package google provides Google Generative AI integration for WaveTerm. +// +// This package implements file summarization using Google's Gemini models. +// Unlike other AI provider implementations in the aiusechat package, this +// package does NOT implement full SSE streaming. It uses a simple +// request-response API for file summarization. +// +// # Supported File Types +// +// The package supports the same file types as defined in wshcmd-ai.go: +// - Images (PNG, JPEG, etc.): up to 7MB +// - PDFs: up to 5MB +// - Text files: up to 200KB +// +// Binary files are rejected unless they are recognized as images or PDFs. 
+// +// # Usage +// +// To summarize a file: +// +// ctx := context.Background() +// summary, usage, err := google.SummarizeFile(ctx, "/path/to/file.txt", google.SummarizeOpts{ +// APIKey: "YOUR_API_KEY", +// Mode: google.ModeQuickSummary, +// }) +// if err != nil { +// log.Fatal(err) +// } +// fmt.Println("Summary:", summary) +// fmt.Printf("Tokens used: %d\n", usage.TotalTokenCount) +// +// # Configuration +// +// The summarization behavior can be customized by modifying the constants: +// - SummarizeModel: The Gemini model to use (default: "gemini-2.5-flash-lite") +// - SummarizePrompt: The fallback prompt, used only when Mode matches none of the Mode* constants +// - GoogleAPIURL: The base URL for the API (for reference, not currently used by the SDK) +package google diff --git a/pkg/aiusechat/google/google-summarize.go b/pkg/aiusechat/google/google-summarize.go new file mode 100644 index 0000000000..67d3cd3eb5 --- /dev/null +++ b/pkg/aiusechat/google/google-summarize.go @@ -0,0 +1,283 @@ +// Copyright 2025, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +package google + +import ( + "context" + "fmt" + "net/http" + "os" + "strings" + + "github.com/google/generative-ai-go/genai" + "github.com/wavetermdev/waveterm/pkg/util/utilfn" + "google.golang.org/api/option" +) + +const ( + // GoogleAPIURL is the base URL for the Google Generative AI API + GoogleAPIURL = "https://generativelanguage.googleapis.com" + + // SummarizeModel is the model used for file summarization + SummarizeModel = "gemini-2.5-flash-lite" + + // Mode constants + ModeQuickSummary = "quick" + ModeUseful = "useful" + ModePublicCode = "publiccode" + ModeHTMLContent = "htmlcontent" + ModeHTMLFull = "htmlfull" + + // SummarizePrompt is the fallback prompt, used when Mode matches none of the Mode* constants + SummarizePrompt = "Please provide a concise summary of this file. Include the main topics, key points, and any notable information."
+ + // QuickSummaryPrompt is the prompt for quick file summaries + QuickSummaryPrompt = `Summarize the following file for another AI agent that is deciding which files to read. + +If the content is HTML or web page markup, ignore layout elements such as headers, footers, sidebars, navigation menus, cookie banners, pop-ups, ads, and search boxes. +Focus only on the visible main content that describes the page’s subject or purpose. + +Keep the summary extremely concise — one or two sentences at most. +Explain what the file appears to be and its main purpose or contents. +If it's code, mention the language and what it implements (e.g., a CLI, library, test, or config). +Avoid speculation or verbose explanations. +Do not include markdown, bullets, or formatting — just a plain text summary.` + + // UsefulSummaryPrompt is the prompt for useful file summaries with more detail + UsefulSummaryPrompt = `You are summarizing a single file so that another AI agent can understand its purpose and structure. + +If the content is HTML or web page markup, ignore layout elements such as headers, footers, sidebars, navigation menus, cookie banners, pop-ups, ads, and search boxes. +Focus only on the visible main content that describes the page’s subject or purpose. + +Start with a short overview (2–4 sentences) describing the overall purpose of the file. +If the file is large (more than about 150 lines) or has multiple major sections or functions, +then briefly summarize each major section (1–2 sentences per section) and include an approximate line range in parentheses like "(lines 80–220)". + +Keep section summaries extremely concise — only include the most important parts or entry points. +If it's code, mention key functions or classes and what they do. +If it's documentation, describe key topics or sections. +If it's a data or config file, summarize the structure and purpose of the values. 
+ +Never produce more text than would fit comfortably on one screen (roughly under 200 words total). +Plain text only — no lists, no markdown, no JSON.` + + // PublicCodeSummaryPrompt is the prompt for public API summaries + PublicCodeSummaryPrompt = `You are summarizing a SINGLE source file to expose its PUBLIC API to another AI client. + +GOAL +Produce a compact, header-like listing of all PUBLIC symbols callers would use. + +OUTPUT FORMAT (plain text only; no bullets/markdown/JSON): +1) Public data structures required by public functions (types/structs/interfaces/enums/const groups): + (lines A–B) + + +2) Public functions/methods in order of appearance: + (lines A–B) + + +RULES +- PUBLIC means exported/externally visible for the language (Go: capitalized; Java/C#/TS: public; Rust: pub; Python: not underscore-prefixed, etc.). +- Include ALL public functions/methods. +- Include public data structures ONLY if referenced by any public function OR commonly constructed/consumed by callers. +- For multi-line declarations, emit a single-line canonical form by collapsing internal whitespace while preserving tokens and order. +- The one-line comment is either a compressed docstring or, if absent, a concise inferred purpose (≤ 20 words). +- Include approximate line ranges as "(lines A–B)". +- Skip private helpers, tests, examples, and internal-only constants. +- Preserve generics/annotations/modifiers as they appear (e.g., type params, async, const, noexcept). +- No preface or epilogue text—just the listing. + +EXAMPLE STYLE (illustrative; use the target language's comment syntax): +// Configuration for the proxy (lines 10–42) +type ProxyConfig struct { ... 
} + +// Creates and configures a new proxy instance (lines 60–92) +func NewProxy(cfg ProxyConfig) (*Proxy, error) + +// Handles a single HTTP request (lines 95–168) +func (p *Proxy) ServeHTTP(w http.ResponseWriter, r *http.Request)` + + // HTMLContentPrompt is the prompt for converting HTML to content-focused Markdown + HTMLContentPrompt = `Convert the following stripped HTML into clean Markdown for READING CONTENT ONLY. + +- Output Markdown ONLY (no explanations, no JSON, no code fences). +- Keep document title as a single H1 if present (from or first <h1>). +- Preserve headings (h1–h6), paragraphs, strong/emphasis, inline code. +- Convert <a> to [text](absolute_url). If href is relative, resolve against BASE_URL: {{BASE_URL}}. Do not output javascript:void links. +- Preserve lists (ul/ol, nested), blockquotes, and code blocks (<pre><code>) as fenced code (include language if obvious). +- Convert tables to Markdown tables; keep header row; include up to 50 data rows, then append "… (more rows)". +- Keep images ONLY if alt text is descriptive; render as ![alt](absolute_url). Skip tracking pixels and decorative images. +- Discard navigation, site header/footer, sidebars, cookie banners, search bars, newsletter/signup, social share, repetitive link clouds, and legal boilerplate unless they are the ONLY content. +- Preserve in-page structure order; do not invent content; do not summarize prose—extract faithfully. +- Normalize whitespace, collapse repeated blank lines to one. +` + + // HTMLFullPrompt is the prompt for converting HTML to navigation-focused Markdown + HTMLFullPrompt = `Convert the following stripped HTML into Markdown optimized for SITE NAVIGATION. + +- Output Markdown ONLY (no explanations, no JSON, no code fences). +- Start with a top-level title (from <title> or first <h1>) as H1. +- Include primary navigation as a section "## Navigation" with bullet lists of top-level links (use visible link text; dedupe exact duplicates). 
+- Include secondary nav/footer links under "## Footer Links". +- Then extract the main page content as Markdown (headings, paragraphs, lists, blockquotes, code blocks). +- Convert <a> to [text](absolute_url). If href is relative, resolve against BASE_URL: {{BASE_URL}}. +- Convert tables to Markdown tables; keep header + up to 50 rows, then "… (more rows)". +- Keep images with meaningful alt as ![alt](absolute_url); otherwise skip. +- Preserve order as it appears in the page; do not summarize prose—extract faithfully. +- Normalize whitespace; collapse repeated blank lines.` +) + +// SummarizeOpts contains options for file summarization +type SummarizeOpts struct { + APIKey string + Mode string +} + +// GoogleUsage represents token usage information from Google's Generative AI API +type GoogleUsage struct { + PromptTokenCount int32 `json:"prompt_token_count"` + CachedContentTokenCount int32 `json:"cached_content_token_count"` + CandidatesTokenCount int32 `json:"candidates_token_count"` + TotalTokenCount int32 `json:"total_token_count"` +} + +func detectMimeType(data []byte) string { + mimeType := http.DetectContentType(data) + return strings.Split(mimeType, ";")[0] +} + +func getMaxFileSize(mimeType, mode string) (int, string) { + if mimeType == "application/pdf" { + return 5 * 1024 * 1024, "5MB" + } + if strings.HasPrefix(mimeType, "image/") { + return 7 * 1024 * 1024, "7MB" + } + if mode == ModeHTMLContent || mode == ModeHTMLFull { + return 500 * 1024, "500KB" + } + return 200 * 1024, "200KB" +} + +// SummarizeFile reads a file and generates a summary using Google's Generative AI. +// It supports images, PDFs, and text files based on the limits defined in wshcmd-ai.go. +// Returns the summary text, usage information, and any error encountered. 
+func SummarizeFile(ctx context.Context, filename string, opts SummarizeOpts) (string, *GoogleUsage, error) { + if opts.Mode == "" { + return "", nil, fmt.Errorf("mode is required") + } + + // Read the file + data, err := os.ReadFile(filename) + if err != nil { + return "", nil, fmt.Errorf("reading file: %w", err) + } + + // Detect MIME type + mimeType := detectMimeType(data) + + isPDF := mimeType == "application/pdf" + isImage := strings.HasPrefix(mimeType, "image/") + + if !isPDF && !isImage { + mimeType = "text/plain" + if utilfn.ContainsBinaryData(data) { + return "", nil, fmt.Errorf("file contains binary data and cannot be summarized") + } + } + + // Validate file size + maxSize, sizeStr := getMaxFileSize(mimeType, opts.Mode) + if len(data) > maxSize { + return "", nil, fmt.Errorf("file exceeds maximum size of %s for %s files", sizeStr, mimeType) + } + + // Create client + client, err := genai.NewClient(ctx, option.WithAPIKey(opts.APIKey)) + if err != nil { + return "", nil, fmt.Errorf("creating Google AI client: %w", err) + } + defer client.Close() + + // Create model + model := client.GenerativeModel(SummarizeModel) + + // Select prompt based on mode + var prompt string + switch opts.Mode { + case ModeQuickSummary: + prompt = QuickSummaryPrompt + case ModeUseful: + prompt = UsefulSummaryPrompt + case ModePublicCode: + prompt = PublicCodeSummaryPrompt + case ModeHTMLContent: + prompt = HTMLContentPrompt + case ModeHTMLFull: + prompt = HTMLFullPrompt + default: + prompt = SummarizePrompt + } + + // Prepare the content parts + var parts []genai.Part + + // Add the prompt + parts = append(parts, genai.Text(prompt)) + + // Add the file content based on type + if isImage { + // For images, use Blob + parts = append(parts, genai.Blob{ + MIMEType: mimeType, + Data: data, + }) + } else if isPDF { + // For PDFs, use Blob + parts = append(parts, genai.Blob{ + MIMEType: mimeType, + Data: data, + }) + } else { + // For text files, convert to string + parts = 
append(parts, genai.Text(string(data))) + } + + // Generate content + resp, err := model.GenerateContent(ctx, parts...) + if err != nil { + return "", nil, fmt.Errorf("generating content: %w", err) + } + + // Check if we got any candidates + if len(resp.Candidates) == 0 { + return "", nil, fmt.Errorf("no response candidates returned") + } + + // Extract the text from the first candidate + candidate := resp.Candidates[0] + if candidate.Content == nil || len(candidate.Content.Parts) == 0 { + return "", nil, fmt.Errorf("no content in response") + } + + var summary strings.Builder + for _, part := range candidate.Content.Parts { + if textPart, ok := part.(genai.Text); ok { + summary.WriteString(string(textPart)) + } + } + + // Convert usage metadata + var usage *GoogleUsage + if resp.UsageMetadata != nil { + usage = &GoogleUsage{ + PromptTokenCount: resp.UsageMetadata.PromptTokenCount, + CachedContentTokenCount: resp.UsageMetadata.CachedContentTokenCount, + CandidatesTokenCount: resp.UsageMetadata.CandidatesTokenCount, + TotalTokenCount: resp.UsageMetadata.TotalTokenCount, + } + } + + return summary.String(), usage, nil +} diff --git a/pkg/aiusechat/google/google-summarize_test.go b/pkg/aiusechat/google/google-summarize_test.go new file mode 100644 index 0000000000..1dd1c45733 --- /dev/null +++ b/pkg/aiusechat/google/google-summarize_test.go @@ -0,0 +1,130 @@ +// Copyright 2025, Command Line Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +package google + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" +) + +func TestDetectMimeType(t *testing.T) { + tests := []struct { + name string + data []byte + expected string + }{ + { + name: "plain text", + data: []byte("Hello, World!"), + expected: "text/plain", + }, + { + name: "empty file", + data: []byte{}, + expected: "text/plain", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := detectMimeType(tt.data) + if !containsMimeType(result, tt.expected) { + t.Errorf("detectMimeType() = %v, want to contain %v", result, tt.expected) + } + }) + } +} + +func containsMimeType(got, want string) bool { + // DetectContentType may return variations like "text/plain; charset=utf-8" + return got == want || (want == "text/plain" && got == "text/plain; charset=utf-8") +} + +func TestSummarizeFile_FileNotFound(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + _, _, err := SummarizeFile(ctx, "/nonexistent/file.txt", SummarizeOpts{ + APIKey: "fake-api-key", + Mode: ModeQuickSummary, + }) + if err == nil { + t.Error("SummarizeFile() expected error for nonexistent file, got nil") + } +} + +func TestSummarizeFile_BinaryFile(t *testing.T) { + // Create a temporary binary file + tmpDir := t.TempDir() + binFile := filepath.Join(tmpDir, "test.bin") + + // Create binary data (not text, image, or PDF) + binaryData := []byte{0x00, 0x01, 0x02, 0x03, 0x7F, 0x80, 0xFF} + if err := os.WriteFile(binFile, binaryData, 0644); err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + _, _, err := SummarizeFile(ctx, binFile, SummarizeOpts{ + APIKey: "fake-api-key", + Mode: ModeQuickSummary, + }) + if err == nil { + t.Error("SummarizeFile() expected error for binary file, got nil") + } + if err != nil && !containsString(err.Error(), 
"binary data") { + t.Errorf("SummarizeFile() error = %v, want error containing 'binary data'", err) + } +} + +func TestSummarizeFile_FileTooLarge(t *testing.T) { + // Create a temporary text file that exceeds the limit + tmpDir := t.TempDir() + textFile := filepath.Join(tmpDir, "large.txt") + + // Create a file larger than 200KB (text file limit) + largeData := make([]byte, 201*1024) + for i := range largeData { + largeData[i] = 'a' + } + if err := os.WriteFile(textFile, largeData, 0644); err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + _, _, err := SummarizeFile(ctx, textFile, SummarizeOpts{ + APIKey: "fake-api-key", + Mode: ModeQuickSummary, + }) + if err == nil { + t.Error("SummarizeFile() expected error for file too large, got nil") + } + if err != nil && !containsString(err.Error(), "exceeds maximum size") { + t.Errorf("SummarizeFile() error = %v, want error containing 'exceeds maximum size'", err) + } +} + +func containsString(s, substr string) bool { + return len(s) >= len(substr) && (s == substr || len(substr) == 0 || + (len(s) > 0 && len(substr) > 0 && stringContains(s, substr))) +} + +func stringContains(s, substr string) bool { + for i := 0; i <= len(s)-len(substr); i++ { + if s[i:i+len(substr)] == substr { + return true + } + } + return false +} + +// Note: We don't test the actual API call without a real API key +// Integration tests would require setting GOOGLE_API_KEY environment variable