From 2ca0c62f8cd8e660d99a2b6f2b242ac2663fd6d7 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 3 Dec 2025 08:10:11 +0000 Subject: [PATCH] feat: add offline privacy-first speech-to-text using Web Worker - Implement `useWhisper` hook for audio recording and worker communication - Add `workers/worker.js` to handle Xenova/whisper-tiny.en model inference - Update `PromptInputBox` to include microphone button and recording state UI - Ensure main thread is not blocked during model loading and inference --- components/ui/ai-prompt-box.tsx | 45 +- hooks/useWhisper.ts | 179 +++++++ package-lock.json | 842 +++++++++++++++++++++++++++++++- package.json | 1 + workers/worker.js | 55 +++ 5 files changed, 1115 insertions(+), 7 deletions(-) create mode 100644 hooks/useWhisper.ts create mode 100644 workers/worker.js diff --git a/components/ui/ai-prompt-box.tsx b/components/ui/ai-prompt-box.tsx index 30f12b4..0d763f9 100644 --- a/components/ui/ai-prompt-box.tsx +++ b/components/ui/ai-prompt-box.tsx @@ -2,9 +2,10 @@ import React from "react"; import * as TooltipPrimitive from "@radix-ui/react-tooltip"; import * as DialogPrimitive from "@radix-ui/react-dialog"; import * as PopoverPrimitive from "@radix-ui/react-popover"; -import { ArrowUp, Paperclip, Square, X, Globe, BrainCog, ChevronDown, Brain, ShieldAlert, Layout, MessageCircleQuestion, Link as LinkIcon } from "lucide-react"; +import { ArrowUp, Paperclip, Square, X, Globe, BrainCog, ChevronDown, Brain, ShieldAlert, Layout, MessageCircleQuestion, Link as LinkIcon, Mic, Loader2, StopCircle } from "lucide-react"; import { motion, AnimatePresence } from "framer-motion"; import { PERSONAS, PersonaKey } from "../../constants"; +import { useWhisper } from "../../hooks/useWhisper"; const cn = (...classes: (string | undefined | null | false)[]) => classes.filter(Boolean).join(" "); @@ -156,6 +157,13 @@ export const PromptInputBox = React.forwardRef(null); const fileInputRef = React.useRef(null); + const { isRecording, isTranscribing, isLoadingModel, loadingProgress, startRecording, stopRecording } = useWhisper((text) => { + setInput(prev => { + const spacer = prev.length > 0 && !prev.endsWith(' ') ? ' ' : ''; + return prev + spacer + text; + }); + }); + React.useEffect(() => { if (textareaRef.current) { textareaRef.current.style.height = "auto"; @@ -374,6 +382,41 @@ export const PromptInputBox = React.forwardRefAttach Image + + + + + + {isLoadingModel ? "Downloading Model..." : + isTranscribing ? "Transcribing..." : + isRecording ? "Stop Recording" : "Voice Input"} + + +