Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
236 changes: 189 additions & 47 deletions CLAUDE.md

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions electron/config-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ const DEFAULT_CONFIG: Record<string, any> = {
repetitionElimination: true,
toneRules: [],
defaultTone: 'professional',
autoMuteOnRecord: false,
autoLearnDictionary: true,
history: [],
totalWordsThisWeek: 0,
};
Expand Down
161 changes: 150 additions & 11 deletions electron/llm-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,98 @@
* Handles post-processing, rewriting, and connection testing.
*/

/** Smart truncation: keeps beginning + end of long text, with ellipsis in middle */
function smartTruncate(text: string, maxLen: number): string {
if (!text || text.length <= maxLen) return text;
const keepEach = Math.floor((maxLen - 20) / 2);
return text.slice(0, keepEach) + '\n... [truncated] ...\n' + text.slice(-keepEach);
}

/** Truncation limits for each context field (in characters) */
const CONTEXT_LIMITS = {
selectedText: 500,
fieldText: 1500,
fieldTextWithMarker: 2000, // higher to account for cursor/selection markers
clipboardText: 500,
screenContext: 400,
recentTranscription: 200, // per item
recentTotal: 3, // max items
};

/** Truncate text centered around the cursor position, keeping context on both sides */
function cursorCenteredTruncate(text: string, cursorPos: number, maxLen: number): { text: string; adjustedPos: number } {
if (text.length <= maxLen) return { text, adjustedPos: cursorPos };

const ellipsis = '\n... [truncated] ...\n';
const halfWindow = Math.floor((maxLen - ellipsis.length * 2) / 2);
let start = Math.max(0, cursorPos - halfWindow);
let end = Math.min(text.length, cursorPos + halfWindow);

// If one side is shorter, give more to the other
if (start === 0) end = Math.min(text.length, maxLen - ellipsis.length);
if (end === text.length) start = Math.max(0, text.length - maxLen + ellipsis.length);

let result = '';
let adjustedPos = cursorPos;

if (start > 0) {
result = ellipsis;
adjustedPos = cursorPos - start + result.length;
result += text.slice(start, end);
} else {
result = text.slice(0, end);
}

if (end < text.length) {
result += ellipsis;
}

return { text: result, adjustedPos };
}

/** Build rich field context string with cursor/selection markers for the LLM */
function buildFieldContext(context: any): string | null {
const fieldText: string | undefined = context.fieldText;
if (!fieldText) return null;

const range = context.selectionRange as { location: number; length: number } | undefined;
const placeholder = context.fieldPlaceholder as string | undefined;
const label = context.fieldLabel as string | undefined;
const roleDesc = context.fieldRoleDescription || context.fieldRole || 'input field';

// Build descriptor: ("Message body", text area)
const labelPart = label ? `"${label}", ` : '';
const descriptor = `(${labelPart}${roleDesc})`;

if (range && typeof range.location === 'number' && typeof range.length === 'number') {
const loc = range.location;
const len = range.length;

if (len > 0 && loc + len <= fieldText.length) {
// User has selected text — show [SELECTED: ...] marker
const { text: truncated, adjustedPos } = cursorCenteredTruncate(fieldText, loc, CONTEXT_LIMITS.fieldTextWithMarker - 30);
const before = truncated.slice(0, adjustedPos);
const selectedText = truncated.slice(adjustedPos, adjustedPos + len);
const after = truncated.slice(adjustedPos + len);
const markedText = before + '[SELECTED: ' + selectedText + ']' + after;

return `The user selected text to replace with dictation in the ${descriptor}:\n"""\n${markedText}\n"""\nThe dictated text should replace the [SELECTED: ...] portion.`;
} else if (len === 0 && loc <= fieldText.length) {
// Cursor position — show | marker
const { text: truncated, adjustedPos } = cursorCenteredTruncate(fieldText, loc, CONTEXT_LIMITS.fieldTextWithMarker - 10);
const before = truncated.slice(0, adjustedPos);
const after = truncated.slice(adjustedPos);
const markedText = before + '|' + after;

return `Existing text in the ${descriptor}:\n"""\n${markedText}\n"""\n(The "|" marks the cursor position where the dictated text will be inserted.)`;
}
}

// Fallback: no range info, show raw field text
const snippet = smartTruncate(fieldText, CONTEXT_LIMITS.fieldText);
return `Existing text in the ${descriptor}:\n"""\n${snippet}\n"""\nThe dictated text should flow naturally with this existing content.`;
}

export class LLMService {
private getOpts(config: Record<string, any>, provider?: string) {
const p = provider || config.llmProvider || 'siliconflow';
Expand Down Expand Up @@ -52,8 +144,8 @@ export class LLMService {
return content;
}

async process(rawText: string, config: Record<string, any>, context?: any): Promise<string> {
if (!rawText.trim()) return '';
async process(rawText: string, config: Record<string, any>, context?: any): Promise<{ text: string; systemPrompt: string }> {
if (!rawText.trim()) return { text: '', systemPrompt: '' };
const opts = this.getOpts(config);

const parts: string[] = [
Expand Down Expand Up @@ -93,24 +185,55 @@ export class LLMService {
if (context.windowTitle) {
parts.push(`Window title: "${context.windowTitle}"`);
}
if (context.url) {
parts.push(`URL: ${context.url}`);
}
}

if (context?.selectedText) {
const snippet = context.selectedText.slice(0, 500);
parts.push(`\nThe user was editing/viewing this text:\n"""\n${snippet}\n"""\nEnsure the dictation output is consistent and coherent with this context.`);
// L1: Field context with cursor/selection position markers
const fieldCtx = buildFieldContext(context);
if (fieldCtx) {
parts.push(`\n${fieldCtx}`);
} else if (context?.selectedText) {
// Standalone selected text (no field text available)
const snippet = smartTruncate(context.selectedText, CONTEXT_LIMITS.selectedText);
parts.push(`\nThe user had selected this text:\n"""\n${snippet}\n"""\nEnsure the dictation output is consistent and coherent with this context.`);
}

// Field placeholder hint
if (context?.fieldPlaceholder) {
parts.push(`The input field's placeholder reads: "${context.fieldPlaceholder}"`);
}

// Clipboard content
if (context?.clipboardText) {
const snippet = smartTruncate(context.clipboardText, CONTEXT_LIMITS.clipboardText);
parts.push(`\nClipboard content:\n"""\n${snippet}\n"""\nThis may provide additional context for the dictation.`);
}

// Recent transcriptions for continuity
if (context?.recentTranscriptions?.length > 0) {
const recents = context.recentTranscriptions
.slice(0, CONTEXT_LIMITS.recentTotal)
.map((t: string) => smartTruncate(t, CONTEXT_LIMITS.recentTranscription));
parts.push(`\nRecent transcriptions (for continuity):\n${recents.map((r: string, i: number) => `${i + 1}. ${r}`).join('\n')}`);
}

if (context?.screenContext) {
parts.push(`\nScreen context (from OCR): ${context.screenContext}`);
const snippet = smartTruncate(context.screenContext, CONTEXT_LIMITS.screenContext);
parts.push(`\nScreen context (from OCR): ${snippet}`);
}

return this.call({
const systemPrompt = parts.join('\n');
const text = await this.call({
...opts,
messages: [
{ role: 'system', content: parts.join('\n') },
{ role: 'system', content: systemPrompt },
{ role: 'user', content: rawText },
],
});

return { text, systemPrompt };
}

async rewrite(selectedText: string, instruction: string, config: Record<string, any>): Promise<string> {
Expand Down Expand Up @@ -140,6 +263,22 @@ export class LLMService {
const model = config.contextOcrModel || 'Qwen/Qwen2-VL-7B-Instruct';
const opts = this.getOpts(config);

const prompt = [
'Analyze this screenshot to help with voice dictation context. Extract:',
'1. APP: Which application is open',
'2. TASK: What the user is working on (1 sentence)',
'3. KEY TERMS: List any proper nouns, brand names, technical terms, project names, or specialized vocabulary visible on screen (comma-separated)',
'4. TEXT CONTEXT: If there is a text input area, summarize what has been written so far (1 sentence)',
'',
'Format your response exactly as:',
'APP: ...',
'TASK: ...',
'KEY TERMS: ...',
'TEXT CONTEXT: ...',
'',
'Keep each line brief. If a field is not applicable, write "none".',
].join('\n');

const res = await fetch(`${opts.baseUrl}/chat/completions`, {
method: 'POST',
headers: {
Expand All @@ -153,11 +292,11 @@ export class LLMService {
role: 'user',
content: [
{ type: 'image_url', image_url: { url: dataUrl } },
{ type: 'text', text: 'Briefly describe the content visible on screen in 1-2 sentences. Focus on what app is open and what the user is working on. Be concise.' },
{ type: 'text', text: prompt },
],
}],
max_tokens: 200,
temperature: 0.2,
max_tokens: 300,
temperature: 0.1,
}),
});

Expand Down
Loading