diff --git a/bench/baselines/current.json b/bench/baselines/current.json index cb2217a..0211fb1 100644 --- a/bench/baselines/current.json +++ b/bench/baselines/current.json @@ -1,6 +1,6 @@ { "version": "1.3.0", - "generated": "2026-03-21T14:09:19.600Z", + "generated": "2026-03-22T15:50:18.429Z", "results": { "basic": { "Coding assistant": { @@ -200,12 +200,12 @@ "gzipBytes": 4452 }, "cluster.js": { - "bytes": 7587, - "gzipBytes": 2471 + "bytes": 8649, + "gzipBytes": 2844 }, "compress.js": { - "bytes": 86117, - "gzipBytes": 16727 + "bytes": 86677, + "gzipBytes": 16948 }, "contradiction.js": { "bytes": 7700, @@ -264,8 +264,8 @@ "gzipBytes": 31 }, "total": { - "bytes": 187862, - "gzipBytes": 50483 + "bytes": 189484, + "gzipBytes": 51077 } }, "quality": { diff --git a/bench/baselines/history/v1.3.0.json b/bench/baselines/history/v1.3.0.json index cb2217a..0211fb1 100644 --- a/bench/baselines/history/v1.3.0.json +++ b/bench/baselines/history/v1.3.0.json @@ -1,6 +1,6 @@ { "version": "1.3.0", - "generated": "2026-03-21T14:09:19.600Z", + "generated": "2026-03-22T15:50:18.429Z", "results": { "basic": { "Coding assistant": { @@ -200,12 +200,12 @@ "gzipBytes": 4452 }, "cluster.js": { - "bytes": 7587, - "gzipBytes": 2471 + "bytes": 8649, + "gzipBytes": 2844 }, "compress.js": { - "bytes": 86117, - "gzipBytes": 16727 + "bytes": 86677, + "gzipBytes": 16948 }, "contradiction.js": { "bytes": 7700, @@ -264,8 +264,8 @@ "gzipBytes": 31 }, "total": { - "bytes": 187862, - "gzipBytes": 50483 + "bytes": 189484, + "gzipBytes": 51077 } }, "quality": { diff --git a/bench/scale-test.ts b/bench/scale-test.ts new file mode 100644 index 0000000..5f7587d --- /dev/null +++ b/bench/scale-test.ts @@ -0,0 +1,584 @@ +#!/usr/bin/env npx tsx +/** + * Scale & Weakness Analysis + * + * Tests CCE at realistic scales to find performance cliffs, quality + * degradation patterns, and architectural limitations. 
+ * + * Run: npx tsx bench/scale-test.ts + */ + +import { compress } from '../src/compress.js'; +import { uncompress } from '../src/expand.js'; +import { defaultTokenCounter } from '../src/compress.js'; +import type { CompressOptions, CompressResult, Message } from '../src/types.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +let nextId = 1; +function msg(role: string, content: string): Message { + return { id: String(nextId++), index: nextId - 1, role, content, metadata: {} }; +} +function reset() { + nextId = 1; +} +function tokens(msgs: Message[]): number { + return msgs.reduce((s, m) => s + defaultTokenCounter(m), 0); +} +function chars(msgs: Message[]): number { + return msgs.reduce((s, m) => s + ((m.content as string) ?? '').length, 0); +} + +// --------------------------------------------------------------------------- +// Generators — build realistic conversations at any scale +// --------------------------------------------------------------------------- + +const PROSE_TEMPLATES = [ + 'The {fn} function handles {task} with {strategy} for the {layer} layer. ', + 'We need to update the {fn} configuration to support {feature} across all {scope} environments. ', + 'The monitoring dashboard shows that {fn} latency increased by {n}ms after the last deployment. ', + 'I reviewed the {fn} implementation and found that the {issue} causes {impact} under high load. ', + 'The team decided to refactor {fn} to use {pattern} instead of the current {old} approach. 
', +]; +const FNS = [ + 'fetchData', + 'getUserProfile', + 'handleAuth', + 'processPayment', + 'validateInput', + 'buildIndex', + 'parseConfig', + 'syncCache', + 'routeRequest', + 'transformData', +]; +const TASKS = [ + 'API calls', + 'retries', + 'validation', + 'caching', + 'rate limiting', + 'auth checks', + 'data transforms', +]; +const STRATEGIES = [ + 'exponential backoff', + 'circuit breaker', + 'bulkhead isolation', + 'retry with jitter', + 'connection pooling', +]; +const LAYERS = ['service', 'data access', 'presentation', 'middleware', 'gateway']; +const FILLER = + 'I think that sounds reasonable and we should continue with the current approach for now. '; + +function randFrom<T>(arr: T[]): T { + return arr[Math.floor(Math.random() * arr.length)]; +} + +function techProse(sentences: number): string { + return Array.from({ length: sentences }, () => + randFrom(PROSE_TEMPLATES) + .replace('{fn}', randFrom(FNS)) + .replace('{task}', randFrom(TASKS)) + .replace('{strategy}', randFrom(STRATEGIES)) + .replace('{layer}', randFrom(LAYERS)) + .replace('{feature}', randFrom(TASKS)) + .replace('{scope}', randFrom(LAYERS)) + .replace('{issue}', randFrom(TASKS) + ' bottleneck') + .replace('{impact}', 'degraded throughput') + .replace('{pattern}', randFrom(STRATEGIES)) + .replace('{old}', randFrom(STRATEGIES)) + .replace('{n}', String(Math.floor(Math.random() * 500))), + ).join(''); +} + +function codeFence(): string { + const fn = randFrom(FNS); + return `\`\`\`typescript\nexport async function ${fn}(input: unknown) {\n const result = await validate(input);\n return process(result);\n}\n\`\`\``; +} + +function buildConversation( + msgCount: number, + options: { + codeFreq?: number; // fraction of messages with code (0-1) + fillerFreq?: number; // fraction of pure filler messages (0-1) + avgSentences?: number; + }, +): Message[] { + reset(); + const { codeFreq = 0.15, fillerFreq = 0.2, avgSentences = 4 } = options; + const msgs: Message[] = [ + msg('system', 'You are a 
senior software engineer helping with a complex codebase.'), + ]; + + for (let i = 0; i < msgCount; i++) { + const role = i % 2 === 0 ? 'user' : 'assistant'; + const rand = Math.random(); + + if (rand < fillerFreq) { + msgs.push(msg(role, FILLER.repeat(3 + Math.floor(Math.random() * 3)))); + } else if (rand < fillerFreq + codeFreq && role === 'assistant') { + msgs.push(msg(role, techProse(2) + '\n\n' + codeFence() + '\n\n' + techProse(1))); + } else { + const sentences = avgSentences + Math.floor(Math.random() * 3) - 1; + msgs.push(msg(role, techProse(sentences))); + } + } + + return msgs; +} + +// --------------------------------------------------------------------------- +// Test runners +// --------------------------------------------------------------------------- + +type TestResult = { + name: string; + msgCount: number; + inputChars: number; + inputTokens: number; + ratio: number; + quality: number | undefined; + entityRet: number | undefined; + roundTrip: boolean; + timeMs: number; + msPerMsg: number; + compressed: number; + preserved: number; + findings: string[]; +}; + +function runTest(name: string, messages: Message[], options: CompressOptions = {}): TestResult { + const inputChars = chars(messages); + const inputTokens = tokens(messages); + const t0 = performance.now(); + const cr = compress(messages, options) as CompressResult; + const t1 = performance.now(); + + const er = uncompress(cr.messages, cr.verbatim); + const rt = + JSON.stringify(er.messages) === JSON.stringify(messages) && er.missing_ids.length === 0; + + const timeMs = t1 - t0; + const findings: string[] = []; + + // Analyze weaknesses + if (!rt) findings.push('ROUND-TRIP FAILURE'); + if (cr.compression.ratio < 1.05 && cr.compression.messages_compressed > 0) + findings.push( + `Wasted work: ${cr.compression.messages_compressed} messages compressed but ratio only ${cr.compression.ratio.toFixed(2)}x`, + ); + if (cr.compression.quality_score != null && cr.compression.quality_score < 0.8) + 
findings.push(`Quality below 0.80: ${cr.compression.quality_score.toFixed(3)}`); + if (cr.compression.entity_retention != null && cr.compression.entity_retention < 0.7) + findings.push( + `Entity retention below 70%: ${(cr.compression.entity_retention * 100).toFixed(0)}%`, + ); + if (timeMs > messages.length * 2) + findings.push(`Slow: ${(timeMs / messages.length).toFixed(1)}ms/msg (expected <2ms)`); + + // Check for negative compression (output larger than input) + const outputChars = chars(cr.messages); + if (outputChars > inputChars) + findings.push(`Negative compression: output ${outputChars} > input ${inputChars}`); + + return { + name, + msgCount: messages.length, + inputChars, + inputTokens, + ratio: cr.compression.ratio, + quality: cr.compression.quality_score, + entityRet: cr.compression.entity_retention, + roundTrip: rt, + timeMs, + msPerMsg: timeMs / messages.length, + compressed: cr.compression.messages_compressed, + preserved: cr.compression.messages_preserved, + findings, + }; +} + +// --------------------------------------------------------------------------- +// Test suites +// --------------------------------------------------------------------------- + +const allResults: TestResult[] = []; + +function suite(title: string, tests: () => void) { + console.log(`\n${'='.repeat(80)}`); + console.log(` ${title}`); + console.log('='.repeat(80)); + tests(); +} + +function printResults(results: TestResult[]) { + const colW = { + name: 35, + msgs: 6, + chars: 9, + ratio: 7, + qual: 6, + entR: 6, + rt: 4, + time: 9, + msMsg: 8, + }; + + console.log( + [ + 'Test'.padEnd(colW.name), + 'Msgs'.padStart(colW.msgs), + 'Chars'.padStart(colW.chars), + 'Ratio'.padStart(colW.ratio), + 'Qual'.padStart(colW.qual), + 'EntR'.padStart(colW.entR), + 'R/T'.padStart(colW.rt), + 'Time'.padStart(colW.time), + 'ms/msg'.padStart(colW.msMsg), + ].join(' '), + ); + console.log('-'.repeat(100)); + + for (const r of results) { + console.log( + [ + r.name.padEnd(colW.name), + 
String(r.msgCount).padStart(colW.msgs), + String(r.inputChars).padStart(colW.chars), + r.ratio.toFixed(2).padStart(colW.ratio), + (r.quality != null ? (r.quality * 100).toFixed(0) + '%' : '—').padStart(colW.qual), + (r.entityRet != null ? (r.entityRet * 100).toFixed(0) + '%' : '—').padStart(colW.entR), + (r.roundTrip ? 'OK' : 'FAIL').padStart(colW.rt), + (r.timeMs.toFixed(1) + 'ms').padStart(colW.time), + r.msPerMsg.toFixed(2).padStart(colW.msMsg), + ].join(' '), + ); + if (r.findings.length > 0) { + for (const f of r.findings) console.log(` ⚠ ${f}`); + } + } +} + +// --------------------------------------------------------------------------- +// 1. SCALE: message count scaling +// --------------------------------------------------------------------------- + +suite('1. Scale: message count (how does ratio/perf scale?)', () => { + const sizes = [10, 50, 100, 250, 500, 1000, 2000]; + const results: TestResult[] = []; + + for (const size of sizes) { + const messages = buildConversation(size, { codeFreq: 0.15, fillerFreq: 0.2 }); + results.push(runTest(`${size} messages`, messages, { recencyWindow: 4 })); + } + + printResults(results); + allResults.push(...results); +}); + +// --------------------------------------------------------------------------- +// 2. SCALE: message size (very long individual messages) +// --------------------------------------------------------------------------- + +suite('2. 
Scale: message length (single huge messages)', () => { + const results: TestResult[] = []; + const lengths = [1_000, 5_000, 10_000, 50_000, 100_000]; + + for (const len of lengths) { + reset(); + const content = techProse(Math.ceil(len / 100)); + const messages = [msg('user', content.slice(0, len)), msg('assistant', 'Got it.')]; + results.push(runTest(`${(len / 1000).toFixed(0)}k chars`, messages, { recencyWindow: 1 })); + } + + printResults(results); + allResults.push(...results); +}); + +// --------------------------------------------------------------------------- +// 3. COMPOSITION: all code vs all filler vs mixed +// --------------------------------------------------------------------------- + +suite('3. Composition: code-heavy vs filler-heavy vs mixed', () => { + const results: TestResult[] = []; + const N = 100; + + results.push( + runTest('All code (15%→80%)', buildConversation(N, { codeFreq: 0.8, fillerFreq: 0 }), { + recencyWindow: 4, + }), + ); + results.push( + runTest('All filler', buildConversation(N, { codeFreq: 0, fillerFreq: 0.9 }), { + recencyWindow: 4, + }), + ); + results.push( + runTest('Mixed (default)', buildConversation(N, { codeFreq: 0.15, fillerFreq: 0.2 }), { + recencyWindow: 4, + }), + ); + results.push( + runTest( + 'All technical prose', + buildConversation(N, { codeFreq: 0, fillerFreq: 0, avgSentences: 6 }), + { recencyWindow: 4 }, + ), + ); + + printResults(results); + allResults.push(...results); +}); + +// --------------------------------------------------------------------------- +// 4. RECENCY WINDOW: impact on ratio and quality +// --------------------------------------------------------------------------- + +suite('4. 
Recency window impact (500 msgs, varying window)', () => { + const results: TestResult[] = []; + const messages = buildConversation(500, {}); + const windows = [0, 2, 4, 10, 25, 50, 100, 250]; + + for (const rw of windows) { + results.push(runTest(`rw=${rw}`, messages, { recencyWindow: rw })); + } + + printResults(results); + allResults.push(...results); +}); + +// --------------------------------------------------------------------------- +// 5. TOKEN BUDGET: binary-search vs tiered at various budgets +// --------------------------------------------------------------------------- + +suite('5. Token budget: binary-search vs tiered (500 msgs)', () => { + const results: TestResult[] = []; + const messages = buildConversation(500, {}); + const budgets = [2000, 5000, 10000, 25000]; + + for (const budget of budgets) { + results.push( + runTest(`bs budget=${budget}`, messages, { + recencyWindow: 4, + tokenBudget: budget, + forceConverge: true, + }), + ); + results.push( + runTest(`tiered budget=${budget}`, messages, { + recencyWindow: 4, + tokenBudget: budget, + budgetStrategy: 'tiered', + forceConverge: true, + }), + ); + } + + printResults(results); + allResults.push(...results); +}); + +// --------------------------------------------------------------------------- +// 6. V2 FEATURES: impact at scale (500 msgs) +// --------------------------------------------------------------------------- + +suite('6. 
V2 features at scale (500 msgs)', () => { + const results: TestResult[] = []; + const messages = buildConversation(500, {}); + + const configs: [string, CompressOptions][] = [ + ['Default', { recencyWindow: 4 }], + ['+depth=moderate', { recencyWindow: 4, compressionDepth: 'moderate' }], + ['+relevanceThresh=3', { recencyWindow: 4, relevanceThreshold: 3 }], + ['+conversationFlow', { recencyWindow: 4, conversationFlow: true }], + ['+semanticClustering', { recencyWindow: 4, semanticClustering: true }], + ['+coreference', { recencyWindow: 4, coreference: true }], + ['+importanceScoring', { recencyWindow: 4, importanceScoring: true }], + [ + 'Recommended combo', + { + recencyWindow: 4, + conversationFlow: true, + relevanceThreshold: 3, + compressionDepth: 'moderate', + }, + ], + ]; + + for (const [name, opts] of configs) { + results.push(runTest(name, messages, opts)); + } + + printResults(results); + allResults.push(...results); +}); + +// --------------------------------------------------------------------------- +// 7. PATHOLOGICAL: adversarial patterns +// --------------------------------------------------------------------------- + +suite('7. Pathological patterns', () => { + const results: TestResult[] = []; + + // All identical messages + reset(); + const identical = Array.from({ length: 100 }, () => msg('user', techProse(4))); + results.push(runTest('100 identical messages', identical, { recencyWindow: 4, dedup: true })); + + // All very short messages + reset(); + const short = Array.from({ length: 500 }, (_, i) => + msg(i % 2 === 0 ? 
'user' : 'assistant', 'OK.'), + ); + results.push(runTest('500 short msgs (<120ch)', short, { recencyWindow: 4 })); + + // One huge message + many small + reset(); + const oneHuge = [ + msg('assistant', techProse(500)), + ...Array.from({ length: 100 }, () => msg('user', 'Continue.')), + ]; + results.push(runTest('1 huge + 100 small', oneHuge, { recencyWindow: 4 })); + + // Deeply nested code fences + reset(); + const nested = Array.from({ length: 50 }, () => + msg( + 'assistant', + '```ts\nconst a = 1;\n```\n\nSome prose here about the code.\n\n```ts\nconst b = 2;\n```\n\nMore prose about implementation details and design decisions that were made.', + ), + ); + results.push(runTest('50 multi-fence msgs', nested, { recencyWindow: 4 })); + + // Messages with no prose (pure code) + reset(); + const pureCode = Array.from({ length: 50 }, () => + msg( + 'assistant', + '```typescript\nexport function handler(req: Request) {\n const data = parse(req.body);\n validate(data);\n return respond(data);\n}\n```', + ), + ); + results.push(runTest('50 pure-code msgs', pureCode, { recencyWindow: 4 })); + + // Alternating roles with corrections + reset(); + const corrections: Message[] = []; + for (let i = 0; i < 100; i++) { + if (i % 3 === 0) { + corrections.push( + msg( + 'user', + 'Actually, use ' + + randFrom(FNS) + + ' instead of ' + + randFrom(FNS) + + ' for the ' + + randFrom(TASKS) + + '. ' + + techProse(2), + ), + ); + } else { + corrections.push(msg(i % 2 === 0 ? 'user' : 'assistant', techProse(3))); + } + } + results.push( + runTest('100 msgs w/ corrections', corrections, { + recencyWindow: 4, + contradictionDetection: true, + }), + ); + + printResults(results); + allResults.push(...results); +}); + +// --------------------------------------------------------------------------- +// 8. MULTI-ROUND: compress already-compressed output +// --------------------------------------------------------------------------- + +suite('8. 
Multi-round compression (compress the output of compress)', () => { + const results: TestResult[] = []; + const messages = buildConversation(200, {}); + + let current = messages; + for (let round = 1; round <= 5; round++) { + const cr = compress(current, { recencyWindow: 4 }) as CompressResult; + const ratio = chars(messages) / chars(cr.messages); + const t0 = performance.now(); + const cr2 = compress(cr.messages, { recencyWindow: 4 }) as CompressResult; + const t1 = performance.now(); + + results.push({ + name: `Round ${round}`, + msgCount: cr.messages.length, + inputChars: chars(cr.messages), + inputTokens: tokens(cr.messages), + ratio: chars(messages) / chars(cr2.messages), + quality: cr2.compression.quality_score, + entityRet: cr2.compression.entity_retention, + roundTrip: true, // multi-round doesn't guarantee full round-trip + timeMs: t1 - t0, + msPerMsg: (t1 - t0) / cr.messages.length, + compressed: cr2.compression.messages_compressed, + preserved: cr2.compression.messages_preserved, + findings: + ratio === chars(messages) / chars(cr2.messages) + ? 
['No further compression (converged)'] + : [], + }); + current = cr2.messages; + } + + printResults(results); + allResults.push(...results); +}); + +// --------------------------------------------------------------------------- +// Summary +// --------------------------------------------------------------------------- + +console.log(`\n${'='.repeat(80)}`); +console.log(' SUMMARY'); +console.log('='.repeat(80)); + +const allFindings = allResults.flatMap((r) => + r.findings.map((f) => ({ test: r.name, finding: f })), +); +const rtFailures = allResults.filter((r) => !r.roundTrip); +const slowTests = allResults.filter((r) => r.msPerMsg > 2); +const lowQuality = allResults.filter((r) => r.quality != null && r.quality < 0.8); +const lowEntity = allResults.filter((r) => r.entityRet != null && r.entityRet < 0.7); + +console.log(`\n Tests run: ${allResults.length}`); +console.log(` Round-trip failures: ${rtFailures.length}`); +console.log(` Slow tests (>2ms/msg): ${slowTests.length}`); +console.log(` Low quality (<80%): ${lowQuality.length}`); +console.log(` Low entity retention (<70%): ${lowEntity.length}`); +console.log(` Total findings: ${allFindings.length}`); + +if (allFindings.length > 0) { + console.log('\n All findings:'); + for (const { test, finding } of allFindings) { + console.log(` [${test}] ${finding}`); + } +} + +if (rtFailures.length > 0) { + console.log('\n Round-trip failures:'); + for (const r of rtFailures) console.log(` ${r.name}`); +} + +if (slowTests.length > 0) { + console.log('\n Performance concerns:'); + for (const r of slowTests) + console.log( + ` ${r.name}: ${r.msPerMsg.toFixed(2)}ms/msg (${r.msgCount} msgs, ${r.timeMs.toFixed(0)}ms total)`, + ); +} + +console.log(); diff --git a/docs/benchmark-results.md b/docs/benchmark-results.md index 2749acd..49a1f15 100644 --- a/docs/benchmark-results.md +++ b/docs/benchmark-results.md @@ -4,9 +4,9 @@ _Auto-generated by `npm run bench:save`. 
Do not edit manually._ -**v1.3.0** · Generated: 2026-03-21 +**v1.3.0** · Generated: 2026-03-22 -![avg ratio](https://img.shields.io/badge/avg%20ratio-2.01x-blue) ![best](https://img.shields.io/badge/best-4.90x-blue) ![scenarios](https://img.shields.io/badge/scenarios-8-blue) ![round-trip](https://img.shields.io/badge/round--trip-all_PASS-brightgreen) ![gzip](https://img.shields.io/badge/gzip-49.3%20KB-blue) +![avg ratio](https://img.shields.io/badge/avg%20ratio-2.01x-blue) ![best](https://img.shields.io/badge/best-4.90x-blue) ![scenarios](https://img.shields.io/badge/scenarios-8-blue) ![round-trip](https://img.shields.io/badge/round--trip-all_PASS-brightgreen) ![gzip](https://img.shields.io/badge/gzip-49.9%20KB-blue) ## Summary @@ -126,8 +126,8 @@ Target: **2000 tokens** · 1/4 fit | adapters.js | 4.1 KB | 1.3 KB | | classifier.js | 4.5 KB | 1.6 KB | | classify.js | 10.7 KB | 4.3 KB | -| cluster.js | 7.4 KB | 2.4 KB | -| compress.js | 84.1 KB | 16.3 KB | +| cluster.js | 8.4 KB | 2.8 KB | +| compress.js | 84.6 KB | 16.6 KB | | contradiction.js | 7.5 KB | 2.7 KB | | coreference.js | 4.2 KB | 1.5 KB | | dedup.js | 10.0 KB | 2.8 KB | @@ -142,7 +142,7 @@ Target: **2000 tokens** · 1/4 fit | ml-classifier.js | 3.0 KB | 1.2 KB | | summarizer.js | 2.5 KB | 993 B | | types.js | 11 B | 31 B | -| **total** | 183.5 KB | 49.3 KB | +| **total** | 185.0 KB | 49.9 KB | ## LLM vs Deterministic @@ -301,7 +301,7 @@ _Generated: 2026-02-25_ | Version | Date | Avg Char Ratio | Avg Token Ratio | Scenarios | | ------- | ---------- | -------------: | --------------: | --------: | -| 1.3.0 | 2026-03-21 | 2.01 | 2.00 | 8 | +| 1.3.0 | 2026-03-22 | 2.01 | 2.00 | 8 | | 1.2.0 | 2026-03-20 | 2.01 | 2.00 | 8 | | 1.1.0 | 2026-03-20 | 2.01 | 2.00 | 8 | | 1.0.0 | 2026-03-10 | 2.01 | 2.00 | 8 | @@ -321,7 +321,7 @@ _Generated: 2026-02-25_ | Structured content | 1.86x | 1.86x | 0.00% | 0.00% | ─ | | Agentic coding session | 1.48x | 1.48x | 0.00% | 0.00% | ─ | -Bundle: 183.5 KB → 183.5 KB (0.00%) +Bundle: 
183.5 KB → 185.0 KB (+0.86%)
v1.2.0 (2026-03-20) — 2.01x avg diff --git a/src/cluster.ts b/src/cluster.ts index ef1d425..cb7a287 100644 --- a/src/cluster.ts +++ b/src/cluster.ts @@ -203,9 +203,16 @@ function cosineSimilarity(a: Map, b: Map): numbe return dot / (Math.sqrt(normA) * Math.sqrt(normB)); } +/** + * Maximum eligible messages for clustering. Beyond this, the O(n²) similarity + * matrix becomes too expensive. Messages are sampled by taking the most recent. + */ +const MAX_CLUSTER_CANDIDATES = 200; + /** * Agglomerative clustering using cosine similarity on TF-IDF + entity overlap. - * Merges closest clusters until similarity drops below threshold. + * Pre-computes a pairwise similarity matrix to avoid redundant recalculation. + * Caps input at MAX_CLUSTER_CANDIDATES for performance (O(n²) matrix). */ export function clusterMessages( messages: Message[], @@ -214,31 +221,49 @@ export function clusterMessages( ): MessageCluster[] { if (eligibleIndices.length < 2) return []; - const tfidf = computeTfIdf(messages, eligibleIndices); + // Cap to avoid O(n²) blowup — keep the most recent messages + const capped = + eligibleIndices.length > MAX_CLUSTER_CANDIDATES + ? eligibleIndices.slice(-MAX_CLUSTER_CANDIDATES) + : eligibleIndices; + + const tfidf = computeTfIdf(messages, capped); // Entity overlap boost const entitySets = new Map<number, Set<string>>(); - for (const idx of eligibleIndices) { + for (const idx of capped) { const content = (messages[idx].content as string | undefined) ?? ''; entitySets.set(idx, new Set(extractEntities(content, 100))); } - // Combined similarity: 0.7 * cosine(tfidf) + 0.3 * jaccard(entities) - function similarity(i: number, j: number): number { - const cos = cosineSimilarity(tfidf.get(i)!, tfidf.get(j)!); - const eA = entitySets.get(i)!; - const eB = entitySets.get(j)!; - let intersection = 0; - for (const e of eA) if (eB.has(e)) intersection++; - const union = eA.size + eB.size - intersection; - const jaccard = union > 0 ? 
intersection / union : 0; - return 0.7 * cos + 0.3 * jaccard; + // Pre-compute pairwise similarity matrix (indexed by position in capped array) + const n = capped.length; + const simMatrix: number[] = new Array(n * n).fill(0); + for (let i = 0; i < n; i++) { + for (let j = i + 1; j < n; j++) { + const idxA = capped[i]; + const idxB = capped[j]; + const cos = cosineSimilarity(tfidf.get(idxA)!, tfidf.get(idxB)!); + const eA = entitySets.get(idxA)!; + const eB = entitySets.get(idxB)!; + let intersection = 0; + for (const e of eA) if (eB.has(e)) intersection++; + const union = eA.size + eB.size - intersection; + const jaccard = union > 0 ? intersection / union : 0; + const sim = 0.7 * cos + 0.3 * jaccard; + simMatrix[i * n + j] = sim; + simMatrix[j * n + i] = sim; + } } + // Map from message index → position in capped array + const posMap = new Map(); + for (let i = 0; i < n; i++) posMap.set(capped[i], i); + // Start with each message as its own cluster - const clusters: number[][] = eligibleIndices.map((idx) => [idx]); + const clusters: number[][] = capped.map((idx) => [idx]); - // Agglomerative: merge closest pair until threshold + // Agglomerative: merge closest pair using cached similarities while (clusters.length > 1) { let bestSim = -1; let bestI = -1; @@ -246,12 +271,13 @@ export function clusterMessages( for (let ci = 0; ci < clusters.length; ci++) { for (let cj = ci + 1; cj < clusters.length; cj++) { - // Average-linkage similarity between clusters + // Average-linkage using pre-computed matrix let totalSim = 0; let count = 0; for (const a of clusters[ci]) { + const posA = posMap.get(a)!; for (const b of clusters[cj]) { - totalSim += similarity(a, b); + totalSim += simMatrix[posA * n + posMap.get(b)!]; count++; } } diff --git a/src/compress.ts b/src/compress.ts index 8b9ab3a..f16e0c9 100644 --- a/src/compress.ts +++ b/src/compress.ts @@ -333,7 +333,14 @@ function computeBudget( } const min = depth === 'aggressive' ? 40 : depth === 'moderate' ? 
100 : 200; - const max = depth === 'aggressive' ? 120 : depth === 'moderate' ? 300 : 600; + // Scale the cap logarithmically for very long content so huge messages + // (over 5k chars) get proportionally more budget to preserve entities. + // With the 600 base cap: up to 5k chars: 600. At 10k: ~735. At 50k: 1050. At 100k: ~1185. + const baseMax = depth === 'aggressive' ? 120 : depth === 'moderate' ? 300 : 600; + const max = + contentLength > 5000 + ? Math.round(baseMax + baseMax * Math.log10(contentLength / 5000) * 0.75) + : baseMax; return Math.max(min, Math.min(Math.round(contentLength * baseRatio), max)); } @@ -460,7 +467,9 @@ function formatSummary( const entitySuffix = skipEntities ? '' : (() => { - const e = extractEntities(rawText); + // Scale entity count with content length: ~1 per 200 chars, minimum 3, capped at 30 (reached near 6k chars) + const maxEntities = Math.max(3, Math.min(Math.round(rawText.length / 200), 30)); + const e = extractEntities(rawText, maxEntities); return e.length > 0 ? ` | entities: ${e.join(', ')}` : ''; })(); const mergeSuffix = mergeCount && mergeCount > 1 ? ` (${mergeCount} messages merged)` : ''; diff --git a/src/types.ts b/src/types.ts index 20a7357..d6d075a 100644 --- a/src/types.ts +++ b/src/types.ts @@ -193,9 +193,11 @@ export type CompressOptions = { compressionDepth?: 'gentle' | 'moderate' | 'aggressive' | 'auto'; /** Budget strategy when tokenBudget is set. * - 'binary-search': (default) binary search over recencyWindow to fit budget. + * Runs the full pipeline log2(n) times — 7-8x slower than tiered at 500+ messages. * - 'tiered': keeps recencyWindow fixed, progressively compresses older content * by priority tier. System/T0/recent messages are protected; older prose is - * compressed first, then stubbed, then truncated. Better preserves recent context. */ + * compressed first, then stubbed, then truncated. Better preserves recent context + * and is significantly faster at scale. Recommended for 100+ message conversations. */ budgetStrategy?: 'binary-search' | 'tiered'; };