From ce15ce72a5a7d0a565eb03df330d2e9968353cd0 Mon Sep 17 00:00:00 2001 From: "haozhe.yang" Date: Mon, 25 May 2026 14:06:23 +0800 Subject: [PATCH] fix(agent-core): tier anti-repeat reminders at streak counts 3, 5, and 8 Replace the single threshold of 7 with tiered reminders triggered at streak counts 3, 5, and 8. The first reminder is a generic nudge, while the second and third include the tool name, repeat count, and arguments for stronger guidance. --- .../agent-core/src/agent/turn/tool-dedup.ts | 51 +++++--- .../test/agent/turn/tool-dedup.test.ts | 114 +++++++++++++++--- 2 files changed, 128 insertions(+), 37 deletions(-) diff --git a/packages/agent-core/src/agent/turn/tool-dedup.ts b/packages/agent-core/src/agent/turn/tool-dedup.ts index c360589..e90aad0 100644 --- a/packages/agent-core/src/agent/turn/tool-dedup.ts +++ b/packages/agent-core/src/agent/turn/tool-dedup.ts @@ -4,15 +4,28 @@ import type { ExecutableToolResult } from '../../loop/types'; import { canonicalTelemetryArgs } from './canonical-args'; -const CROSS_STEP_DEDUP_TRIGGER_COUNT = 7; - -const REMINDER_TEXT = +const REMINDER_TEXT_1 = '\n\n\n' + 'You are repeating the exact same tool call with identical parameters.' + ' Please carefully analyze the previous result. If the task is not yet complete,' + ' try a different method or parameters instead of repeating the same call.' + '\n'; +function makeReminderText2(toolName: string, repeatCount: number, args: unknown): string { + const argsStr = canonicalTelemetryArgs(args); + return ( + '\n\n\n' + + 'You have repeatedly called the same tool with identical parameters many times.\n' + + 'Repeated tool call detected:\n' + + `- tool: ${toolName}\n` + + `- repeated_times: ${String(repeatCount)}\n` + + `- arguments: ${argsStr}\n` + + 'The previous repeated calls did not make progress. 
Do not call this exact same tool with the exact same arguments again.\n' + + 'Carefully inspect the latest tool result and choose a different next action, different parameters, or finish the task if enough evidence has been gathered.' + + '\n' + ); +} + interface Deferred { readonly promise: Promise; resolve(value: T): void; @@ -30,18 +43,18 @@ function makeKey(toolName: string, args: unknown): string { return `${toolName} ${canonicalTelemetryArgs(args)}`; } -function appendReminder(result: ExecutableToolResult): ExecutableToolResult { +function appendReminder(result: ExecutableToolResult, reminderText: string): ExecutableToolResult { const output = result.output; let newOutput: string | ContentPart[]; if (typeof output === 'string') { - newOutput = output + REMINDER_TEXT; + newOutput = output + reminderText; } else { const arr: ContentPart[] = [...output]; const last = arr.at(-1); if (last !== undefined && last.type === 'text') { - arr[arr.length - 1] = { type: 'text', text: last.text + REMINDER_TEXT }; + arr[arr.length - 1] = { type: 'text', text: last.text + reminderText }; } else { - arr.push({ type: 'text', text: REMINDER_TEXT }); + arr.push({ type: 'text', text: reminderText }); } newOutput = arr; } @@ -67,10 +80,10 @@ const DEDUP_PLACEHOLDER_RESULT: ExecutableToolResult = { output: '' }; * Two behaviours are layered: * - Same-step dedup: a duplicate `(toolName, args)` issued in the same LLM step * reuses the original call's result instead of executing the tool twice. - * - Cross-step dedup: when the exact same call is repeated for - * `CROSS_STEP_DEDUP_TRIGGER_COUNT` consecutive occurrences (counting across - * steps), the result returned to the model is suffixed with a system reminder - * nudging it to try a different approach. 
+ * - Cross-step dedup: when the exact same call is repeated consecutively + * across steps, the result returned to the model is suffixed with a system + * reminder at specific streak thresholds (3, 5, and 8) to nudge the model + * to try a different approach. */ export class ToolCallDeduplicator { private stepDeferreds = new Map>(); @@ -150,8 +163,8 @@ export class ToolCallDeduplicator { */ async finalizeResult( toolCallId: string, - _toolName: string, - _args: unknown, + toolName: string, + args: unknown, result: ExecutableToolResult, ): Promise { // Use the key recorded at registration time, NOT a fresh key from the args @@ -181,8 +194,12 @@ export class ToolCallDeduplicator { } } - const finalResult = - streak >= CROSS_STEP_DEDUP_TRIGGER_COUNT ? appendReminder(result) : result; + let finalResult = result; + if (streak === 3) { + finalResult = appendReminder(result, REMINDER_TEXT_1); + } else if (streak === 5 || streak === 8) { + finalResult = appendReminder(result, makeReminderText2(toolName, streak, args)); + } this.stepDeferreds.get(key)?.resolve(finalResult); return finalResult; @@ -190,6 +207,6 @@ export class ToolCallDeduplicator { } export const __testing = { - CROSS_STEP_DEDUP_TRIGGER_COUNT, - REMINDER_TEXT, + REMINDER_TEXT_1, + makeReminderText2, }; diff --git a/packages/agent-core/test/agent/turn/tool-dedup.test.ts b/packages/agent-core/test/agent/turn/tool-dedup.test.ts index 964f0e4..8e2247b 100644 --- a/packages/agent-core/test/agent/turn/tool-dedup.test.ts +++ b/packages/agent-core/test/agent/turn/tool-dedup.test.ts @@ -3,7 +3,7 @@ import { describe, expect, it } from 'vitest'; import type { ExecutableToolResult } from '../../../src/loop/types'; import { ToolCallDeduplicator, __testing } from '../../../src/agent/turn/tool-dedup'; -const { REMINDER_TEXT, CROSS_STEP_DEDUP_TRIGGER_COUNT } = __testing; +const { REMINDER_TEXT_1, makeReminderText2 } = __testing; function okResult(text: string): ExecutableToolResult { return { output: text }; @@ -73,10 
+73,10 @@ describe('ToolCallDeduplicator', () => { }); describe('cross-step streak', () => { - it(`does not inject reminder below ${String(CROSS_STEP_DEDUP_TRIGGER_COUNT)} consecutive`, async () => { + it('does not inject reminder below 3 consecutive', async () => { const dedup = new ToolCallDeduplicator(); let last: ExecutableToolResult | undefined; - for (let i = 0; i < CROSS_STEP_DEDUP_TRIGGER_COUNT - 1; i += 1) { + for (let i = 0; i < 2; i += 1) { dedup.beginStep(); last = await runOriginal(dedup, `c${String(i)}`, 'Read', { p: 1 }, okResult('R')); dedup.endStep(); @@ -85,22 +85,72 @@ describe('ToolCallDeduplicator', () => { expect(last!.output as string).not.toContain(''); }); - it(`injects reminder at exactly ${String(CROSS_STEP_DEDUP_TRIGGER_COUNT)} consecutive`, async () => { + it('injects reminder1 at exactly 3 consecutive', async () => { const dedup = new ToolCallDeduplicator(); let last: ExecutableToolResult | undefined; - for (let i = 0; i < CROSS_STEP_DEDUP_TRIGGER_COUNT; i += 1) { + for (let i = 0; i < 3; i += 1) { dedup.beginStep(); last = await runOriginal(dedup, `c${String(i)}`, 'Read', { p: 1 }, okResult('R')); dedup.endStep(); } expect(last!.output as string).toContain(''); expect(last!.output as string).toContain('repeating the exact same tool call'); + expect(last!.output as string).not.toContain('repeated_times'); + }); + + it('does not inject reminder at 4 consecutive', async () => { + const dedup = new ToolCallDeduplicator(); + let last: ExecutableToolResult | undefined; + for (let i = 0; i < 4; i += 1) { + dedup.beginStep(); + last = await runOriginal(dedup, `c${String(i)}`, 'Read', { p: 1 }, okResult('R')); + dedup.endStep(); + } + expect(last!.output as string).not.toContain(''); + }); + + it('injects reminder2 at exactly 5 consecutive', async () => { + const dedup = new ToolCallDeduplicator(); + let last: ExecutableToolResult | undefined; + for (let i = 0; i < 5; i += 1) { + dedup.beginStep(); + last = await runOriginal(dedup, 
`c${String(i)}`, 'Read', { p: 1 }, okResult('R')); + dedup.endStep(); + } + expect(last!.output as string).toContain(''); + expect(last!.output as string).toContain('repeated_times: 5'); + expect(last!.output as string).toContain('tool: Read'); + expect(last!.output as string).toContain('arguments:'); + }); + + it('does not inject reminder at 6 or 7 consecutive', async () => { + const dedup = new ToolCallDeduplicator(); + let last: ExecutableToolResult | undefined; + for (let i = 0; i < 7; i += 1) { + dedup.beginStep(); + last = await runOriginal(dedup, `c${String(i)}`, 'Read', { p: 1 }, okResult('R')); + dedup.endStep(); + } + expect(last!.output as string).not.toContain(''); + }); + + it('injects reminder2 at exactly 8 consecutive', async () => { + const dedup = new ToolCallDeduplicator(); + let last: ExecutableToolResult | undefined; + for (let i = 0; i < 8; i += 1) { + dedup.beginStep(); + last = await runOriginal(dedup, `c${String(i)}`, 'Read', { p: 1 }, okResult('R')); + dedup.endStep(); + } + expect(last!.output as string).toContain(''); + expect(last!.output as string).toContain('repeated_times: 8'); + expect(last!.output as string).toContain('tool: Read'); }); it('resets streak when a different call is interleaved', async () => { const dedup = new ToolCallDeduplicator(); - // 6× Read({p:1}) — should NOT trigger yet - for (let i = 0; i < 6; i += 1) { + // 2× Read({p:1}) — should NOT trigger yet + for (let i = 0; i < 2; i += 1) { dedup.beginStep(); await runOriginal(dedup, `a${String(i)}`, 'Read', { p: 1 }, okResult('R')); dedup.endStep(); @@ -116,15 +166,15 @@ describe('ToolCallDeduplicator', () => { expect(last.output as string).not.toContain(''); }); - it('same-step dups inherit the reminder when streak triggers on original', async () => { + it('same-step dups inherit reminder1 when streak triggers on original', async () => { const dedup = new ToolCallDeduplicator(); - // Build streak up to N-1 across previous steps. 
- for (let i = 0; i < CROSS_STEP_DEDUP_TRIGGER_COUNT - 1; i += 1) { + // Build streak up to 2 across previous steps. + for (let i = 0; i < 2; i += 1) { dedup.beginStep(); await runOriginal(dedup, `p${String(i)}`, 'Read', { p: 1 }, okResult('R')); dedup.endStep(); } - // Next step: same call appears twice. First is the original (triggers reminder), + // Next step: same call appears twice. First is the original (triggers reminder1 at streak=3), // second is a same-step dup that should inherit it. dedup.beginStep(); const original = await runOriginal( @@ -140,18 +190,20 @@ describe('ToolCallDeduplicator', () => { dedup.endStep(); expect(original.output as string).toContain(''); + expect(original.output as string).toContain('repeating the exact same tool call'); expect(finalDup.output as string).toContain(''); + expect(finalDup.output as string).toContain('repeating the exact same tool call'); }); it('same-step spam alone does not trigger reminder', async () => { const dedup = new ToolCallDeduplicator(); - // 7 occurrences of the same call within a single step, but no prior + // 8 occurrences of the same call within a single step, but no prior // streak — the trigger is about sustained behaviour across steps, not // intra-step spam. Same-step dedup already short-circuits execution. 
dedup.beginStep(); const cached = dedup.checkSameStep('orig', 'Read', { p: 1 }); expect(cached).toBeNull(); - for (let i = 0; i < CROSS_STEP_DEDUP_TRIGGER_COUNT - 1; i += 1) { + for (let i = 0; i < 7; i += 1) { dedup.checkSameStep(`dup${String(i)}`, 'Read', { p: 1 }); } const final = await dedup.finalizeResult('orig', 'Read', { p: 1 }, okResult('R')); @@ -160,13 +212,13 @@ describe('ToolCallDeduplicator', () => { }); describe('reminder injection into ContentPart[] outputs', () => { - it('appends to a trailing text part', async () => { + it('appends reminder1 to a trailing text part at streak 3', async () => { const dedup = new ToolCallDeduplicator(); const arrayResult: ExecutableToolResult = { output: [{ type: 'text', text: 'hello' }], }; - // Force streak = trigger by running N-1 prior steps then this one. - for (let i = 0; i < CROSS_STEP_DEDUP_TRIGGER_COUNT - 1; i += 1) { + // Build streak up to 2 prior steps then this one (streak=3). + for (let i = 0; i < 2; i += 1) { dedup.beginStep(); await runOriginal(dedup, `p${String(i)}`, 'X', {}, okResult('R')); dedup.endStep(); @@ -177,7 +229,27 @@ describe('ToolCallDeduplicator', () => { const arr = final.output as Array<{ type: string; text: string }>; expect(arr).toHaveLength(1); expect(arr[0]!.type).toBe('text'); - expect(arr[0]!.text).toBe('hello' + REMINDER_TEXT); + expect(arr[0]!.text).toBe('hello' + REMINDER_TEXT_1); + }); + + it('appends reminder2 to a trailing text part at streak 5', async () => { + const dedup = new ToolCallDeduplicator(); + const arrayResult: ExecutableToolResult = { + output: [{ type: 'text', text: 'hello' }], + }; + // Build streak up to 4 prior steps then this one (streak=5). 
+ for (let i = 0; i < 4; i += 1) { + dedup.beginStep(); + await runOriginal(dedup, `p${String(i)}`, 'X', { a: 1 }, okResult('R')); + dedup.endStep(); + } + dedup.beginStep(); + const final = await runOriginal(dedup, 'final', 'X', { a: 1 }, arrayResult); + dedup.endStep(); + const arr = final.output as Array<{ type: string; text: string }>; + expect(arr).toHaveLength(1); + expect(arr[0]!.type).toBe('text'); + expect(arr[0]!.text).toBe('hello' + makeReminderText2('X', 5, { a: 1 })); }); it('pushes a new text part when trailing part is non-text', async () => { @@ -185,7 +257,8 @@ describe('ToolCallDeduplicator', () => { const arrayResult: ExecutableToolResult = { output: [{ type: 'image_url', imageUrl: { url: 'data:foo' } }], }; - for (let i = 0; i < CROSS_STEP_DEDUP_TRIGGER_COUNT - 1; i += 1) { + // Build streak to 3. + for (let i = 0; i < 2; i += 1) { dedup.beginStep(); await runOriginal(dedup, `p${String(i)}`, 'X', {}, okResult('R')); dedup.endStep(); @@ -197,12 +270,13 @@ describe('ToolCallDeduplicator', () => { expect(arr).toHaveLength(2); expect(arr[0]!.type).toBe('image_url'); expect(arr[1]!.type).toBe('text'); - expect(arr[1]!.text).toBe(REMINDER_TEXT); + expect(arr[1]!.text).toBe(REMINDER_TEXT_1); }); it('preserves isError flag when injecting reminder', async () => { const dedup = new ToolCallDeduplicator(); - for (let i = 0; i < CROSS_STEP_DEDUP_TRIGGER_COUNT - 1; i += 1) { + // Build streak to 3. + for (let i = 0; i < 2; i += 1) { dedup.beginStep(); await runOriginal(dedup, `p${String(i)}`, 'X', {}, errResult('boom')); dedup.endStep();