Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 34 additions & 17 deletions packages/agent-core/src/agent/turn/tool-dedup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,28 @@ import type { ExecutableToolResult } from '../../loop/types';

import { canonicalTelemetryArgs } from './canonical-args';

const CROSS_STEP_DEDUP_TRIGGER_COUNT = 7;

const REMINDER_TEXT =
// First-tier reminder: a generic nudge appended verbatim to a tool result.
// The leading blank lines separate it from whatever output precedes it.
const REMINDER_TEXT_1 = [
  '\n\n<system-reminder>\n',
  'You are repeating the exact same tool call with identical parameters.',
  ' Please carefully analyze the previous result. If the task is not yet complete,',
  ' try a different method or parameters instead of repeating the same call.',
  '\n</system-reminder>',
].join('');

/**
 * Builds the detailed reminder block that names the repeated call.
 *
 * @param toolName - Name of the tool that keeps being called.
 * @param repeatCount - Number of repetitions to report in the reminder.
 * @param args - Arguments of the repeated call; rendered through
 *   `canonicalTelemetryArgs`.
 * @returns The reminder text, starting with two newlines so it can be
 *   appended directly to existing output.
 */
function makeReminderText2(toolName: string, repeatCount: number, args: unknown): string {
  const renderedArgs = canonicalTelemetryArgs(args);
  // One entry per line of the reminder; joined with '\n' below.
  const bodyLines = [
    'You have repeatedly called the same tool with identical parameters many times.',
    'Repeated tool call detected:',
    `- tool: ${toolName}`,
    `- repeated_times: ${String(repeatCount)}`,
    `- arguments: ${renderedArgs}`,
    'The previous repeated calls did not make progress. Do not call this exact same tool with the exact same arguments again.',
    'Carefully inspect the latest tool result and choose a different next action, different parameters, or finish the task if enough evidence has been gathered.',
  ];
  return `\n\n<system-reminder>\n${bodyLines.join('\n')}\n</system-reminder>`;
}

interface Deferred<T> {
readonly promise: Promise<T>;
resolve(value: T): void;
Expand All @@ -30,18 +43,18 @@ function makeKey(toolName: string, args: unknown): string {
return `${toolName} ${canonicalTelemetryArgs(args)}`;
}

function appendReminder(result: ExecutableToolResult): ExecutableToolResult {
function appendReminder(result: ExecutableToolResult, reminderText: string): ExecutableToolResult {
const output = result.output;
let newOutput: string | ContentPart[];
if (typeof output === 'string') {
newOutput = output + REMINDER_TEXT;
newOutput = output + reminderText;
} else {
const arr: ContentPart[] = [...output];
const last = arr.at(-1);
if (last !== undefined && last.type === 'text') {
arr[arr.length - 1] = { type: 'text', text: last.text + REMINDER_TEXT };
arr[arr.length - 1] = { type: 'text', text: last.text + reminderText };
} else {
arr.push({ type: 'text', text: REMINDER_TEXT });
arr.push({ type: 'text', text: reminderText });
}
newOutput = arr;
}
Expand All @@ -67,10 +80,10 @@ const DEDUP_PLACEHOLDER_RESULT: ExecutableToolResult = { output: '' };
* Two behaviours are layered:
* - Same-step dedup: a duplicate `(toolName, args)` issued in the same LLM step
* reuses the original call's result instead of executing the tool twice.
* - Cross-step dedup: when the exact same call is repeated for
* `CROSS_STEP_DEDUP_TRIGGER_COUNT` consecutive occurrences (counting across
* steps), the result returned to the model is suffixed with a system reminder
* nudging it to try a different approach.
* - Cross-step dedup: when the exact same call is repeated consecutively
* across steps, the result returned to the model is suffixed with a system
* reminder at specific streak thresholds (3, 5, and 8) to nudge the model
* to try a different approach.
*/
export class ToolCallDeduplicator {
private stepDeferreds = new Map<string, Deferred<ExecutableToolResult>>();
Expand Down Expand Up @@ -150,8 +163,8 @@ export class ToolCallDeduplicator {
*/
async finalizeResult(
toolCallId: string,
_toolName: string,
_args: unknown,
toolName: string,
args: unknown,
result: ExecutableToolResult,
): Promise<ExecutableToolResult> {
// Use the key recorded at registration time, NOT a fresh key from the args
Expand Down Expand Up @@ -181,15 +194,19 @@ export class ToolCallDeduplicator {
}
}

const finalResult =
streak >= CROSS_STEP_DEDUP_TRIGGER_COUNT ? appendReminder(result) : result;
let finalResult = result;
if (streak === 3) {
finalResult = appendReminder(result, REMINDER_TEXT_1);
} else if (streak === 5 || streak === 8) {
finalResult = appendReminder(result, makeReminderText2(toolName, streak, args));
}

this.stepDeferreds.get(key)?.resolve(finalResult);
return finalResult;
}
}

export const __testing = {
CROSS_STEP_DEDUP_TRIGGER_COUNT,
REMINDER_TEXT,
REMINDER_TEXT_1,
makeReminderText2,
};
114 changes: 94 additions & 20 deletions packages/agent-core/test/agent/turn/tool-dedup.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { describe, expect, it } from 'vitest';
import type { ExecutableToolResult } from '../../../src/loop/types';
import { ToolCallDeduplicator, __testing } from '../../../src/agent/turn/tool-dedup';

const { REMINDER_TEXT, CROSS_STEP_DEDUP_TRIGGER_COUNT } = __testing;
const { REMINDER_TEXT_1, makeReminderText2 } = __testing;

function okResult(text: string): ExecutableToolResult {
return { output: text };
Expand Down Expand Up @@ -73,10 +73,10 @@ describe('ToolCallDeduplicator', () => {
});

describe('cross-step streak', () => {
it(`does not inject reminder below ${String(CROSS_STEP_DEDUP_TRIGGER_COUNT)} consecutive`, async () => {
it('does not inject reminder below 3 consecutive', async () => {
const dedup = new ToolCallDeduplicator();
let last: ExecutableToolResult | undefined;
for (let i = 0; i < CROSS_STEP_DEDUP_TRIGGER_COUNT - 1; i += 1) {
for (let i = 0; i < 2; i += 1) {
dedup.beginStep();
last = await runOriginal(dedup, `c${String(i)}`, 'Read', { p: 1 }, okResult('R'));
dedup.endStep();
Expand All @@ -85,22 +85,72 @@ describe('ToolCallDeduplicator', () => {
expect(last!.output as string).not.toContain('<system-reminder>');
});

it(`injects reminder at exactly ${String(CROSS_STEP_DEDUP_TRIGGER_COUNT)} consecutive`, async () => {
it('injects reminder1 at exactly 3 consecutive', async () => {
const dedup = new ToolCallDeduplicator();
let last: ExecutableToolResult | undefined;
for (let i = 0; i < CROSS_STEP_DEDUP_TRIGGER_COUNT; i += 1) {
for (let i = 0; i < 3; i += 1) {
dedup.beginStep();
last = await runOriginal(dedup, `c${String(i)}`, 'Read', { p: 1 }, okResult('R'));
dedup.endStep();
}
expect(last!.output as string).toContain('<system-reminder>');
expect(last!.output as string).toContain('repeating the exact same tool call');
expect(last!.output as string).not.toContain('repeated_times');
});

it('does not inject reminder at 4 consecutive', async () => {
  // Four identical calls, one per step; only the fourth result is inspected.
  const dedup = new ToolCallDeduplicator();
  const outputs: string[] = [];
  for (let step = 0; step < 4; step += 1) {
    dedup.beginStep();
    const result = await runOriginal(dedup, `c${String(step)}`, 'Read', { p: 1 }, okResult('R'));
    outputs.push(result.output as string);
    dedup.endStep();
  }
  expect(outputs.at(-1)).not.toContain('<system-reminder>');
});

it('injects reminder2 at exactly 5 consecutive', async () => {
  // Five identical calls, one per step; the fifth result must carry the detailed reminder.
  const dedup = new ToolCallDeduplicator();
  const outputs: string[] = [];
  for (let step = 0; step < 5; step += 1) {
    dedup.beginStep();
    const result = await runOriginal(dedup, `c${String(step)}`, 'Read', { p: 1 }, okResult('R'));
    outputs.push(result.output as string);
    dedup.endStep();
  }
  const fifth = outputs.at(-1);
  const expectedFragments = ['<system-reminder>', 'repeated_times: 5', 'tool: Read', 'arguments:'];
  for (const fragment of expectedFragments) {
    expect(fifth).toContain(fragment);
  }
});

it('does not inject reminder at 6 or 7 consecutive', async () => {
  const dedup = new ToolCallDeduplicator();
  for (let i = 0; i < 7; i += 1) {
    dedup.beginStep();
    const result = await runOriginal(dedup, `c${String(i)}`, 'Read', { p: 1 }, okResult('R'));
    dedup.endStep();
    // The streak after this call is i + 1. Check BOTH streak 6 and streak 7,
    // as the test name promises; asserting only on the final result would let
    // a reminder wrongly injected at streak 6 go unnoticed.
    if (i >= 5) {
      expect(result.output as string).not.toContain('<system-reminder>');
    }
  }
});

it('injects reminder2 at exactly 8 consecutive', async () => {
  // Eight identical calls, one per step; the eighth result must carry the detailed reminder.
  const dedup = new ToolCallDeduplicator();
  const outputs: string[] = [];
  for (let step = 0; step < 8; step += 1) {
    dedup.beginStep();
    const result = await runOriginal(dedup, `c${String(step)}`, 'Read', { p: 1 }, okResult('R'));
    outputs.push(result.output as string);
    dedup.endStep();
  }
  const eighth = outputs.at(-1);
  const expectedFragments = ['<system-reminder>', 'repeated_times: 8', 'tool: Read'];
  for (const fragment of expectedFragments) {
    expect(eighth).toContain(fragment);
  }
});

it('resets streak when a different call is interleaved', async () => {
const dedup = new ToolCallDeduplicator();
// 6× Read({p:1}) — should NOT trigger yet
for (let i = 0; i < 6; i += 1) {
// 2× Read({p:1}) — should NOT trigger yet
for (let i = 0; i < 2; i += 1) {
dedup.beginStep();
await runOriginal(dedup, `a${String(i)}`, 'Read', { p: 1 }, okResult('R'));
dedup.endStep();
Expand All @@ -116,15 +166,15 @@ describe('ToolCallDeduplicator', () => {
expect(last.output as string).not.toContain('<system-reminder>');
});

it('same-step dups inherit the reminder when streak triggers on original', async () => {
it('same-step dups inherit reminder1 when streak triggers on original', async () => {
const dedup = new ToolCallDeduplicator();
// Build streak up to N-1 across previous steps.
for (let i = 0; i < CROSS_STEP_DEDUP_TRIGGER_COUNT - 1; i += 1) {
// Build streak up to 2 across previous steps.
for (let i = 0; i < 2; i += 1) {
dedup.beginStep();
await runOriginal(dedup, `p${String(i)}`, 'Read', { p: 1 }, okResult('R'));
dedup.endStep();
}
// Next step: same call appears twice. First is the original (triggers reminder),
// Next step: same call appears twice. First is the original (triggers reminder1 at streak=3),
// second is a same-step dup that should inherit it.
dedup.beginStep();
const original = await runOriginal(
Expand All @@ -140,18 +190,20 @@ describe('ToolCallDeduplicator', () => {
dedup.endStep();

expect(original.output as string).toContain('<system-reminder>');
expect(original.output as string).toContain('repeating the exact same tool call');
expect(finalDup.output as string).toContain('<system-reminder>');
expect(finalDup.output as string).toContain('repeating the exact same tool call');
});

it('same-step spam alone does not trigger reminder', async () => {
const dedup = new ToolCallDeduplicator();
// 7 occurrences of the same call within a single step, but no prior
// 8 occurrences of the same call within a single step, but no prior
// streak — the trigger is about sustained behaviour across steps, not
// intra-step spam. Same-step dedup already short-circuits execution.
dedup.beginStep();
const cached = dedup.checkSameStep('orig', 'Read', { p: 1 });
expect(cached).toBeNull();
for (let i = 0; i < CROSS_STEP_DEDUP_TRIGGER_COUNT - 1; i += 1) {
for (let i = 0; i < 7; i += 1) {
dedup.checkSameStep(`dup${String(i)}`, 'Read', { p: 1 });
}
const final = await dedup.finalizeResult('orig', 'Read', { p: 1 }, okResult('R'));
Expand All @@ -160,13 +212,13 @@ describe('ToolCallDeduplicator', () => {
});

describe('reminder injection into ContentPart[] outputs', () => {
it('appends to a trailing text part', async () => {
it('appends reminder1 to a trailing text part at streak 3', async () => {
const dedup = new ToolCallDeduplicator();
const arrayResult: ExecutableToolResult = {
output: [{ type: 'text', text: 'hello' }],
};
// Force streak = trigger by running N-1 prior steps then this one.
for (let i = 0; i < CROSS_STEP_DEDUP_TRIGGER_COUNT - 1; i += 1) {
// Build streak up to 2 prior steps then this one (streak=3).
for (let i = 0; i < 2; i += 1) {
dedup.beginStep();
await runOriginal(dedup, `p${String(i)}`, 'X', {}, okResult('R'));
dedup.endStep();
Expand All @@ -177,15 +229,36 @@ describe('ToolCallDeduplicator', () => {
const arr = final.output as Array<{ type: string; text: string }>;
expect(arr).toHaveLength(1);
expect(arr[0]!.type).toBe('text');
expect(arr[0]!.text).toBe('hello' + REMINDER_TEXT);
expect(arr[0]!.text).toBe('hello' + REMINDER_TEXT_1);
});

it('appends reminder2 to a trailing text part at streak 5', async () => {
  const dedup = new ToolCallDeduplicator();
  // Four priming steps with plain string results; the next call makes it five in a row.
  for (let step = 0; step < 4; step += 1) {
    dedup.beginStep();
    await runOriginal(dedup, `p${String(step)}`, 'X', { a: 1 }, okResult('R'));
    dedup.endStep();
  }
  // The fifth call returns a ContentPart[] whose only part is text.
  const arrayResult: ExecutableToolResult = {
    output: [{ type: 'text', text: 'hello' }],
  };
  dedup.beginStep();
  const final = await runOriginal(dedup, 'final', 'X', { a: 1 }, arrayResult);
  dedup.endStep();
  const parts = final.output as Array<{ type: string; text: string }>;
  expect(parts).toHaveLength(1);
  const [onlyPart] = parts;
  expect(onlyPart!.type).toBe('text');
  expect(onlyPart!.text).toBe(`hello${makeReminderText2('X', 5, { a: 1 })}`);
});

it('pushes a new text part when trailing part is non-text', async () => {
const dedup = new ToolCallDeduplicator();
const arrayResult: ExecutableToolResult = {
output: [{ type: 'image_url', imageUrl: { url: 'data:foo' } }],
};
for (let i = 0; i < CROSS_STEP_DEDUP_TRIGGER_COUNT - 1; i += 1) {
// Build streak to 3.
for (let i = 0; i < 2; i += 1) {
dedup.beginStep();
await runOriginal(dedup, `p${String(i)}`, 'X', {}, okResult('R'));
dedup.endStep();
Expand All @@ -197,12 +270,13 @@ describe('ToolCallDeduplicator', () => {
expect(arr).toHaveLength(2);
expect(arr[0]!.type).toBe('image_url');
expect(arr[1]!.type).toBe('text');
expect(arr[1]!.text).toBe(REMINDER_TEXT);
expect(arr[1]!.text).toBe(REMINDER_TEXT_1);
});

it('preserves isError flag when injecting reminder', async () => {
const dedup = new ToolCallDeduplicator();
for (let i = 0; i < CROSS_STEP_DEDUP_TRIGGER_COUNT - 1; i += 1) {
// Build streak to 3.
for (let i = 0; i < 2; i += 1) {
dedup.beginStep();
await runOriginal(dedup, `p${String(i)}`, 'X', {}, errResult('boom'));
dedup.endStep();
Expand Down
Loading