Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion lib/hooks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import {
calculateTotalContextTokens,
getEffectiveContextThreshold,
getContextStatus,
getRealTokenCount,
} from "./strategies/utils"

type Strategy = (
Expand Down Expand Up @@ -94,7 +95,9 @@ export function createChatMessageTransformHandler(
fallbackContextWindow: config.tools.todoReminder.fallbackContextWindow,
warningThresholdPercent: config.tools.todoReminder.warningThresholdPercent,
})
const currentTokens = calculateTotalContextTokens(state, output.messages)
const realTokens = getRealTokenCount(output.messages)
const currentTokens =
realTokens ?? calculateTotalContextTokens(state, output.messages)
const status = getContextStatus(currentTokens, threshold.rawWindow)

state.contextPressure = {
Expand Down
33 changes: 25 additions & 8 deletions lib/messages/prune.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,22 @@ export const filterStepMarkers = (
const parts = Array.isArray(msg.parts) ? msg.parts : []
const originalLength = parts.length

msg.parts = parts.filter((part) => {
const filtered = parts.filter((part) => {
if (part.type === "step-start" || part.type === "step-finish") {
return false
}
return true
})

// SAFETY: Never leave a message with empty parts — providers reject
// assistant messages with content: [] as "Improperly formed request"
if (filtered.length === 0 && originalLength > 0) {
msg.parts = [{ type: "text" as const, text: " " } as any]
} else {
msg.parts = filtered
}

totalRemoved += originalLength - msg.parts.length
}

if (totalRemoved > 0) {
Expand Down Expand Up @@ -715,16 +725,23 @@ export const prune = (
reasoningContent = part.text
}

// Handle pruned tool parts - replace with placeholder for layout consistency
// Handle pruned tool parts - replace output only, preserve ToolPart structure
// CRITICAL: Replacing ToolPart with TextPart breaks the tool-call/tool-result
// pairing that toModelMessages() needs. Anthropic rejects requests with orphaned
// tool results or empty content blocks ("Improperly formed request").
if (part.type === "tool" && part.callID && prunedToolIds.has(part.callID)) {
const toolName = part.tool || "tool"
const placeholder = createPrunedToolPlaceholder(toolName)
// Replace the tool part with a text placeholder part
parts[partIndex] = {
type: "text" as const,
text: placeholder,
} as any
logger.debug(`Pruned tool part ${part.callID} (${toolName})`)
// Keep the ToolPart intact, only replace the output content
if (
part.state &&
(part.state.status === "completed" || part.state.status === "error")
) {
const stateMut = part.state as { output: unknown; attachments?: unknown }
stateMut.output = placeholder
stateMut.attachments = undefined
}
logger.debug(`Pruned tool output ${part.callID} (${toolName})`)
continue
}

Expand Down
25 changes: 25 additions & 0 deletions lib/strategies/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,31 @@ export function calculateTotalContextTokens(state: SessionState, messages: WithP
return total
}

/**
 * Extract the real token count from the last assistant message's provider-reported usage.
 *
 * OpenCode populates msg.info.tokens on assistant messages after each LLM response with:
 * { input, output, reasoning, cache: { read, write } }
 *
 * This is the authoritative count including system prompts, tool schemas, and provider
 * formatting overhead that our heuristic estimator misses.
 *
 * @param messages - Session messages, newest last; scanned in reverse for an assistant
 *   message carrying a numeric `tokens.input`.
 * @returns The real token count, or null if no assistant message has token data
 */
export function getRealTokenCount(messages: WithParts[]): number | null {
  for (let i = messages.length - 1; i >= 0; i--) {
    const msg = messages[i]
    if (!msg || msg.info.role !== "assistant") continue

    const tokens = (msg.info as any).tokens
    if (!tokens || typeof tokens.input !== "number") continue

    // Match OpenCode's own isOverflow formula: input + cache.read + output.
    // Guard the other two addends: an in-flight assistant message may have
    // `input` populated before `output`/`cache` exist, and unguarded addition
    // would return NaN and silently break every downstream threshold check.
    const cacheRead = typeof tokens.cache?.read === "number" ? tokens.cache.read : 0
    const output = typeof tokens.output === "number" ? tokens.output : 0
    return tokens.input + cacheRead + output
  }
  return null
}

export const calculateTokensSaved = (
state: SessionState,
messages: WithParts[],
Expand Down
75 changes: 66 additions & 9 deletions tests/messages/prune.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { describe, it, expect, beforeEach } from "vitest"
import { prune, injectHashesIntoToolOutputs } from "../../lib/messages/prune"
import { prune, filterStepMarkers, injectHashesIntoToolOutputs } from "../../lib/messages/prune"
import { stripHashTags } from "../../lib/state/hash-registry"
import type { SessionState, WithParts } from "../../lib/state"
import type { PluginConfig } from "../../lib/config"
Expand Down Expand Up @@ -96,7 +96,7 @@ describe("prune", () => {
})

describe("pruneToolOutputs", () => {
it("should replace pruned tool parts with placeholder", () => {
it("should replace pruned tool output while preserving ToolPart structure", () => {
const state = createMockState(["call_123"])
const messages: WithParts[] = [
createMessage("msg_1", [
Expand All @@ -112,10 +112,13 @@ describe("prune", () => {

prune(state, mockLogger as any, mockConfig, messages)

// Tool part should be replaced with text placeholder for layout consistency
// Tool part should keep its structure, only output replaced
expect(messages[0].parts.length).toBe(1)
expect((messages[0].parts[0] as any).type).toBe("text")
expect((messages[0].parts[0] as any).text).toBe("[read() output pruned]")
expect((messages[0].parts[0] as any).type).toBe("tool")
expect((messages[0].parts[0] as any).callID).toBe("call_123")
expect((messages[0].parts[0] as any).state.status).toBe("completed")
expect((messages[0].parts[0] as any).state.output).toBe("[read() output pruned]")
expect((messages[0].parts[0] as any).state.attachments).toBeUndefined()
})

it("should keep non-pruned tool parts", () => {
Expand All @@ -141,7 +144,7 @@ describe("prune", () => {
expect(output).toBe(originalOutput)
})

it("should replace errored tools in prune list with placeholder", () => {
it("should replace errored tool output while preserving ToolPart structure", () => {
const state = createMockState(["call_error"])
const messages: WithParts[] = [
createMessage("msg_1", [
Expand All @@ -157,10 +160,12 @@ describe("prune", () => {

prune(state, mockLogger as any, mockConfig, messages)

// Errored tools in prune list are replaced with placeholder
// Errored tools in prune list keep structure, only output replaced
expect(messages[0].parts.length).toBe(1)
expect((messages[0].parts[0] as any).type).toBe("text")
expect((messages[0].parts[0] as any).text).toBe("[read() output pruned]")
expect((messages[0].parts[0] as any).type).toBe("tool")
expect((messages[0].parts[0] as any).callID).toBe("call_error")
expect((messages[0].parts[0] as any).state.status).toBe("error")
expect((messages[0].parts[0] as any).state.output).toBe("[read() output pruned]")
})
})

Expand Down Expand Up @@ -509,4 +514,56 @@ describe("prune", () => {
)
})
})

describe("filterStepMarkers", () => {
  // Local helper: mock config with the step-marker pruning strategy toggled.
  const buildConfig = (enabled: boolean) =>
    ({
      ...createMockConfig(),
      strategies: { aggressivePruning: { pruneStepMarkers: enabled } },
    }) as any

  it("should remove step-start and step-finish parts", () => {
    const messages: WithParts[] = [
      createMessage("msg_1", [
        { type: "step-start" },
        { type: "text", text: "hello" },
        { type: "step-finish" },
      ]),
    ]

    filterStepMarkers(messages, buildConfig(true), createMockLogger() as any)

    const remaining = messages[0].parts
    expect(remaining.length).toBe(1)
    expect((remaining[0] as any).type).toBe("text")
  })

  it("should inject placeholder when all parts are step markers", () => {
    const messages: WithParts[] = [
      createMessage("msg_1", [{ type: "step-start" }, { type: "step-finish" }]),
    ]

    filterStepMarkers(messages, buildConfig(true), createMockLogger() as any)

    // A message must never end up with empty parts — providers reject content: []
    const remaining = messages[0].parts
    expect(remaining.length).toBe(1)
    expect((remaining[0] as any).type).toBe("text")
    expect((remaining[0] as any).text).toBe(" ")
  })

  it("should not modify messages when config is disabled", () => {
    const messages: WithParts[] = [
      createMessage("msg_1", [{ type: "step-start" }, { type: "text", text: "hello" }]),
    ]

    filterStepMarkers(messages, buildConfig(false), createMockLogger() as any)

    expect(messages[0].parts.length).toBe(2)
  })
})
})
105 changes: 105 additions & 0 deletions tests/strategies/utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import { describe, it, expect } from "vitest"
import { getRealTokenCount } from "../../lib/strategies/utils"
import type { WithParts } from "../../lib/state"

// Build a minimal assistant-message fixture; `tokens` is attached to info only
// when provided, so "no tokens field" scenarios stay representable.
const createAssistantMsg = (id: string, tokens?: any, parts: any[] = []): WithParts => {
  const info: any = {
    id,
    role: "assistant" as const,
    time: { created: Date.now(), completed: Date.now() },
  }
  if (tokens) {
    info.tokens = tokens
  }
  return { info, parts } as any
}

// Build a minimal user-message fixture carrying a single text part.
const createUserMsg = (id: string): WithParts => {
  const message = {
    info: { id, role: "user" as const, time: { created: Date.now() } },
    parts: [{ type: "text", text: "hello" }],
  }
  return message as any
}

describe("getRealTokenCount", () => {
  it("should return real token count from last assistant message", () => {
    const usage = {
      input: 5000,
      output: 1000,
      reasoning: 500,
      cache: { read: 2000, write: 1000 },
    }
    const messages: WithParts[] = [createUserMsg("u1"), createAssistantMsg("a1", usage)]

    // input + cache.read + output = 5000 + 2000 + 1000
    expect(getRealTokenCount(messages)).toBe(8000)
  })

  it("should use the LAST assistant message with tokens", () => {
    const earlier = {
      input: 1000,
      output: 500,
      reasoning: 0,
      cache: { read: 0, write: 0 },
    }
    const later = {
      input: 8000,
      output: 2000,
      reasoning: 1000,
      cache: { read: 3000, write: 500 },
    }
    const messages: WithParts[] = [
      createUserMsg("u1"),
      createAssistantMsg("a1", earlier),
      createUserMsg("u2"),
      createAssistantMsg("a2", later),
    ]

    // a2 wins: 8000 + 3000 + 2000
    expect(getRealTokenCount(messages)).toBe(13000)
  })

  it("should return null when no assistant messages exist", () => {
    expect(getRealTokenCount([createUserMsg("u1")])).toBeNull()
  })

  it("should return null when assistant messages have no tokens field", () => {
    const messages: WithParts[] = [createUserMsg("u1"), createAssistantMsg("a1")]
    expect(getRealTokenCount(messages)).toBeNull()
  })

  it("should handle missing cache gracefully", () => {
    const messages: WithParts[] = [
      createAssistantMsg("a1", { input: 5000, output: 1000, reasoning: 0 }),
    ]

    // cache.read falls back to 0: 5000 + 0 + 1000
    expect(getRealTokenCount(messages)).toBe(6000)
  })

  it("should skip assistant messages without valid tokens.input", () => {
    const messages: WithParts[] = [
      createAssistantMsg("a1", {
        input: 3000,
        output: 500,
        cache: { read: 1000, write: 0 },
      }),
      createUserMsg("u1"),
      createAssistantMsg("a2", { foo: "bar" }),
    ]

    // a2 carries no numeric input, so a1 is used: 3000 + 1000 + 500
    expect(getRealTokenCount(messages)).toBe(4500)
  })

  it("should return null for empty messages array", () => {
    expect(getRealTokenCount([])).toBeNull()
  })
})
Loading