Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion lib/hooks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import {
calculateTotalContextTokens,
getEffectiveContextThreshold,
getContextStatus,
getRealTokenCount,
} from "./strategies/utils"

type Strategy = (
Expand Down Expand Up @@ -94,7 +95,9 @@ export function createChatMessageTransformHandler(
fallbackContextWindow: config.tools.todoReminder.fallbackContextWindow,
warningThresholdPercent: config.tools.todoReminder.warningThresholdPercent,
})
const currentTokens = calculateTotalContextTokens(state, output.messages)
const realTokens = getRealTokenCount(output.messages)
const currentTokens =
realTokens ?? calculateTotalContextTokens(state, output.messages)
const status = getContextStatus(currentTokens, threshold.rawWindow)

state.contextPressure = {
Expand Down
33 changes: 25 additions & 8 deletions lib/messages/prune.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,22 @@ export const filterStepMarkers = (
const parts = Array.isArray(msg.parts) ? msg.parts : []
const originalLength = parts.length

msg.parts = parts.filter((part) => {
const filtered = parts.filter((part) => {
if (part.type === "step-start" || part.type === "step-finish") {
return false
}
return true
})

// SAFETY: Never leave a message with empty parts — providers reject
// assistant messages with content: [] as "Improperly formed request"
if (filtered.length === 0 && originalLength > 0) {
msg.parts = [{ type: "text" as const, text: " " } as any]
} else {
msg.parts = filtered
}

totalRemoved += originalLength - msg.parts.length
}

if (totalRemoved > 0) {
Expand Down Expand Up @@ -715,16 +725,23 @@ export const prune = (
reasoningContent = part.text
}

// Handle pruned tool parts - replace with placeholder for layout consistency
// Handle pruned tool parts - replace output only, preserve ToolPart structure
// CRITICAL: Replacing ToolPart with TextPart breaks the tool-call/tool-result
// pairing that toModelMessages() needs. Anthropic rejects requests with orphaned
// tool results or empty content blocks ("Improperly formed request").
if (part.type === "tool" && part.callID && prunedToolIds.has(part.callID)) {
const toolName = part.tool || "tool"
const placeholder = createPrunedToolPlaceholder(toolName)
// Replace the tool part with a text placeholder part
parts[partIndex] = {
type: "text" as const,
text: placeholder,
} as any
logger.debug(`Pruned tool part ${part.callID} (${toolName})`)
// Keep the ToolPart intact, only replace the output content
if (
part.state &&
(part.state.status === "completed" || part.state.status === "error")
) {
const stateMut = part.state as { output: unknown; attachments?: unknown }
stateMut.output = placeholder
stateMut.attachments = undefined
}
logger.debug(`Pruned tool output ${part.callID} (${toolName})`)
continue
}

Expand Down
25 changes: 25 additions & 0 deletions lib/strategies/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,31 @@ export function calculateTotalContextTokens(state: SessionState, messages: WithP
return total
}

/**
 * Extract the real token count from the last assistant message's provider-reported usage.
 *
 * OpenCode populates msg.info.tokens on assistant messages after each LLM response with:
 * { input, output, reasoning, cache: { read, write } }
 *
 * This is the authoritative count including system prompts, tool schemas, and provider
 * formatting overhead that our heuristic estimator misses.
 *
 * @param messages - Session messages, newest last; scanned in reverse for an assistant
 *   message carrying a numeric `tokens.input`.
 * @returns The real token count, or null if no assistant message has token data
 */
export function getRealTokenCount(messages: WithParts[]): number | null {
  for (let i = messages.length - 1; i >= 0; i--) {
    const msg = messages[i]
    if (!msg || msg.info.role !== "assistant") continue

    const tokens = (msg.info as any).tokens
    if (!tokens || typeof tokens.input !== "number") continue

    // Match OpenCode's own isOverflow formula: input + cache.read + output.
    // Guard the other two addends: an in-flight assistant message may have
    // `input` populated before `output`/`cache` exist, and unguarded addition
    // would return NaN and silently break every downstream threshold check.
    const cacheRead = typeof tokens.cache?.read === "number" ? tokens.cache.read : 0
    const output = typeof tokens.output === "number" ? tokens.output : 0
    return tokens.input + cacheRead + output
  }
  return null
}

export const calculateTokensSaved = (
state: SessionState,
messages: WithParts[],
Expand Down
75 changes: 66 additions & 9 deletions tests/messages/prune.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { describe, it, expect, beforeEach } from "vitest"
import { prune, injectHashesIntoToolOutputs } from "../../lib/messages/prune"
import { prune, filterStepMarkers, injectHashesIntoToolOutputs } from "../../lib/messages/prune"
import { stripHashTags } from "../../lib/state/hash-registry"
import type { SessionState, WithParts } from "../../lib/state"
import type { PluginConfig } from "../../lib/config"
Expand Down Expand Up @@ -96,7 +96,7 @@ describe("prune", () => {
})

describe("pruneToolOutputs", () => {
it("should replace pruned tool parts with placeholder", () => {
it("should replace pruned tool output while preserving ToolPart structure", () => {
const state = createMockState(["call_123"])
const messages: WithParts[] = [
createMessage("msg_1", [
Expand All @@ -112,10 +112,13 @@ describe("prune", () => {

prune(state, mockLogger as any, mockConfig, messages)

// Tool part should be replaced with text placeholder for layout consistency
// Tool part should keep its structure, only output replaced
expect(messages[0].parts.length).toBe(1)
expect((messages[0].parts[0] as any).type).toBe("text")
expect((messages[0].parts[0] as any).text).toBe("[read() output pruned]")
expect((messages[0].parts[0] as any).type).toBe("tool")
expect((messages[0].parts[0] as any).callID).toBe("call_123")
expect((messages[0].parts[0] as any).state.status).toBe("completed")
expect((messages[0].parts[0] as any).state.output).toBe("[read() output pruned]")
expect((messages[0].parts[0] as any).state.attachments).toBeUndefined()
})

it("should keep non-pruned tool parts", () => {
Expand All @@ -141,7 +144,7 @@ describe("prune", () => {
expect(output).toBe(originalOutput)
})

it("should replace errored tools in prune list with placeholder", () => {
it("should replace errored tool output while preserving ToolPart structure", () => {
const state = createMockState(["call_error"])
const messages: WithParts[] = [
createMessage("msg_1", [
Expand All @@ -157,10 +160,12 @@ describe("prune", () => {

prune(state, mockLogger as any, mockConfig, messages)

// Errored tools in prune list are replaced with placeholder
// Errored tools in prune list keep structure, only output replaced
expect(messages[0].parts.length).toBe(1)
expect((messages[0].parts[0] as any).type).toBe("text")
expect((messages[0].parts[0] as any).text).toBe("[read() output pruned]")
expect((messages[0].parts[0] as any).type).toBe("tool")
expect((messages[0].parts[0] as any).callID).toBe("call_error")
expect((messages[0].parts[0] as any).state.status).toBe("error")
expect((messages[0].parts[0] as any).state.output).toBe("[read() output pruned]")
})
})

Expand Down Expand Up @@ -509,4 +514,56 @@ describe("prune", () => {
)
})
})

describe("filterStepMarkers", () => {
  // Local helper: mock config with the step-marker pruning strategy toggled.
  const buildConfig = (enabled: boolean) =>
    ({
      ...createMockConfig(),
      strategies: { aggressivePruning: { pruneStepMarkers: enabled } },
    }) as any

  it("should remove step-start and step-finish parts", () => {
    const messages: WithParts[] = [
      createMessage("msg_1", [
        { type: "step-start" },
        { type: "text", text: "hello" },
        { type: "step-finish" },
      ]),
    ]

    filterStepMarkers(messages, buildConfig(true), createMockLogger() as any)

    const remaining = messages[0].parts
    expect(remaining.length).toBe(1)
    expect((remaining[0] as any).type).toBe("text")
  })

  it("should inject placeholder when all parts are step markers", () => {
    const messages: WithParts[] = [
      createMessage("msg_1", [{ type: "step-start" }, { type: "step-finish" }]),
    ]

    filterStepMarkers(messages, buildConfig(true), createMockLogger() as any)

    // A message must never end up with empty parts — providers reject content: []
    const remaining = messages[0].parts
    expect(remaining.length).toBe(1)
    expect((remaining[0] as any).type).toBe("text")
    expect((remaining[0] as any).text).toBe(" ")
  })

  it("should not modify messages when config is disabled", () => {
    const messages: WithParts[] = [
      createMessage("msg_1", [{ type: "step-start" }, { type: "text", text: "hello" }]),
    ]

    filterStepMarkers(messages, buildConfig(false), createMockLogger() as any)

    expect(messages[0].parts.length).toBe(2)
  })
})
})
105 changes: 105 additions & 0 deletions tests/strategies/utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import { describe, it, expect } from "vitest"
import { getRealTokenCount } from "../../lib/strategies/utils"
import type { WithParts } from "../../lib/state"

// Build a minimal assistant-message fixture; `tokens` is attached to info only
// when provided, so "no tokens field" scenarios stay representable.
const createAssistantMsg = (id: string, tokens?: any, parts: any[] = []): WithParts => {
  const info: any = {
    id,
    role: "assistant" as const,
    time: { created: Date.now(), completed: Date.now() },
  }
  if (tokens) {
    info.tokens = tokens
  }
  return { info, parts } as any
}

// Build a minimal user-message fixture carrying a single text part.
const createUserMsg = (id: string): WithParts => {
  const message = {
    info: { id, role: "user" as const, time: { created: Date.now() } },
    parts: [{ type: "text", text: "hello" }],
  }
  return message as any
}

describe("getRealTokenCount", () => {
  it("should return real token count from last assistant message", () => {
    const usage = {
      input: 5000,
      output: 1000,
      reasoning: 500,
      cache: { read: 2000, write: 1000 },
    }
    const messages: WithParts[] = [createUserMsg("u1"), createAssistantMsg("a1", usage)]

    // input + cache.read + output = 5000 + 2000 + 1000
    expect(getRealTokenCount(messages)).toBe(8000)
  })

  it("should use the LAST assistant message with tokens", () => {
    const earlier = {
      input: 1000,
      output: 500,
      reasoning: 0,
      cache: { read: 0, write: 0 },
    }
    const later = {
      input: 8000,
      output: 2000,
      reasoning: 1000,
      cache: { read: 3000, write: 500 },
    }
    const messages: WithParts[] = [
      createUserMsg("u1"),
      createAssistantMsg("a1", earlier),
      createUserMsg("u2"),
      createAssistantMsg("a2", later),
    ]

    // a2 wins: 8000 + 3000 + 2000
    expect(getRealTokenCount(messages)).toBe(13000)
  })

  it("should return null when no assistant messages exist", () => {
    expect(getRealTokenCount([createUserMsg("u1")])).toBeNull()
  })

  it("should return null when assistant messages have no tokens field", () => {
    const messages: WithParts[] = [createUserMsg("u1"), createAssistantMsg("a1")]
    expect(getRealTokenCount(messages)).toBeNull()
  })

  it("should handle missing cache gracefully", () => {
    const messages: WithParts[] = [
      createAssistantMsg("a1", { input: 5000, output: 1000, reasoning: 0 }),
    ]

    // cache.read falls back to 0: 5000 + 0 + 1000
    expect(getRealTokenCount(messages)).toBe(6000)
  })

  it("should skip assistant messages without valid tokens.input", () => {
    const messages: WithParts[] = [
      createAssistantMsg("a1", {
        input: 3000,
        output: 500,
        cache: { read: 1000, write: 0 },
      }),
      createUserMsg("u1"),
      createAssistantMsg("a2", { foo: "bar" }),
    ]

    // a2 carries no numeric input, so a1 is used: 3000 + 1000 + 500
    expect(getRealTokenCount(messages)).toBe(4500)
  })

  it("should return null for empty messages array", () => {
    expect(getRealTokenCount([])).toBeNull()
  })
})
Loading