From b58e416eaf5c18b80eeda32cdc8b980bfc4ed483 Mon Sep 17 00:00:00 2001
From: guazi04 <qinyang_mou@HHDT2026020024.local>
Date: Thu, 5 Mar 2026 19:32:35 +0800
Subject: [PATCH] fix: clear tool output and attachments during compaction
 prune
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When compaction prunes old tool outputs, only the `time.compacted` timestamp
was being set — the actual output string and attachments array remained in
storage indefinitely. This caused unbounded storage growth (observed 774 MB
database in 3 weeks, with 81% being part data).

Changes:
- Clear output and attachments when prune marks parts as compacted
- Call prune() in prompt.ts before the context-overflow check and before
  SessionCompaction.create(), to reduce context before LLM summarization
- Add tests verifying that prune clears output and attachments, and that it
  leaves parts untouched when `compaction.prune` is set to false

Fixes part of #16101
---
 packages/opencode/src/session/compaction.ts   |   2 +
 packages/opencode/src/session/prompt.ts       |   2 +
 .../opencode/test/session/compaction.test.ts  | 270 ++++++++++++++++++
 3 files changed, 274 insertions(+)

diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts
index 79884d641ea..d8aaaa91bb5 100644
--- a/packages/opencode/src/session/compaction.ts
+++ b/packages/opencode/src/session/compaction.ts
@@ -91,6 +91,8 @@ export namespace SessionCompaction {
       for (const part of toPrune) {
         if (part.state.status === "completed") {
           part.state.time.compacted = Date.now()
+          part.state.output = ""
+          part.state.attachments = undefined
           await Session.updatePart(part)
         }
       }
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index 4f77920cc98..9be415ef6a4 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -539,6 +539,7 @@ export namespace SessionPrompt {
         continue
       }
 
+      await SessionCompaction.prune({ sessionID })
       // context overflow, needs compaction
       if (
         lastFinished &&
@@ -703,6 +704,7 @@ export namespace SessionPrompt {
 
       if (result === "stop") break
       if (result === "compact") {
+        await SessionCompaction.prune({ sessionID })
         await SessionCompaction.create({
           sessionID,
           agent: lastUser.agent,
diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts
index 452926d12e1..1f8f824dfd6 100644
--- a/packages/opencode/test/session/compaction.test.ts
+++ b/packages/opencode/test/session/compaction.test.ts
@@ -6,6 +6,8 @@ import { Instance } from "../../src/project/instance"
 import { Log } from "../../src/util/log"
 import { tmpdir } from "../fixture/fixture"
 import { Session } from "../../src/session"
+import { MessageV2 } from "../../src/session/message-v2"
+import { Identifier } from "../../src/id/id"
 import type { Provider } from "../../src/provider/provider"
 
 Log.init({ print: false })
@@ -227,6 +229,274 @@ describe("session.compaction.isOverflow", () => {
   })
 })
 
+describe("session.compaction.prune", () => {
+  test("clears output and attachments on pruned parts", async () => {
+    await using tmp = await tmpdir({ git: true })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const session = await Session.create({})
+        const sid = session.id
+
+        // We need 3+ user turns so prune skips the latest 2.
+        // Tool outputs in the oldest turn must exceed PRUNE_PROTECT (40k tokens)
+        // and the pruned portion must exceed PRUNE_MINIMUM (20k tokens).
+        // Token estimate = chars / 4, so 200k chars ≈ 50k tokens.
+        const big = "x".repeat(200_000)
+
+        // Turn 1 (oldest) — will be pruned
+        const user1 = await Session.updateMessage({
+          id: Identifier.ascending("message"),
+          role: "user",
+          sessionID: sid,
+          agent: "default",
+          model: { providerID: "openai", modelID: "gpt-4" },
+          time: { created: Date.now() },
+        })
+        await Session.updatePart({
+          id: Identifier.ascending("part"),
+          messageID: user1.id,
+          sessionID: sid,
+          type: "text",
+          text: "first",
+        })
+        const asst1: MessageV2.Assistant = {
+          id: Identifier.ascending("message"),
+          role: "assistant",
+          sessionID: sid,
+          mode: "default",
+          agent: "default",
+          path: { cwd: tmp.path, root: tmp.path },
+          cost: 0,
+          tokens: { output: 0, input: 0, reasoning: 0, cache: { read: 0, write: 0 } },
+          modelID: "gpt-4",
+          providerID: "openai",
+          parentID: user1.id,
+          time: { created: Date.now() },
+          finish: "end_turn",
+        }
+        await Session.updateMessage(asst1)
+        const toolPart = await Session.updatePart({
+          id: Identifier.ascending("part"),
+          messageID: asst1.id,
+          sessionID: sid,
+          type: "tool",
+          callID: "call-1",
+          tool: "read",
+          state: {
+            status: "completed",
+            input: {},
+            output: big,
+            title: "Read",
+            metadata: {},
+            time: { start: Date.now(), end: Date.now() },
+            attachments: [
+              {
+                id: Identifier.ascending("part"),
+                messageID: asst1.id,
+                sessionID: sid,
+                type: "file",
+                mime: "image/png",
+                url: "data:image/png;base64,abc",
+              },
+            ],
+          },
+        } satisfies MessageV2.ToolPart)
+
+        // Turn 2
+        const user2 = await Session.updateMessage({
+          id: Identifier.ascending("message"),
+          role: "user",
+          sessionID: sid,
+          agent: "default",
+          model: { providerID: "openai", modelID: "gpt-4" },
+          time: { created: Date.now() },
+        })
+        await Session.updatePart({
+          id: Identifier.ascending("part"),
+          messageID: user2.id,
+          sessionID: sid,
+          type: "text",
+          text: "second",
+        })
+        const asst2: MessageV2.Assistant = {
+          id: Identifier.ascending("message"),
+          role: "assistant",
+          sessionID: sid,
+          mode: "default",
+          agent: "default",
+          path: { cwd: tmp.path, root: tmp.path },
+          cost: 0,
+          tokens: { output: 0, input: 0, reasoning: 0, cache: { read: 0, write: 0 } },
+          modelID: "gpt-4",
+          providerID: "openai",
+          parentID: user2.id,
+          time: { created: Date.now() },
+          finish: "end_turn",
+        }
+        await Session.updateMessage(asst2)
+
+        // Turn 3 (latest)
+        const user3 = await Session.updateMessage({
+          id: Identifier.ascending("message"),
+          role: "user",
+          sessionID: sid,
+          agent: "default",
+          model: { providerID: "openai", modelID: "gpt-4" },
+          time: { created: Date.now() },
+        })
+        await Session.updatePart({
+          id: Identifier.ascending("part"),
+          messageID: user3.id,
+          sessionID: sid,
+          type: "text",
+          text: "third",
+        })
+
+        // Run prune
+        await SessionCompaction.prune({ sessionID: sid })
+
+        // Verify the tool part was pruned with output and attachments cleared
+        const msgs = await Session.messages({ sessionID: sid })
+        const pruned = msgs
+          .flatMap((m) => m.parts)
+          .find((p) => p.type === "tool" && p.id === toolPart.id)
+        expect(pruned).toBeDefined()
+        if (pruned?.type === "tool" && pruned.state.status === "completed") {
+          expect(pruned.state.time.compacted).toBeNumber()
+          expect(pruned.state.output).toBe("")
+          expect(pruned.state.attachments).toBeUndefined()
+        } else {
+          throw new Error("expected completed tool part")
+        }
+
+        await Session.remove(sid)
+      },
+    })
+  })
+
+  test("respects disabled config (compaction.prune = false)", async () => {
+    await using tmp = await tmpdir({
+      git: true,
+      init: async (dir) => {
+        await Bun.write(
+          path.join(dir, "opencode.json"),
+          JSON.stringify({ compaction: { prune: false } }),
+        )
+      },
+    })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const session = await Session.create({})
+        const sid = session.id
+        const big = "x".repeat(200_000)
+
+        // Turn 1 (oldest)
+        const user1 = await Session.updateMessage({
+          id: Identifier.ascending("message"),
+          role: "user",
+          sessionID: sid,
+          agent: "default",
+          model: { providerID: "openai", modelID: "gpt-4" },
+          time: { created: Date.now() },
+        })
+        await Session.updatePart({
+          id: Identifier.ascending("part"),
+          messageID: user1.id,
+          sessionID: sid,
+          type: "text",
+          text: "first",
+        })
+        const asst1: MessageV2.Assistant = {
+          id: Identifier.ascending("message"),
+          role: "assistant",
+          sessionID: sid,
+          mode: "default",
+          agent: "default",
+          path: { cwd: tmp.path, root: tmp.path },
+          cost: 0,
+          tokens: { output: 0, input: 0, reasoning: 0, cache: { read: 0, write: 0 } },
+          modelID: "gpt-4",
+          providerID: "openai",
+          parentID: user1.id,
+          time: { created: Date.now() },
+          finish: "end_turn",
+        }
+        await Session.updateMessage(asst1)
+        const toolPart = await Session.updatePart({
+          id: Identifier.ascending("part"),
+          messageID: asst1.id,
+          sessionID: sid,
+          type: "tool",
+          callID: "call-1",
+          tool: "read",
+          state: {
+            status: "completed",
+            input: {},
+            output: big,
+            title: "Read",
+            metadata: {},
+            time: { start: Date.now(), end: Date.now() },
+          },
+        } satisfies MessageV2.ToolPart)
+
+        // Turn 2
+        const user2 = await Session.updateMessage({
+          id: Identifier.ascending("message"),
+          role: "user",
+          sessionID: sid,
+          agent: "default",
+          model: { providerID: "openai", modelID: "gpt-4" },
+          time: { created: Date.now() },
+        })
+        await Session.updatePart({
+          id: Identifier.ascending("part"),
+          messageID: user2.id,
+          sessionID: sid,
+          type: "text",
+          text: "second",
+        })
+
+        // Turn 3 (latest)
+        const user3 = await Session.updateMessage({
+          id: Identifier.ascending("message"),
+          role: "user",
+          sessionID: sid,
+          agent: "default",
+          model: { providerID: "openai", modelID: "gpt-4" },
+          time: { created: Date.now() },
+        })
+        await Session.updatePart({
+          id: Identifier.ascending("part"),
+          messageID: user3.id,
+          sessionID: sid,
+          type: "text",
+          text: "third",
+        })
+
+        // Run prune with config disabled
+        await SessionCompaction.prune({ sessionID: sid })
+
+        // Verify the tool part was NOT pruned
+        const msgs = await Session.messages({ sessionID: sid })
+        const part = msgs
+          .flatMap((m) => m.parts)
+          .find((p) => p.type === "tool" && p.id === toolPart.id)
+        expect(part).toBeDefined()
+        if (part?.type === "tool" && part.state.status === "completed") {
+          expect(part.state.time.compacted).toBeUndefined()
+          expect(part.state.output).toBe(big)
+        } else {
+          throw new Error("expected completed tool part")
+        }
+
+        await Session.remove(sid)
+      },
+    })
+  })
+})
+
 describe("util.token.estimate", () => {
   test("estimates tokens from text (4 chars per token)", () => {
     const text = "x".repeat(4000)