Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,60 @@ const plugin: Plugin = (async (ctx) => {
primary_tools: [...existingPrimaryTools, "context_prune"],
}
logger.info("Added 'context_prune' to experimental.primary_tools via config mutation")

// Disable OpenCode's built-in compaction and tool output pruning.
// ACP plugin takes full ownership of context management via the
// experimental.chat.messages.transform hook. Without this, OpenCode's
// own compaction (summarize + drop history) and pruning (replace old
// tool outputs with "[Old tool result content cleared]") conflict with
// the plugin's more granular pruning strategies.
;(opencodeConfig as Record<string, unknown>).compaction = { auto: false, prune: false }
logger.info("Disabled OpenCode built-in compaction — ACP manages context")
},
// Last-resort safety net: if compaction triggers despite being disabled
// (e.g., user re-enables it in their config), inject plugin state so the
// summary preserves critical context about what we've been tracking.
"experimental.session.compacting": async (
  _input: { sessionID: string },
  output: { context: string[]; prompt?: string },
) => {
  const lines: string[] = []

  // Carry forward any todos that are not yet finished.
  const openTodos = state.todos.filter(
    (t) => t.status === "pending" || t.status === "in_progress",
  )
  if (openTodos.length > 0) {
    lines.push("## Active Tasks")
    openTodos.forEach((todo) => {
      lines.push(`- [${todo.status}] ${todo.content} (${todo.priority})`)
    })
  }

  // Carry forward which file paths the plugin has been watching (capped at 20).
  const watchedPaths = Array.from(state.cursors.files.pathToCallIds.keys())
  if (watchedPaths.length > 0) {
    lines.push("## Files Being Tracked")
    lines.push(watchedPaths.slice(0, 20).join("\n"))
  }

  // Pruning stats are always included, even when nothing else is tracked.
  lines.push("## Context Management Stats")
  lines.push(`- Total tokens saved by ACP plugin: ${state.stats.totalPruneTokens}`)
  lines.push(`- Tool outputs pruned: ${state.prune.toolIds.length}`)
  lines.push(`- Current turn: ${state.currentTurn}`)

  const pressure = state.contextPressure
  if (pressure) {
    lines.push(
      `- Context pressure: ${pressure.contextPercent}% (${pressure.statusLabel})`,
    )
  }

  output.context.push(lines.join("\n"))
  logger.info("Injected ACP state into compaction context", {
    activeTodos: openTodos.length,
    trackedFiles: watchedPaths.length,
  })
},
}
}) satisfies Plugin
Expand Down
6 changes: 6 additions & 0 deletions lib/config/defaults.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,5 +93,11 @@ export const DEFAULT_CONFIG: PluginConfig = {
stateQuerySupersede: true,
truncateOldErrors: true,
},
tokenBudget: {
warningThreshold: 0.7,
criticalThreshold: 0.85,
targetPercent: 0.6,
protectedRecentTurns: 2,
},
},
}
43 changes: 43 additions & 0 deletions lib/config/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -258,9 +258,51 @@ export const AggressivePruningSchema = z.object({
.describe("Truncate old error outputs to first line only, removing stack traces"),
})

// Configuration for the proactive token-budget pruning strategy.
// All threshold fields are fractions in [0, 1] of the model's context window.
export const TokenBudgetSchema = z.object({
  // Fraction of context usage at which proactive tool-output pruning begins.
  warningThreshold: z.number().min(0).max(1).default(0.7)
    .describe(
      "Context usage percentage that triggers proactive tool output pruning (0.7 = 70%)",
    ),
  // Fraction of context usage at which reasoning blocks are also pruned.
  criticalThreshold: z.number().min(0).max(1).default(0.85)
    .describe("Context usage percentage that triggers reasoning block pruning (0.85 = 85%)"),
  // Fraction of context usage the pruner aims to reach once triggered.
  targetPercent: z.number().min(0).max(1).default(0.6)
    .describe("Target context usage percentage after proactive pruning (0.6 = 60%)"),
  // Optional hard override of the model context window, in tokens; when set,
  // auto-detection from the model ID is skipped entirely.
  modelContextOverride: z.number().positive().optional()
    .describe(
      "Override model context window size in tokens. If set, ignores auto-detection from model ID",
    ),
  // How many of the most recent turns are exempt from proactive pruning.
  protectedRecentTurns: z.number().int().min(0).default(2)
    .describe("Number of recent turns protected from proactive pruning"),
})

// Container schema grouping every pruning-strategy configuration section.
// New strategies register their config schema here so PluginConfigSchema
// picks them up via `strategies`.
export const StrategiesSchema = z.object({
  purgeErrors: PurgeErrorsSchema,
  aggressivePruning: AggressivePruningSchema,
  tokenBudget: TokenBudgetSchema,
})

export const PluginConfigSchema = z.object({
Expand Down Expand Up @@ -305,5 +347,6 @@ export type Tools = z.infer<typeof ToolsSchema>
export type Commands = z.infer<typeof CommandsSchema>
export type PurgeErrors = z.infer<typeof PurgeErrorsSchema>
export type AggressivePruning = z.infer<typeof AggressivePruningSchema>
export type TokenBudget = z.infer<typeof TokenBudgetSchema>
export type Strategies = z.infer<typeof StrategiesSchema>
export type PluginConfig = z.infer<typeof PluginConfigSchema>
3 changes: 2 additions & 1 deletion lib/hooks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import type { Logger } from "./logger"
import type { PluginConfig } from "./config"
import type { OpenCodeClient } from "./client"
import { syncSessionState } from "./state/index"
import { purgeErrors } from "./strategies"
import { purgeErrors, proactivePrune } from "./strategies"
import {
prune,
injectHashesIntoToolOutputs,
Expand Down Expand Up @@ -39,6 +39,7 @@ type Strategy = (

// Registry of pruning strategies applied during message transformation.
// NOTE(review): insertion order appears significant (purgeErrors, then
// proactivePrune, then prune) — JS string-keyed objects iterate in insertion
// order, but confirm the consumer walks this Record in that order.
const PRUNE_STRATEGIES: Record<string, Strategy> = {
  purgeErrors,
  proactivePrune,
  prune,
}

Expand Down
1 change: 1 addition & 0 deletions lib/strategies/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ export { PruneToolContext } from "./_types"

// Strategy implementations
export { purgeErrors } from "./purge-errors"
export { proactivePrune } from "./proactive-prune"

// Tool operations
export {
Expand Down
234 changes: 234 additions & 0 deletions lib/strategies/proactive-prune.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
import { PluginConfig } from "../config"
import { Logger } from "../logger"
import type { SessionState, WithParts } from "../state"
import { buildToolIdList } from "../messages/utils"
import { getFilePathFromParameters, isProtectedFilePath } from "../protected-file-patterns"
import { calculateTokensSaved, countTokens } from "./utils"
import { getPruneCache } from "../state/utils"
import { isMessageCompacted } from "../shared-utils"

/**
* Proactive Prune strategy — replaces OpenCode's built-in PRUNE_PROTECT mechanism.
*
* When the plugin disables OpenCode's compaction (auto: false, prune: false),
* this strategy takes ownership of keeping context within budget.
*
* Thresholds (based on contextPressure.contextPercent):
* - 70-84%: Prune oldest tool outputs (largest first, skip recent 2 turns)
* - 85%+: Also prune reasoning blocks from older messages
*
* This runs as part of the PRUNE_STRATEGIES pipeline in hooks.ts,
* executing on every turn via experimental.chat.messages.transform.
*/

/** Minimum tokens a tool output must have to be worth proactive pruning */
const MIN_PRUNE_TOKENS = 200

/**
 * Run proactive context pruning when usage crosses the configured thresholds.
 *
 * Reads `state.contextPressure` (populated elsewhere — presumably per turn;
 * TODO confirm) and, when usage is at or above the warning threshold, marks
 * tool outputs (phase 1) and then reasoning blocks (phase 2, critical
 * threshold only) for pruning until enough tokens are freed to reach the
 * target percentage. Mutates `state.prune.*`, `state.stats.*`, and clears
 * `state._cache`; the actual content removal happens downstream in the
 * `prune` strategy that consumes the prune ID lists.
 */
export const proactivePrune = (
  state: SessionState,
  logger: Logger,
  config: PluginConfig,
  messages: WithParts[],
): void => {
  const pressure = state.contextPressure
  const budget = config.strategies.tokenBudget
  // Config thresholds are fractions (0-1); contextPercent is an integer
  // percentage, so scale once up front for comparisons.
  const warningPercent = Math.round(budget.warningThreshold * 100)
  const criticalPercent = Math.round(budget.criticalThreshold * 100)
  const targetPercent = Math.round(budget.targetPercent * 100)
  const protectedRecentTurns = budget.protectedRecentTurns

  // No pressure data yet, or usage still below the warning line: nothing to do.
  if (!pressure || pressure.contextPercent < warningPercent) {
    return
  }

  logger.info("Proactive prune triggered", {
    percent: pressure.contextPercent,
    tokens: pressure.contextTokens,
    limit: pressure.effectiveLimit,
  })

  const protectedTools = config.tools.settings.protectedTools
  const { prunedToolIds, prunedReasoningPartIds } = getPruneCache(state)

  // Calculate how many tokens we need to free to land at the target usage.
  const targetTokens = Math.floor(pressure.effectiveLimit * (targetPercent / 100))
  let tokensToFree = pressure.contextTokens - targetTokens
  if (tokensToFree <= 0) return

  let totalFreed = 0

  // Phase 1: Prune oldest tool outputs (largest first within a turn,
  // skipping the protected recent turns).
  const toolCandidates = collectToolCandidates(
    state,
    messages,
    protectedTools,
    prunedToolIds,
    config,
    protectedRecentTurns,
  )

  for (const candidate of toolCandidates) {
    if (totalFreed >= tokensToFree) break

    // NOTE(review): phase-1 prunes are tallied under the
    // strategyStats.autoSupersede.context bucket — confirm that bucket is
    // intended to also cover proactive (budget-driven) pruning.
    state.prune.toolIds.push(candidate.callId)
    state.stats.totalPruneTokens += candidate.tokens
    state.stats.totalPruneMessages += 1
    state.stats.strategyStats.autoSupersede.context.count += 1
    state.stats.strategyStats.autoSupersede.context.tokens += candidate.tokens
    totalFreed += candidate.tokens

    logger.debug(`Proactive-pruned tool ${candidate.toolName} (${candidate.tokens} tokens)`, {
      callId: candidate.callId,
    })
  }

  // Phase 2: If still over critical threshold, prune reasoning blocks.
  if (pressure.contextPercent >= criticalPercent && totalFreed < tokensToFree) {
    const reasoningCandidates = collectReasoningCandidates(
      state,
      messages,
      prunedReasoningPartIds,
      protectedRecentTurns,
    )

    for (const candidate of reasoningCandidates) {
      if (totalFreed >= tokensToFree) break

      // NOTE(review): reasoning prunes are tallied under
      // strategyStats.manualDiscard.thinking and do NOT increment
      // totalPruneMessages (unlike phase 1) — confirm both are intended.
      state.prune.reasoningPartIds.push(candidate.partId)
      state.stats.totalPruneTokens += candidate.tokens
      state.stats.strategyStats.manualDiscard.thinking.count += 1
      state.stats.strategyStats.manualDiscard.thinking.tokens += candidate.tokens
      totalFreed += candidate.tokens

      logger.debug(`Proactive-pruned reasoning block (${candidate.tokens} tokens)`, {
        partId: candidate.partId,
      })
    }
  }

  if (totalFreed > 0) {
    // Invalidate cache since we modified prune arrays
    state._cache = undefined

    // newEstimatedTokens is an estimate only: countTokens-based candidate
    // sizes may not match the provider's real token accounting.
    logger.info("Proactive prune complete", {
      tokensFreed: totalFreed,
      targetFreed: tokensToFree,
      newEstimatedTokens: pressure.contextTokens - totalFreed,
    })
  }
}

// A completed tool call eligible for proactive pruning.
interface ToolCandidate {
  callId: string    // tool call ID, pushed into state.prune.toolIds when pruned
  toolName: string  // tool name, used for logging only
  tokens: number    // estimated token count of the serialized output
  turn: number      // turn the call was made on (0 when no metadata recorded)
}

/**
 * Collect tool output candidates for pruning, sorted oldest-turn-first and
 * then largest-first within the same turn (matching the sort at the bottom —
 * NOT "token count descending" overall).
 * Skips protected tools, recent turns, already-pruned calls, incomplete
 * calls, protected file paths, and outputs below MIN_PRUNE_TOKENS.
 */
function collectToolCandidates(
  state: SessionState,
  messages: WithParts[],
  protectedTools: string[],
  prunedToolIds: Set<string>,
  config: PluginConfig,
  protectedRecentTurns: number,
): ToolCandidate[] {
  const candidates: ToolCandidate[] = []
  const recentTurnThreshold = state.currentTurn - protectedRecentTurns

  for (const msg of messages) {
    // Messages already folded into a compaction summary are out of scope.
    if (isMessageCompacted(state, msg)) continue

    const parts = Array.isArray(msg.parts) ? msg.parts : []
    for (const part of parts) {
      if (part.type !== "tool" || !part.callID) continue
      if (prunedToolIds.has(part.callID)) continue
      if (protectedTools.includes(part.tool)) continue
      if (part.state?.status !== "completed") continue

      // Check turn age.
      // NOTE(review): when no metadata exists for this call, the recency
      // check is skipped entirely and the candidate gets turn 0 below —
      // meaning a recent call with missing metadata is NOT protected.
      // Confirm that is intended.
      const metadata = state.toolParameters.get(part.callID)
      if (metadata && metadata.turn > recentTurnThreshold) continue

      // Check protected file paths
      if (metadata) {
        const filePath = getFilePathFromParameters(metadata.parameters)
        if (isProtectedFilePath(filePath, config.protectedFilePatterns)) continue
      }

      // Estimate token count; skip small outputs that aren't worth pruning.
      const output = part.state.output
      if (!output) continue
      const content = typeof output === "string" ? output : JSON.stringify(output)
      const tokens = countTokens(content)
      if (tokens < MIN_PRUNE_TOKENS) continue

      candidates.push({
        callId: part.callID,
        toolName: part.tool,
        tokens,
        turn: metadata?.turn ?? 0,
      })
    }
  }

  // Sort: oldest first, then largest first within same turn
  return candidates.sort((a, b) => {
    if (a.turn !== b.turn) return a.turn - b.turn
    return b.tokens - a.tokens
  })
}

// A reasoning block eligible for proactive pruning.
interface ReasoningCandidate {
  partId: string
  tokens: number
  turn: number
}

/**
 * Collect reasoning-block candidates for pruning, ordered oldest-turn-first
 * and then largest-first within the same turn. Already-pruned blocks, blocks
 * in the protected recent turns, and blocks under MIN_PRUNE_TOKENS are
 * excluded. Part IDs are synthesized as "<messageId>:<partIndex>".
 */
function collectReasoningCandidates(
  state: SessionState,
  messages: WithParts[],
  prunedReasoningPartIds: Set<string>,
  protectedRecentTurns: number,
): ReasoningCandidate[] {
  const cutoffTurn = state.currentTurn - protectedRecentTurns
  const found: ReasoningCandidate[] = []

  // Messages are chronological, so count user messages to approximate turns.
  let turnCounter = 0
  for (const msg of messages) {
    if (msg.info.role === "user") turnCounter++
    if (isMessageCompacted(state, msg)) continue
    if (msg.info.role !== "assistant") continue
    if (turnCounter > cutoffTurn) continue

    const parts = Array.isArray(msg.parts) ? msg.parts : []
    parts.forEach((part, idx) => {
      if (!part || part.type !== "reasoning" || !part.text) return

      const id = `${msg.info.id}:${idx}`
      if (prunedReasoningPartIds.has(id)) return

      const tokenCount = countTokens(part.text)
      if (tokenCount < MIN_PRUNE_TOKENS) return

      found.push({ partId: id, tokens: tokenCount, turn: turnCounter })
    })
  }

  found.sort((a, b) => (a.turn !== b.turn ? a.turn - b.turn : b.tokens - a.tokens))
  return found
}
Loading
Loading