Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 45 additions & 76 deletions lib/messages/prune.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,18 +68,14 @@ function createPrunedPlaceholder(originalText: string): string {
/**
 * Build the marker text that replaces a tool's output after it has been pruned.
 *
 * @param toolName - Name of the tool whose output is being replaced.
 * @returns A bracketed marker of the form `[<toolName>() output pruned]`.
 */
function createPrunedToolPlaceholder(toolName: string): string {
    const placeholder = `[${toolName}() output pruned]`
    return placeholder
}
// Hash tag names for trailing format
const TOOL_HASH_TAG = "tool_hash"
const MESSAGE_HASH_TAG = "message_hash"
const REASONING_HASH_TAG = "reasoning_hash"

/** Create trailing hash tag */
const createHashTag = (tagName: string, hash: string): string =>
`\n<${tagName}>${hash}</${tagName}>`

/** Check if content already has hash tag with specific hash anywhere in content */
const hasHashTag = (content: string, tagName: string, hash: string): boolean => {
const regex = new RegExp(`<${tagName}>${hash}</${tagName}>`, "i")
/**
 * Build a self-closing ACP hash reference, preceded by a newline.
 *
 * @param type - Tag name placed after the `acp:` namespace prefix.
 * @param hash - Value written into the `prunable_hash` attribute.
 * @returns A newline followed by `<acp:TYPE prunable_hash="HASH"/>`.
 */
const createHashRef = (type: string, hash: string): string => {
    const selfClosingTag = `<acp:${type} prunable_hash="${hash}"/>`
    return `\n${selfClosingTag}`
}

/**
 * Check whether content already contains an ACP tag of the given type that
 * carries the given hash.
 *
 * Only the opening portion of the tag (`<acp:TYPE prunable_hash="HASH"`) is
 * matched, so both the self-closing and the wrapping form are detected. The
 * match is case-insensitive.
 *
 * @param content - Text to search.
 * @param type - Tag name expected after the `acp:` prefix; matched literally.
 * @param hash - Expected `prunable_hash` value; matched literally.
 * @returns `true` when a matching tag opening is found anywhere in `content`.
 */
const hasHashTag = (content: string, type: string, hash: string): boolean => {
    // Escape regex metacharacters so both interpolated values are matched
    // literally; previously only `hash` was escaped.
    const escapeForRegex = (value: string): string =>
        value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
    const pattern = new RegExp(
        `<acp:${escapeForRegex(type)}\\s+prunable_hash="${escapeForRegex(hash)}"`,
        "i",
    )
    return pattern.test(content)
}

Expand Down Expand Up @@ -226,13 +222,13 @@ export const injectHashesIntoToolOutputs = (

// Skip if already has hash prefix (format: xxxxxx - 6 hex chars)
// Skip if already has this specific hash tag anywhere in content
if (part.state.output && hasHashTag(part.state.output, TOOL_HASH_TAG, hash)) {
if (part.state.output && hasHashTag(part.state.output, "tool", hash)) {
continue
}

// Append trailing hash tag
// Append self-closing hash ref to tool output
if (part.state.output) {
part.state.output = `${part.state.output}${createHashTag(TOOL_HASH_TAG, hash)}`
part.state.output = `${part.state.output}${createHashRef("tool", hash)}`
logger.debug(`Injected hash ${hash} into ${part.tool} output`)
}
}
Expand Down Expand Up @@ -354,18 +350,20 @@ export const injectHashesIntoAssistantMessages = (
logger.debug(`Generated hash ${segmentHash} for segment ${segmentId}`)
}

const hashTag = `<${tag.tagName}_hash>${segmentHash}</${tag.tagName}_hash>`
const injectionPoint = tag.end + offsetShift
const openTag = `<${tag.tagName}>`
const newOpenTag = `<${tag.tagName} prunable_hash="${segmentHash}">`
// Replace the opening tag with the attributed version
const openTagStart = tag.start + offsetShift
newText =
newText.slice(0, injectionPoint) +
hashTag +
newText.slice(injectionPoint)
offsetShift += hashTag.length
newText.slice(0, openTagStart) +
newOpenTag +
newText.slice(openTagStart + openTag.length)
offsetShift += newOpenTag.length - openTag.length
}
part.text = newText
}

part.text = `${part.text}${createHashTag(MESSAGE_HASH_TAG, hash)}`
part.text = `${part.text}${createHashRef("message", hash)}`
logger.debug(`Injected hash ${hash} into assistant text part`)
} else {
logger.debug(`Registered hash ${hash} for assistant text part (no injection)`)
Expand All @@ -388,11 +386,11 @@ export const injectHashesIntoAssistantMessages = (
if (lastToolPart) {
const toolState = (lastToolPart as any).state
const hashesToInject = messageHashes.filter(
(hash) => !hasHashTag(toolState.output, MESSAGE_HASH_TAG, hash),
(hash) => !hasHashTag(toolState.output, "message", hash),
)
if (hashesToInject.length > 0) {
const tags = hashesToInject
.map((hash) => createHashTag(MESSAGE_HASH_TAG, hash))
.map((hash) => createHashRef("message", hash))
.join("")
toolState.output = `${toolState.output}${tags}`
logger.debug(
Expand Down Expand Up @@ -509,7 +507,7 @@ export const injectHashesIntoReasoningBlocks = (
// 3) synthetic text part (last resort)
if (reasoningHashes.length > 0) {
const hashTags = reasoningHashes
.map((hash) => createHashTag(REASONING_HASH_TAG, hash))
.map((hash) => createHashRef("reasoning", hash))
.join("")

// Primary: inject into last completed tool output (never stripped)
Expand All @@ -526,11 +524,11 @@ export const injectHashesIntoReasoningBlocks = (
const toolState = (lastToolPart as any).state
// Filter hashes already present in tool output
const hashesToInject = reasoningHashes.filter(
(hash) => !hasHashTag(toolState.output, REASONING_HASH_TAG, hash),
(hash) => !hasHashTag(toolState.output, "reasoning", hash),
)
if (hashesToInject.length > 0) {
const tags = hashesToInject
.map((hash) => createHashTag(REASONING_HASH_TAG, hash))
.map((hash) => createHashRef("reasoning", hash))
.join("")
toolState.output = `${toolState.output}${tags}`
logger.debug(
Expand All @@ -546,11 +544,11 @@ export const injectHashesIntoReasoningBlocks = (

if (firstTextPart) {
const hashesToInject = reasoningHashes.filter(
(hash) => !hasHashTag(firstTextPart.text, REASONING_HASH_TAG, hash),
(hash) => !hasHashTag(firstTextPart.text, "reasoning", hash),
)
if (hashesToInject.length > 0) {
const tags = hashesToInject
.map((hash) => createHashTag(REASONING_HASH_TAG, hash))
.map((hash) => createHashRef("reasoning", hash))
.join("")
firstTextPart.text = `${firstTextPart.text}${tags}`
logger.debug(
Expand Down Expand Up @@ -746,10 +744,10 @@ export const prune = (
if (prunedSegmentIds.size > 0) {
let text = part.text || ""

// Scan for all segment hash tags in the text
// Scan for all segment tags with prunable_hash attribute
const segmentHashMatches = Array.from(
text.matchAll(
/<([a-zA-Z0-9_]+)_hash>([a-f0-9]{6}(?:_\d+)?)<\/(\1)_hash>/gi,
/<([a-zA-Z0-9_]+)\s+prunable_hash="([a-f0-9]{6}(?:_\d+)?)">([\s\S]*?)<\/\1>/gi,
),
)

Expand All @@ -761,48 +759,20 @@ export const prune = (

const tagName = match[1]
const segmentHash = match[2]
const fullHashTag = match[0]
const hashTagIndex = match.index

if (
tagName &&
segmentHash &&
fullHashTag &&
prunedSegmentIds.has(segmentHash)
) {
// Find the preceding tag of the same type
const closingTag = `</${tagName}>`
const closingTagIndex = text.lastIndexOf(closingTag, hashTagIndex)

if (
closingTagIndex !== -1 &&
closingTagIndex + closingTag.length === hashTagIndex
) {
const openingTag = `<${tagName}>`
const openingTagIndex = text.lastIndexOf(
openingTag,
closingTagIndex,
)

if (openingTagIndex !== -1) {
// Found the full segment: openingTag...closingTag + hashTag
const segmentContent = text.substring(
openingTagIndex + openingTag.length,
closingTagIndex,
)
const placeholder = `[${tagName} pruned: ${segmentContent
.trim()
.substring(0, 10)}...]`

text =
text.slice(0, openingTagIndex) +
placeholder +
text.slice(hashTagIndex + fullHashTag.length)
logger.debug(
`Pruned segment ${segmentHash} from part ${partId}`,
)
}
}
const segmentContent = match[3]
const fullMatch = match[0]
const matchIndex = match.index

if (tagName && segmentHash && prunedSegmentIds.has(segmentHash)) {
const placeholder = `[${tagName} pruned: ${(segmentContent || "")
.trim()
.substring(0, 10)}...]`

text =
text.slice(0, matchIndex) +
placeholder +
text.slice(matchIndex + fullMatch.length)
logger.debug(`Pruned segment ${segmentHash} from part ${partId}`)
}
}
part.text = text
Expand Down Expand Up @@ -858,10 +828,9 @@ export function stripAllHashTagsFromMessages(
for (const part of parts) {
if (!part) continue

// Text parts: preserve reasoning_hash and message_hash for LLM visibility
// Only strip tool_hash (already visible in tool outputs) and segment hashes
// Text parts: strip all hash tags (LLM sees hashes via refs in tool outputs)
if (part.type === "text" && typeof part.text === "string") {
const stripped = stripHashTagsSelective(part.text, ["reasoning", "message"])
const stripped = stripHashTags(part.text)
if (stripped !== part.text) {
part.text = stripped
totalStripped++
Expand Down
2 changes: 1 addition & 1 deletion lib/messages/todo-reminder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ I've noticed your todo list hasn't been updated for {turns} turns. Before contin
### 1. Reflect — What changed? Any new risks or blockers?
### 2. Update — Call \`todowrite\` to sync progress
### 3. Prune — Call \`context\` to discard/distill noise
Use hash tags from outputs (\`<tool_hash>\`, \`<message_hash>\`, \`<reasoning_hash>\`) to target content.
Use prunable_hash values from \`<acp:tool>\`, \`<acp:message>\`, \`<acp:reasoning>\` tags to target content.
{stuck_task_guidance}
---
`
Expand Down
16 changes: 7 additions & 9 deletions lib/prompts/context-spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ All hash tags appear in **tool outputs** (the primary visible channel):

| Hash Type | What It Targets | Where You Find It |
|-----------|----------------|-------------------|
| \`<tool_hash>\` | Tool call output (read, glob, bash, etc.) | In that tool's output |
| \`<reasoning_hash>\` | Thinking/reasoning block | In the last tool output of the same response |
| \`<message_hash>\` | Assistant text response | In the last tool output of the same response |
| \`<acp:tool prunable_hash>\` | Tool call output (read, glob, bash, etc.) | Wraps the tool's output |
| \`<acp:reasoning prunable_hash>\` | Thinking/reasoning block | Self-closing ref in last tool output |
| \`<acp:message prunable_hash>\` | Assistant text response | Self-closing ref in last tool output |

All hash types use the same 6-char hex format (e.g., \`a1b2c3\`). Use any hash with discard/distill.

Expand All @@ -29,13 +29,11 @@ All hash types use the same 6-char hex format (e.g., \`a1b2c3\`). Use any hash w

Assistant: Here is my analysis...

[glob: found 47 files in src/]
<tool_hash>a1b2c3</tool_hash>
<acp:tool prunable_hash="a1b2c3">[glob: found 47 files in src/]</acp:tool>

[read: auth.ts - 200 lines of code]
<tool_hash>d4e5f6</tool_hash>
<reasoning_hash>abc123</reasoning_hash>
<message_hash>fed987</message_hash>
<acp:tool prunable_hash="d4e5f6">[read: auth.ts - 200 lines of code]</acp:tool>
<acp:reasoning prunable_hash="abc123"/>
<acp:message prunable_hash="fed987"/>

Detailed findings from analysis:
- Authentication: Currently using sessions...
Expand Down
80 changes: 62 additions & 18 deletions lib/state/hash-registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -168,47 +168,91 @@ export class UnifiedHashRegistry {
}

/**
* Regex for detecting any *_hash XML tag pattern
* Matches: <anything_hash>xxxxxx</anything_hash> or <anything_hash>xxxxxx_N</anything_hash>
* Captures: type and hash value (including collision suffix)
* Supports collision suffix (_2, _3, etc.) for hash deduplication
* ACP hash tag regexes.
*
* Two families:
* Namespaced (tool/message/reasoning wrappers created by ACP):
* <acp:type prunable_hash="xxxxxx">content</acp:type> (wrapping)
* <acp:type prunable_hash="xxxxxx"/> (self-closing ref)
*
* Plain attribute (segments — existing XML tags in content):
* <file prunable_hash="xxxxxx">content</file> (attribute on existing tag)
*
* Supports collision suffix (_2, _3, etc.) for hash deduplication.
*/
export const HASH_TAG_REGEX = /<([a-zA-Z_][a-zA-Z0-9_]*)_hash>([a-f0-9]{6}(?:_\d+)?)<\/\1_hash>/gi

/**
 * Namespaced wrapping: <acp:type prunable_hash="xxxxxx">...</acp:type>
 *
 * Capture groups: 1 = type name after `acp:`, 2 = hash (6 hex chars plus an
 * optional `_N` suffix), 3 = inner content.
 * The inner content is matched lazily, so a match ends at the first closing
 * tag of the same type. `prunable_hash` must be the only attribute, directly
 * followed by `>`. Flags: global and case-insensitive.
 */
export const ACP_WRAP_REGEX =
/<acp:([a-zA-Z_][a-zA-Z0-9_]*)\s+prunable_hash="([a-f0-9]{6}(?:_\d+)?)">([\s\S]*?)<\/acp:\1>/gi

/**
 * Namespaced self-closing: <acp:type prunable_hash="xxxxxx"/>
 *
 * Capture groups: 1 = type name after `acp:`, 2 = hash.
 * Optional whitespace is allowed between the closing quote and `/>`.
 * Flags: global and case-insensitive.
 */
export const ACP_REF_REGEX =
/<acp:([a-zA-Z_][a-zA-Z0-9_]*)\s+prunable_hash="([a-f0-9]{6}(?:_\d+)?)"\s*\/>/gi

/**
 * Plain attribute on existing tags: <tag prunable_hash="xxxxxx">...</tag> (NOT acp: prefixed)
 *
 * Capture groups: 1 = tag name, 2 = hash, 3 = inner content (lazy match up to
 * the first closing tag with the same name).
 * The negative lookahead rejects tag names that begin with `acp:`.
 * Flags: global and case-insensitive.
 */
export const ATTR_HASH_REGEX =
/<(?!acp:)([a-zA-Z_][a-zA-Z0-9_]*)\s+prunable_hash="([a-f0-9]{6}(?:_\d+)?)">([\s\S]*?)<\/\1>/gi

/**
 * Combined: matches all three formats (for extraction/detection)
 *
 * Capture groups: 1 = type/tag name (without any `acp:` prefix), 2 = hash,
 * 3 = inner content, set only for the wrapping alternative.
 * NOTE(review): the `acp:` prefix is optional independently on the opening and
 * closing tag, and the self-closing alternative is accepted with or without the
 * prefix, so this pattern is slightly broader than the union of the three
 * dedicated patterns.
 * Flags: global and case-insensitive.
 */
export const ALL_HASH_REGEX =
/<(?:acp:)?([a-zA-Z_][a-zA-Z0-9_]*)\s+prunable_hash="([a-f0-9]{6}(?:_\d+)?)"(?:\s*\/>|>([\s\S]*?)<\/(?:acp:)?\1>)/gi

/**
* Strip all *_hash tags from content
* Strip all hash tags from content.
* - Namespaced wrappers: unwrapped (inner content preserved, acp tags removed)
* - Namespaced self-closing refs: removed entirely
* - Plain attribute tags: attribute removed, tag structure preserved
*/
export function stripHashTags(content: string): string {
return content.replace(HASH_TAG_REGEX, "")
// 1. Unwrap namespaced wrappers: <acp:type prunable_hash="x">content</acp:type> → content
let result = content.replace(ACP_WRAP_REGEX, "$3")
// 2. Remove namespaced self-closing refs: <acp:type prunable_hash="x"/> → ""
result = result.replace(ACP_REF_REGEX, "")
// 3. Strip attribute from plain tags: <tag prunable_hash="x">content</tag> → <tag>content</tag>
result = result.replace(ATTR_HASH_REGEX, "<$1>$3</$1>")
return result
}

/**
* Strip *_hash tags from content, but preserve specified types.
* Strip hash tags from content, but preserve specified types.
* @param content - The text content to process
* @param keepTypes - Array of type prefixes to preserve (e.g., ["reasoning", "message"])
* These match the prefix before "_hash" in the tag name.
* @returns Content with non-kept hash tags removed
* @param keepTypes - Array of type names to preserve (e.g., ["reasoning", "message"])
* @returns Content with non-kept hash tags unwrapped/removed
*/
export function stripHashTagsSelective(content: string, keepTypes: string[]): string {
const keepSet = new Set(keepTypes.map((t) => t.toLowerCase()))
return content.replace(HASH_TAG_REGEX, (match, type: string) => {
if (keepSet.has(type.toLowerCase())) {
return match
}
// 1. Unwrap namespaced wrappers, but preserve kept types entirely
let result = content.replace(
ACP_WRAP_REGEX,
(match, type: string, _hash: string, inner: string) => {
if (keepSet.has(type.toLowerCase())) return match
return inner
},
)
// 2. Remove namespaced self-closing refs, but preserve kept types
result = result.replace(ACP_REF_REGEX, (match, type: string) => {
if (keepSet.has(type.toLowerCase())) return match
return ""
})
// 3. Strip attribute from plain tags, but preserve kept types
result = result.replace(ATTR_HASH_REGEX, (match, tag: string, _hash: string, inner: string) => {
if (keepSet.has(tag.toLowerCase())) return match
return `<${tag}>${inner}</${tag}>`
})
return result
}

/**
* Extract hash entries from content
* Returns array of detected hash entries with type and hash
* Extract hash entries from content.
* Parses all formats: namespaced wrapping, self-closing, and plain attribute tags.
* Returns array of detected hash entries with type and hash.
*/
export function extractHashTags(
content: string,
): Array<{ type: string; hash: string; position: number }> {
const results: Array<{ type: string; hash: string; position: number }> = []

for (const match of content.matchAll(HASH_TAG_REGEX)) {
for (const match of content.matchAll(ALL_HASH_REGEX)) {
const type = match[1]
const hash = match[2]
if (type && hash) {
Expand Down
Loading
Loading