62 changes: 62 additions & 0 deletions openhands-sdk/openhands/sdk/context/condenser/base.py
@@ -57,6 +57,68 @@ def handles_condensation_requests(self) -> bool:
"""
return False

# Shared token-budget utilities (for condenser authors)
@staticmethod
def compute_token_budget(llm, token_margin_ratio: float) -> int | None:
"""Compute usable input-token budget for a target LLM.

Returns an integer budget (>= 0) or None if limits are unknown.
"""
try:
max_input = getattr(llm, "max_input_tokens", None)
if not max_input:
return None
max_output = getattr(llm, "max_output_tokens", 0) or 0
headroom = int(max_input * token_margin_ratio)
return max(0, int(max_input) - int(max_output) - headroom)
except Exception:
return None

@staticmethod
def estimate_token_count(llm, events) -> int:
"""Estimate tokens for a sequence of LLMConvertibleEvent using the given LLM.

Falls back to 0 on failure.
"""
try:
from openhands.sdk.event.base import LLMConvertibleEvent

messages = LLMConvertibleEvent.events_to_messages(list(events))
return int(llm.get_token_count(messages))
except Exception:
return 0

@staticmethod
def max_tail_within_budget(view: View, llm, keep_first: int, budget: int) -> int:
"""Binary-search the longest tail we can keep under the token budget.

Counts tokens using the provided LLM. The head is fixed to the first keep_first events.
Returns the number of tail events to keep (>= 0).
"""
from openhands.sdk.event.base import LLMConvertibleEvent

head = view[:keep_first]
total_len = len(view)
max_tail_possible = max(0, total_len - keep_first)
low, high = 0, max_tail_possible
best = 0
while low <= high:
mid = (low + high) // 2
kept_events = list(head) + (list(view[-mid:]) if mid > 0 else [])
msgs = LLMConvertibleEvent.events_to_messages(kept_events)
try:
t = int(llm.get_token_count(msgs))
except Exception:
# If counting fails, be conservative and stop expanding
high = mid - 1
continue
if t <= budget:
best = mid
low = mid + 1
else:
high = mid - 1
return best


class PipelinableCondenserBase(CondenserBase):
"""Abstract condenser interface which may be pipelined. (Since a pipeline
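The three static helpers above are intended as shared utilities for condenser authors. Below is a minimal sketch of how they might be combined, assuming the import paths shown in this diff; `StubLLM` and `plan_tail` are hypothetical names used only for illustration, and the stub models just the attributes the helpers actually read (`max_input_tokens`, `max_output_tokens`, `get_token_count`):

```python
from openhands.sdk.context.condenser.base import CondenserBase
from openhands.sdk.context.view import View


class StubLLM:
    """Hypothetical stand-in exposing only what the helpers above rely on."""

    max_input_tokens = 8_000
    max_output_tokens = 1_000

    def get_token_count(self, messages) -> int:
        # Crude estimate: roughly 4 characters per token of rendered text.
        return sum(len(str(m)) for m in messages) // 4


def plan_tail(view: View, keep_first: int = 4) -> int | None:
    """Return how many tail events to keep, or None if no trimming is needed."""
    llm = StubLLM()
    # With the stub limits: 8000 - 1000 - int(8000 * 0.1) = 6200 usable tokens.
    budget = CondenserBase.compute_token_budget(llm, token_margin_ratio=0.1)
    if budget is None:
        return None  # model limits unknown; a count-based policy would apply
    if CondenserBase.estimate_token_count(llm, view.events) <= budget:
        return None  # the whole view already fits in the budget
    # Longest tail (after the fixed head) that still fits within the budget.
    return CondenserBase.max_tail_within_budget(
        view=view, llm=llm, keep_first=keep_first, budget=budget
    )
```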
@@ -2,7 +2,7 @@

from pydantic import Field, model_validator

from openhands.sdk.context.condenser.base import RollingCondenser
from openhands.sdk.context.condenser.base import CondenserBase, RollingCondenser
from openhands.sdk.context.prompts import render_template
from openhands.sdk.context.view import View
from openhands.sdk.event.condenser import Condensation
@@ -15,6 +15,9 @@ class LLMSummarizingCondenser(RollingCondenser):
llm: LLM
max_size: int = Field(default=120, gt=0)
keep_first: int = Field(default=4, ge=0)
token_margin_ratio: float = Field(
default=0.1, ge=0.0, le=0.5
)  # fraction of the context window reserved as headroom

@model_validator(mode="after")
def validate_keep_first_vs_max_size(self):
@@ -32,19 +35,50 @@ def handles_condensation_requests(self) -> bool:
def should_condense(self, view: View) -> bool:
if view.unhandled_condensation_request:
return True

# Prefer the token-aware check when the LLM exposes context-window limits
# and we can estimate message tokens; fall back to event-count otherwise.
try:
budget = CondenserBase.compute_token_budget(
self.llm, self.token_margin_ratio
)
if budget is not None:
total_tokens = CondenserBase.estimate_token_count(self.llm, view.events)
return total_tokens > budget
except Exception:
# Any failure falls back to count-based behavior
pass

return len(view) > self.max_size
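For a concrete sense of when the token-aware trigger fires, here is a worked sketch of the budget arithmetic; the model limits are illustrative assumptions, not values taken from this PR:

```python
# Illustrative limits; a model without max_input_tokens skips this path.
max_input_tokens = 128_000
max_output_tokens = 8_000
token_margin_ratio = 0.1  # default declared on the condenser above

headroom = int(max_input_tokens * token_margin_ratio)             # 12_800
budget = max(0, max_input_tokens - max_output_tokens - headroom)  # 107_200

# should_condense() returns True once the estimated token count of the view's
# messages exceeds 107_200. If max_input_tokens were unset, the budget would be
# None and the original `len(view) > max_size` check (default 120) applies.
```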

@observe(ignore_inputs=["view"])
def get_condensation(self, view: View) -> Condensation:
head = view[: self.keep_first]
target_size = self.max_size // 2
if view.unhandled_condensation_request:
# Condensation triggered by a condensation request
# should be calculated based on the view size.
target_size = len(view) // 2
# Number of events to keep from the tail -- target size, minus however many
# prefix events from the head, minus one for the summarization event
events_from_tail = target_size - len(head) - 1

# Prefer token-aware trimming if we have model limits; otherwise
# fall back to event-count based trimming as before.
events_from_tail: int | None = None
try:
budget = CondenserBase.compute_token_budget(
self.llm, self.token_margin_ratio
)
if budget is not None:
# Binary search the max tail we can keep within budget
events_from_tail = CondenserBase.max_tail_within_budget(
view=view, llm=self.llm, keep_first=self.keep_first, budget=budget
)
except Exception:
events_from_tail = None

if events_from_tail is None:
target_size = self.max_size // 2
if view.unhandled_condensation_request:
# Condensation triggered by a condensation request
# should be calculated based on the view size.
target_size = len(view) // 2
# Number of events to keep from the tail -- target size, minus however many
# prefix events from the head, minus one for the summarization event
events_from_tail = max(0, target_size - len(head) - 1)

summary_event_content: str = ""

@@ -55,7 +89,11 @@ def get_condensation(self, view: View) -> Condensation:
summary_event_content = message_content.text

# Identify events to be forgotten (those not in head or tail)
forgotten_events = view[self.keep_first : -events_from_tail]
forgotten_events = (
view[self.keep_first : -events_from_tail]
if events_from_tail > 0
else view[self.keep_first :]
)

# Convert events to strings for the template
event_strings = [str(forgotten_event) for forgotten_event in forgotten_events]
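To make the head/tail/forgotten split concrete, here is a small sketch with hypothetical sizes; it also shows why the guard around the negative slice matters:

```python
# Hypothetical sizes: a 50-event view, keep_first=4, and a budget for which
# max_tail_within_budget() reports that a 20-event tail still fits.
keep_first = 4
events_from_tail = 20

head = slice(None, keep_first)                    # view[:4]    -> events 0..3
forgotten = slice(keep_first, -events_from_tail)  # view[4:-20] -> events 4..29
tail = slice(-events_from_tail, None)             # view[-20:]  -> events 30..49

# Events 4..29 are summarized into a single Condensation event; the head and
# tail are kept verbatim. If events_from_tail were 0, the old expression
# view[4:-0] would evaluate as view[4:0] and forget nothing, which is why the
# new code falls back to view[4:] in that case.
```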