From c633b1ca552c290de8419b413bbced14996eeb8b Mon Sep 17 00:00:00 2001
From: vinci <xyin@zju.edu.cn>
Date: Wed, 11 Mar 2026 17:32:50 +0800
Subject: [PATCH 1/4] feat: add ContextCompressor for context overflow handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a new context compression mechanism inspired by opencode's
compaction approach. Features include:
- Token-based overflow detection
- Tool output pruning to reduce context size
- LLM-based conversation summarization

🤖 Generated with [Qoder](https://qoder.com)
---
 ms_agent/memory/condenser/__init__.py         |   6 +
 .../memory/condenser/context_compressor.py    | 210 ++++++++++++++++++
 ms_agent/memory/utils.py                      |   2 +
 3 files changed, 218 insertions(+)
 create mode 100644 ms_agent/memory/condenser/context_compressor.py

diff --git a/ms_agent/memory/condenser/__init__.py b/ms_agent/memory/condenser/__init__.py
index e69de29bb..0ece6c459 100644
--- a/ms_agent/memory/condenser/__init__.py
+++ b/ms_agent/memory/condenser/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) ModelScope Contributors. All rights reserved.
+from .code_condenser import CodeCondenser
+from .context_compressor import ContextCompressor
+from .refine_condenser import RefineCondenser
+
+__all__ = ['CodeCondenser', 'RefineCondenser', 'ContextCompressor']
diff --git a/ms_agent/memory/condenser/context_compressor.py b/ms_agent/memory/condenser/context_compressor.py
new file mode 100644
index 000000000..855157015
--- /dev/null
+++ b/ms_agent/memory/condenser/context_compressor.py
@@ -0,0 +1,210 @@
+# Copyright (c) ModelScope Contributors. All rights reserved.
+"""
+Context Compressor - Inspired by opencode's context compaction mechanism.
+
+Core concepts:
+1. Token overflow detection - Monitor token usage against context limits
+2. Tool output pruning - Compress old tool call outputs to save context
+3. Summary compaction - Use LLM to generate conversation summary
+
+Reference: desktop/opencode/packages/opencode/src/session/compaction.ts
+"""
+
+from typing import List, Optional
+
+import json
+from ms_agent.llm import LLM, Message
+from ms_agent.memory import Memory
+from ms_agent.utils.logger import logger
+
+
+# Default summary prompt template (from opencode)
+SUMMARY_PROMPT = """Summarize this conversation to help continue the work.
+
+Focus on:
+- Goal: What is the user trying to accomplish?
+- Instructions: Important user requirements or constraints
+- Discoveries: Notable findings during the conversation
+- Accomplished: What's done, in progress, and remaining
+- Relevant files: Files read, edited, or created
+
+Keep it concise but comprehensive enough for another agent to continue."""
+
+
+class ContextCompressor(Memory):
+    """Context compression tool inspired by opencode's compaction mechanism.
+
+    Features:
+    1. Token-based overflow detection
+    2. Tool output pruning for old tool calls
+    3. LLM-based conversation summarization
+    """
+
+    def __init__(self, config):
+        super().__init__(config)
+        mem_config = getattr(config.memory, 'context_compressor', None)
+        if mem_config is None:
+            mem_config = config.memory
+
+        # Token thresholds (inspired by opencode's PRUNE constants)
+        self.context_limit = getattr(mem_config, 'context_limit', 128000)
+        self.prune_protect = getattr(mem_config, 'prune_protect', 40000)
+        self.prune_minimum = getattr(mem_config, 'prune_minimum', 20000)
+        self.reserved_buffer = getattr(mem_config, 'reserved_buffer', 20000)
+
+        # Summary prompt
+        self.summary_prompt = getattr(mem_config, 'summary_prompt',
+                                      SUMMARY_PROMPT)
+
+        # LLM for summarization
+        self.llm: Optional[LLM] = None
+        if getattr(mem_config, 'enable_summary', True):
+            try:
+                self.llm = LLM.from_config(config)
+            except Exception as e:
+                logger.warning(f'Failed to init LLM for summary: {e}')
+
+    def estimate_tokens(self, text: str) -> int:
+        """Estimate token count from text.
+        Simple heuristic: ~4 chars per token for mixed content.
+        """
+        if not text:
+            return 0
+        return len(text) // 4
+
+    def estimate_message_tokens(self, msg: Message) -> int:
+        """Estimate tokens for a single message."""
+        total = 0
+        if msg.content:
+            content = msg.content if isinstance(msg.content,
+                                                str) else json.dumps(
+                                                    msg.content,
+                                                    ensure_ascii=False)
+            total += self.estimate_tokens(content)
+        if msg.tool_calls:
+            total += self.estimate_tokens(json.dumps(msg.tool_calls))
+        if msg.reasoning_content:
+            total += self.estimate_tokens(msg.reasoning_content)
+        return total
+
+    def estimate_total_tokens(self, messages: List[Message]) -> int:
+        """Estimate total tokens for all messages."""
+        return sum(self.estimate_message_tokens(m) for m in messages)
+
+    def is_overflow(self, messages: List[Message]) -> bool:
+        """Check if messages exceed context limit."""
+        total = self.estimate_total_tokens(messages)
+        usable = self.context_limit - self.reserved_buffer
+        return total >= usable
+
+    def prune_tool_outputs(self, messages: List[Message]) -> List[Message]:
+        """Prune old tool outputs to reduce context size.
+
+        Strategy (from opencode):
+        - Scan backwards through messages
+        - Protect the most recent tool outputs (prune_protect tokens)
+        - Truncate older tool outputs
+        """
+        result = []
+        total_tool_tokens = 0
+        pruned_count = 0
+
+        # Process in reverse to protect recent outputs
+        for msg in reversed(messages):
+            if msg.role == 'tool' and msg.content:
+                content_str = msg.content if isinstance(
+                    msg.content, str) else json.dumps(msg.content,
+                                                      ensure_ascii=False)
+                tokens = self.estimate_tokens(content_str)
+                total_tool_tokens += tokens
+
+                # Prune if beyond protection threshold
+                if total_tool_tokens > self.prune_protect:
+                    msg = Message(
+                        role=msg.role,
+                        content='[Output truncated to save context]',
+                        tool_call_id=msg.tool_call_id,
+                        name=msg.name,
+                    )
+                    pruned_count += 1
+            result.append(msg)
+
+        if pruned_count > 0:
+            logger.info(f'Pruned {pruned_count} tool outputs')
+
+        return list(reversed(result))
+
+    def summarize(self, messages: List[Message]) -> Optional[str]:
+        """Generate conversation summary using LLM."""
+        if not self.llm:
+            return None
+
+        # Build conversation text for summarization
+        conv_parts = []
+        for msg in messages:
+            role = msg.role.upper()
+            content = msg.content if isinstance(msg.content, str) else str(
+                msg.content)
+            if content:
+                conv_parts.append(f'{role}: {content[:2000]}')
+
+        conversation = '\n'.join(conv_parts)
+        query = f'{self.summary_prompt}\n\n---\n{conversation}'
+
+        try:
+            response = self.llm.generate(
+                [Message(role='user', content=query)], stream=False)
+            return response.content
+        except Exception as e:
+            logger.error(f'Summary generation failed: {e}')
+            return None
+
+    def compress(self, messages: List[Message]) -> List[Message]:
+        """Compress messages when context overflows.
+
+        Steps:
+        1. Try pruning tool outputs first
+        2. If still overflow, generate summary and replace history
+        """
+        if not self.is_overflow(messages):
+            return messages
+
+        logger.info('Context overflow detected, starting compression')
+
+        # Step 1: Prune tool outputs
+        pruned = self.prune_tool_outputs(messages)
+        if not self.is_overflow(pruned):
+            return pruned
+
+        # Step 2: Generate summary
+        summary = self.summarize(messages)
+        if not summary:
+            logger.warning('Summary failed, returning pruned messages')
+            return pruned
+
+        # Keep system prompt and replace history with summary
+        result = []
+        for msg in messages:
+            if msg.role == 'system':
+                result.append(msg)
+                break
+
+        result.append(
+            Message(
+                role='user',
+                content=f'[Conversation Summary]\n{summary}\n\n'
+                'Please continue based on this summary.'))
+
+        # Keep the most recent user message if different
+        if messages and messages[-1].role == 'user':
+            last_user = messages[-1]
+            if last_user.content and last_user.content != result[-1].content:
+                result.append(last_user)
+
+        logger.info(
+            f'Compressed {len(messages)} messages to {len(result)} messages')
+        return result
+
+    async def run(self, messages: List[Message]) -> List[Message]:
+        """Main entry point for context compression."""
+        return self.compress(messages)
diff --git a/ms_agent/memory/utils.py b/ms_agent/memory/utils.py
index ae0e1223f..b7e20ad30 100644
--- a/ms_agent/memory/utils.py
+++ b/ms_agent/memory/utils.py
@@ -2,6 +2,7 @@
 from omegaconf import DictConfig, OmegaConf
 
 from .condenser.code_condenser import CodeCondenser
+from .condenser.context_compressor import ContextCompressor
 from .condenser.refine_condenser import RefineCondenser
 from .default_memory import DefaultMemory
 from .diversity import Diversity
@@ -11,6 +12,7 @@
     'diversity': Diversity,
     'code_condenser': CodeCondenser,
     'refine_condenser': RefineCondenser,
+    'context_compressor': ContextCompressor,
 }
 
 

From 02809649f2fc89b090a27b419b24a49ff4bc057b Mon Sep 17 00:00:00 2001
From: vinci <xyin@zju.edu.cn>
Date: Fri, 20 Mar 2026 14:50:53 +0800
Subject: [PATCH 2/4] fix lint error

---
 ms_agent/memory/condenser/context_compressor.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/ms_agent/memory/condenser/context_compressor.py b/ms_agent/memory/condenser/context_compressor.py
index 855157015..06a7ce75e 100644
--- a/ms_agent/memory/condenser/context_compressor.py
+++ b/ms_agent/memory/condenser/context_compressor.py
@@ -17,7 +17,6 @@
 from ms_agent.memory import Memory
 from ms_agent.utils.logger import logger
 
-
 # Default summary prompt template (from opencode)
 SUMMARY_PROMPT = """Summarize this conversation to help continue the work.
 
@@ -76,10 +75,9 @@ def estimate_message_tokens(self, msg: Message) -> int:
         """Estimate tokens for a single message."""
         total = 0
         if msg.content:
-            content = msg.content if isinstance(msg.content,
-                                                str) else json.dumps(
-                                                    msg.content,
-                                                    ensure_ascii=False)
+            content = msg.content if isinstance(
+                msg.content, str) else json.dumps(
+                    msg.content, ensure_ascii=False)
             total += self.estimate_tokens(content)
         if msg.tool_calls:
             total += self.estimate_tokens(json.dumps(msg.tool_calls))
@@ -113,8 +111,8 @@ def prune_tool_outputs(self, messages: List[Message]) -> List[Message]:
         for msg in reversed(messages):
             if msg.role == 'tool' and msg.content:
                 content_str = msg.content if isinstance(
-                    msg.content, str) else json.dumps(msg.content,
-                                                      ensure_ascii=False)
+                    msg.content, str) else json.dumps(
+                        msg.content, ensure_ascii=False)
                 tokens = self.estimate_tokens(content_str)
                 total_tool_tokens += tokens
 
@@ -152,8 +150,8 @@ def summarize(self, messages: List[Message]) -> Optional[str]:
         query = f'{self.summary_prompt}\n\n---\n{conversation}'
 
         try:
-            response = self.llm.generate(
-                [Message(role='user', content=query)], stream=False)
+            response = self.llm.generate([Message(role='user', content=query)],
+                                         stream=False)
             return response.content
         except Exception as e:
             logger.error(f'Summary generation failed: {e}')

From 0702ce84546bb74f345f145240adde9e63ba97e2 Mon Sep 17 00:00:00 2001
From: "vinci.grape" <xyin@zju.edu.cn>
Date: Mon, 23 Mar 2026 17:41:41 +0800
Subject: [PATCH 3/4] fix typos: move module docstring into class docstring

---
 .../memory/condenser/context_compressor.py    | 22 ++++++-------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/ms_agent/memory/condenser/context_compressor.py b/ms_agent/memory/condenser/context_compressor.py
index 06a7ce75e..88ab8f997 100644
--- a/ms_agent/memory/condenser/context_compressor.py
+++ b/ms_agent/memory/condenser/context_compressor.py
@@ -1,14 +1,4 @@
 # Copyright (c) ModelScope Contributors. All rights reserved.
-"""
-Context Compressor - Inspired by opencode's context compaction mechanism.
-
-Core concepts:
-1. Token overflow detection - Monitor token usage against context limits
-2. Tool output pruning - Compress old tool call outputs to save context
-3. Summary compaction - Use LLM to generate conversation summary
-
-Reference: desktop/opencode/packages/opencode/src/session/compaction.ts
-"""
 
 from typing import List, Optional
 
@@ -31,12 +21,14 @@
 
 
 class ContextCompressor(Memory):
-    """Context compression tool inspired by opencode's compaction mechanism.
+    """Context Compressor - Inspired by opencode's context compaction mechanism.
+
+    Core concepts:
+    1. Token overflow detection - Monitor token usage against context limits
+    2. Tool output pruning - Compress old tool call outputs to save context
+    3. Summary compaction - Use LLM to generate conversation summary
 
-    Features:
-    1. Token-based overflow detection
-    2. Tool output pruning for old tool calls
-    3. LLM-based conversation summarization
+    Reference: opencode/packages/opencode/src/session/compaction.ts
     """
 
     def __init__(self, config):

From 89d7e4b69a591cb4b81255b27b9c535d6ac7b048 Mon Sep 17 00:00:00 2001
From: "vinci.grape" <xyin@zju.edu.cn>
Date: Mon, 23 Mar 2026 20:15:26 +0800
Subject: [PATCH 4/4] fix memory: use reported token usage, prune in place

---
 .../memory/condenser/context_compressor.py    | 67 ++++++++++++-------
 1 file changed, 43 insertions(+), 24 deletions(-)

diff --git a/ms_agent/memory/condenser/context_compressor.py b/ms_agent/memory/condenser/context_compressor.py
index 88ab8f997..9bec9bcf1 100644
--- a/ms_agent/memory/condenser/context_compressor.py
+++ b/ms_agent/memory/condenser/context_compressor.py
@@ -63,8 +63,8 @@ def estimate_tokens(self, text: str) -> int:
             return 0
         return len(text) // 4
 
-    def estimate_message_tokens(self, msg: Message) -> int:
-        """Estimate tokens for a single message."""
+    def _estimate_message_tokens_from_content(self, msg: Message) -> int:
+        """Heuristic token count from message body (no API usage fields)."""
         total = 0
         if msg.content:
             content = msg.content if isinstance(
@@ -77,8 +77,34 @@ def estimate_message_tokens(self, msg: Message) -> int:
             total += self.estimate_tokens(msg.reasoning_content)
         return total
 
+    def estimate_message_tokens(self, msg: Message) -> int:
+        """Tokens for one message: prefer ``Message`` usage, else content heuristic."""
+        pt = int(getattr(msg, 'prompt_tokens', 0) or 0)
+        ct = int(getattr(msg, 'completion_tokens', 0) or 0)
+        if pt or ct:
+            return pt + ct
+        return self._estimate_message_tokens_from_content(msg)
+
     def estimate_total_tokens(self, messages: List[Message]) -> int:
-        """Estimate total tokens for all messages."""
+        """Total tokens for the conversation."""
+        last_usage_idx = -1
+        for i in range(len(messages) - 1, -1, -1):
+            m = messages[i]
+            if m.role != 'assistant':
+                continue
+            pt = int(getattr(m, 'prompt_tokens', 0) or 0)
+            ct = int(getattr(m, 'completion_tokens', 0) or 0)
+            if pt or ct:
+                last_usage_idx = i
+                break
+        if last_usage_idx >= 0:
+            m = messages[last_usage_idx]
+            base = int(getattr(m, 'prompt_tokens', 0) or 0) + int(
+                getattr(m, 'completion_tokens', 0) or 0)
+            tail = sum(
+                self._estimate_message_tokens_from_content(x)
+                for x in messages[last_usage_idx + 1:])
+            return base + tail
         return sum(self.estimate_message_tokens(m) for m in messages)
 
     def is_overflow(self, messages: List[Message]) -> bool:
@@ -95,34 +121,27 @@ def prune_tool_outputs(self, messages: List[Message]) -> List[Message]:
         - Protect the most recent tool outputs (prune_protect tokens)
         - Truncate older tool outputs
         """
-        result = []
         total_tool_tokens = 0
         pruned_count = 0
 
-        # Process in reverse to protect recent outputs
-        for msg in reversed(messages):
-            if msg.role == 'tool' and msg.content:
-                content_str = msg.content if isinstance(
-                    msg.content, str) else json.dumps(
-                        msg.content, ensure_ascii=False)
-                tokens = self.estimate_tokens(content_str)
-                total_tool_tokens += tokens
-
-                # Prune if beyond protection threshold
-                if total_tool_tokens > self.prune_protect:
-                    msg = Message(
-                        role=msg.role,
-                        content='[Output truncated to save context]',
-                        tool_call_id=msg.tool_call_id,
-                        name=msg.name,
-                    )
-                    pruned_count += 1
-            result.append(msg)
+        for idx in range(len(messages) - 1, -1, -1):
+            msg = messages[idx]
+            if msg.role != 'tool' or not msg.content:
+                continue
+            content_str = msg.content if isinstance(
+                msg.content, str) else json.dumps(
+                    msg.content, ensure_ascii=False)
+            tokens = self.estimate_tokens(content_str)
+            total_tool_tokens += tokens
+
+            if total_tool_tokens > self.prune_protect:
+                msg.content = '[Output truncated to save context]'
+                pruned_count += 1
 
         if pruned_count > 0:
             logger.info(f'Pruned {pruned_count} tool outputs')
 
-        return list(reversed(result))
+        return messages
 
     def summarize(self, messages: List[Message]) -> Optional[str]:
         """Generate conversation summary using LLM."""