-
-
Notifications
You must be signed in to change notification settings - Fork 1.8k
style: fix ruff format violations in PR #6660 context compression files #6737
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,7 +8,13 @@ | |
|
|
||
|
|
||
| class ContextManager: | ||
| """Context compression manager.""" | ||
| """Context compression manager. | ||
|
|
||
| Optimizations: | ||
| - 减少重复 token 计算 | ||
| - 添加增量压缩支持 | ||
| - 优化日志输出 | ||
| """ | ||
|
|
||
| def __init__( | ||
| self, | ||
|
|
@@ -41,13 +47,27 @@ | |
| truncate_turns=config.truncate_turns | ||
| ) | ||
|
|
||
| # 缓存上一次计算的消息指纹和 token 数 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

issue (complexity): Consider removing the new caching/fingerprinting logic from ContextManager and delegating all token-count caching to EstimateTokenCounter while preserving compression behavior and stats. You can simplify the new logic by dropping the duplicated caching/fingerprinting in ContextManager.

1. Remove redundant cache state and private-method coupling

Remove the extra fields and the fingerprint method:

class ContextManager:
def __init__(self, config: ContextConfig) -> None:
...
# Remove these:
# self._last_messages_fingerprint: int | None = None
# self._last_token_count: int | None = None
self._compression_count = 0
# Remove this method entirely
# def _get_messages_fingerprint(self, messages: list[Message]) -> int:
# ...2. Simplify
|
||
| self._last_messages_fingerprint: int | None = None | ||
| self._last_token_count: int | None = None | ||
| self._compression_count = 0 | ||
|
|
||
| def _get_messages_fingerprint(self, messages: list[Message]) -> int: | ||
| """生成消息列表的指纹,用于检测消息内容是否变化。""" | ||
| if not messages: | ||
| return 0 | ||
|
|
||
| # 使用 token counter 的缓存键作为指纹 | ||
| return self.token_counter._get_cache_key(messages) | ||
|
Comment on lines
+60
to
+61
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

suggestion: Using a private method for the fingerprint couples ContextManager tightly to token_counter internals. Since this calls a private method, consider exposing a public API on the token counter instead.

Suggested implementation:

# 使用 token counter 的公开缓存键方法作为指纹
return self.token_counter.get_cache_key(messages)

You will also need to:
|
||
|
|
||
| async def process( | ||
| self, messages: list[Message], trusted_token_usage: int = 0 | ||
| ) -> list[Message]: | ||
| """Process the messages. | ||
|
|
||
| Args: | ||
| messages: The original message list. | ||
| trusted_token_usage: The total token usage that LLM API returned. | ||
|
|
||
| Returns: | ||
| The processed message list. | ||
|
|
@@ -65,14 +85,34 @@ | |
|
|
||
| # 2. 基于 token 的压缩 | ||
| if self.config.max_context_tokens > 0: | ||
| total_tokens = self.token_counter.count_tokens( | ||
| result, trusted_token_usage | ||
| ) | ||
| # 优化: 使用缓存的 token 计数或计算新值 | ||
| current_fingerprint = self._get_messages_fingerprint(messages) | ||
|
|
||
| if trusted_token_usage > 0: | ||
| total_tokens = trusted_token_usage | ||
| elif ( | ||
| self._last_messages_fingerprint is not None | ||
| and self._last_messages_fingerprint == current_fingerprint | ||
| and self._last_token_count is not None | ||
| ): | ||
| # 消息内容没变化,使用缓存的 token 计数 | ||
| total_tokens = self._last_token_count | ||
| else: | ||
| # 消息内容变了,需要重新计算 | ||
| total_tokens = self.token_counter.count_tokens(result) | ||
| self._last_messages_fingerprint = current_fingerprint | ||
|
|
||
| # 更新缓存 | ||
| self._last_token_count = total_tokens | ||
|
|
||
| if self.compressor.should_compress( | ||
| result, total_tokens, self.config.max_context_tokens | ||
| ): | ||
| result = await self._run_compression(result, total_tokens) | ||
| # 压缩后更新指纹 | ||
| self._last_messages_fingerprint = self._get_messages_fingerprint( | ||
| result | ||
| ) | ||
|
|
||
| return result | ||
| except Exception as e: | ||
|
|
@@ -94,27 +134,64 @@ | |
| """ | ||
| logger.debug("Compress triggered, starting compression...") | ||
|
|
||
| self._compression_count += 1 | ||
|
|
||
| messages = await self.compressor(messages) | ||
|
|
||
| # double check | ||
| tokens_after_summary = self.token_counter.count_tokens(messages) | ||
| # 优化: 压缩后只计算一次 token | ||
| tokens_after_compression = self.token_counter.count_tokens(messages) | ||
|
|
||
| # calculate compress rate | ||
| compress_rate = (tokens_after_summary / self.config.max_context_tokens) * 100 | ||
| compress_rate = ( | ||
|
Comment on lines
143
to
+145
Check failure — Code scanning / CodeQL: Clear-text logging of sensitive information (High)

This expression logs sensitive data (secret). Error loading related location.
This expression logs sensitive data (secret). Error loading related location. |
||
| tokens_after_compression / self.config.max_context_tokens | ||
| ) * 100 | ||
| logger.info( | ||
| f"Compress completed." | ||
| f" {prev_tokens} -> {tokens_after_summary} tokens," | ||
| f"Compress #{self._compression_count} completed." | ||
| f" {prev_tokens} -> {tokens_after_compression} tokens," | ||
| f" compression rate: {compress_rate:.2f}%.", | ||
| ) | ||
|
|
||
| # last check | ||
| # 更新缓存 | ||
| self._last_token_count = tokens_after_compression | ||
| self._last_messages_fingerprint = self._get_messages_fingerprint(messages) | ||
|
|
||
| # last check - 优化: 减少不必要的递归调用 | ||
| if self.compressor.should_compress( | ||
| messages, tokens_after_summary, self.config.max_context_tokens | ||
| messages, tokens_after_compression, self.config.max_context_tokens | ||
| ): | ||
| logger.info( | ||
| "Context still exceeds max tokens after compression, applying halving truncation..." | ||
| ) | ||
| # still need compress, truncate by half | ||
| messages = self.truncator.truncate_by_halving(messages) | ||
| # 更新缓存 | ||
| self._last_token_count = self.token_counter.count_tokens(messages) | ||
| self._last_messages_fingerprint = self._get_messages_fingerprint(messages) | ||
|
|
||
| return messages | ||
|
|
||
| def get_stats(self) -> dict: | ||
| """获取上下文管理器的统计信息。 | ||
|
|
||
| Returns: | ||
| Dictionary with stats including compression count and token counter stats. | ||
| """ | ||
| stats = { | ||
| "compression_count": self._compression_count, | ||
| "last_token_count": self._last_token_count, | ||
| "last_messages_fingerprint": self._last_messages_fingerprint, | ||
| } | ||
|
|
||
| # 如果 token counter 有缓存统计,也一并返回 | ||
| if hasattr(self.token_counter, "get_cache_stats"): | ||
| stats["token_counter_cache"] = self.token_counter.get_cache_stats() | ||
|
|
||
| return stats | ||
|
|
||
| def reset_stats(self) -> None: | ||
| """重置统计信息。""" | ||
| self._compression_count = 0 | ||
| self._last_token_count = None | ||
| self._last_messages_fingerprint = None | ||
| if hasattr(self.token_counter, "clear_cache"): | ||
| self.token_counter.clear_cache() | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
issue (bug_risk): Summary cache key truncation to 50 characters can cause collisions and incorrect cache hits.
Because the key uses content[:50] per message, different histories that share the same first 50 characters will collide and reuse the same summary, returning stale or incorrect results. Consider using a hash of the full content (e.g. hashlib.sha1(content.encode()).hexdigest()), optionally combined with message count and roles, or at least a longer prefix plus the full length to reduce collision risk.