Skip to content

Commit 5776f15

Browse files
committed
Remove redundant seq_id and add resource cleanup
- Removed `seq_id` from `HybridCheckpointCache` initialization to make it a stateless, global multi-sequence manager.
- Added `close()` and `__del__()` methods to safely release C++ context references and prevent memory leaks.
1 parent f6dda38 commit 5776f15

1 file changed

Lines changed: 11 additions & 2 deletions

File tree

llama_cpp/llama_cache.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -333,11 +333,10 @@ class HybridCheckpointCache(BaseLlamaCache):
333333
Manager for RNN state snapshots (Checkpoints) tailored for Hybrid/Recurrent models.
334334
Provides rollback capabilities for models that cannot physically truncate KV cache.
335335
"""
336-
def __init__(self, ctx: llama_cpp.llama_context_p, seq_id: int = 0, max_checkpoints: int = 16, verbose: bool = False):
336+
def __init__(self, ctx: llama_cpp.llama_context_p, max_checkpoints: int = 16, verbose: bool = False):
337337
if ctx is None:
338338
raise ValueError("HybridCheckpointCache: Failed to create HybridCheckpointCache with model context")
339339
self._ctx = ctx
340-
self.seq_id = seq_id
341340
self.max_checkpoints = max_checkpoints
342341
self.checkpoints: list[HybridCheckpoint] = []
343342
self._current_size = 0
@@ -362,6 +361,16 @@ def clear(self):
362361
if self.verbose:
363362
print("HybridCheckpointCache: cleared")
364363

364+
def close(self):
365+
self.checkpoints = None
366+
self._ctx = None
367+
self._get_size_ext = None
368+
self._get_data_ext = None
369+
self._set_data_ext = None
370+
371+
def __del__(self) -> None:
372+
self.close()
373+
365374
# Helper tools
366375

367376
def _hash_prefix(self, tokens: List[int], length: int) -> str:

0 commit comments

Comments (0)