Skip to content

Commit 5776f15

Browse files
committed
Remove redundant seq_id and add resource cleanup
- Removed `seq_id` from `HybridCheckpointCache` initialization to make it a stateless, global multi-sequence manager.
- Added `close()` and `__del__()` methods to safely release C++ context references and prevent memory leaks.
1 parent f6dda38 commit 5776f15

1 file changed

Lines changed: 11 additions & 2 deletions

File tree

llama_cpp/llama_cache.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -333,11 +333,10 @@ class HybridCheckpointCache(BaseLlamaCache):
333333
Manager for RNN state snapshots (Checkpoints) tailored for Hybrid/Recurrent models.
334334
Provides rollback capabilities for models that cannot physically truncate KV cache.
335335
"""
336-
def __init__(self, ctx: llama_cpp.llama_context_p, seq_id: int = 0, max_checkpoints: int = 16, verbose: bool = False):
336+
def __init__(self, ctx: llama_cpp.llama_context_p, max_checkpoints: int = 16, verbose: bool = False):
337337
if ctx is None:
338338
raise ValueError("HybridCheckpointCache: Failed to create HybridCheckpointCache with model context")
339339
self._ctx = ctx
340-
self.seq_id = seq_id
341340
self.max_checkpoints = max_checkpoints
342341
self.checkpoints: list[HybridCheckpoint] = []
343342
self._current_size = 0
@@ -362,6 +361,16 @@ def clear(self):
362361
if self.verbose:
363362
print("HybridCheckpointCache: cleared")
364363

364+
def close(self):
365+
self.checkpoints = None
366+
self._ctx = None
367+
self._get_size_ext = None
368+
self._get_data_ext = None
369+
self._set_data_ext = None
370+
371+
def __del__(self) -> None:
372+
self.close()
373+
365374
# Helper tools
366375

367376
def _hash_prefix(self, tokens: List[int], length: int) -> str:

0 commit comments

Comments (0)