MZWNET
diff --git a/‎.github/workflows/lint.yaml‎
Lines changed: 29 additions & 0 deletions b/‎.github/workflows/lint.yaml‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎Makefile‎
Lines changed: 10 additions & 0 deletions b/‎Makefile‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 14 additions & 0 deletions b/‎README.md‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎llama_cpp/_ggml.py‎
Lines changed: 1 addition & 1 deletion b/‎llama_cpp/_ggml.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎llama_cpp/_internals.py‎
Lines changed: 60 additions & 38 deletions b/‎llama_cpp/_internals.py‎
Lines changed: 60 additions & 38 deletions
diff --git a/‎llama_cpp/_logger.py‎
Lines changed: 4 additions & 1 deletion b/‎llama_cpp/_logger.py‎
Lines changed: 4 additions & 1 deletion
@@ -0,0 +1,29 @@
+name: Lint
+
+on:
+  pull_request:
+    branches:
+      - main
+  push:
+    branches:
+      - main
+
+jobs:
+  ruff:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install Ruff
+        run: python -m pip install "ruff>=0.15.7"
+
+      - name: Lint with Ruff
+        run: python -m ruff check llama_cpp tests
+
+      - name: Check formatting with Ruff
+        run: python -m ruff format --check llama_cpp tests
@@ -66,6 +66,7 @@ coverage.xml
 *.py,cover
 .hypothesis/
 .pytest_cache/
+.ruff_cache/
 cover/
 
 # Translations
 
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+- chore(dev): Add Ruff-based formatting and a safe lint baseline, and run it in CI for pull requests and pushes to `main`
 - fix(ci): Run macOS CI on supported Apple Silicon and Intel runners by @abetlen in #2150
 - fix(ci): Use the `hf` CLI instead of the deprecated `huggingface-cli` name in GitHub Actions and docs by @abetlen in #2149
 
 
@@ -67,6 +67,14 @@ deploy.gh-docs:
 test:
 	python3 -m pytest --full-trace -v
 
+lint:
+	python3 -m ruff check llama_cpp tests
+	python3 -m ruff format --check llama_cpp tests
+
+format:
+	python3 -m ruff check --fix llama_cpp tests
+	python3 -m ruff format llama_cpp tests
+
 docker:
 	docker build -t llama-cpp-python:latest -f docker/simple/Dockerfile .
 
@@ -93,5 +101,7 @@ clean:
 	build.sdist \
 	deploy.pypi \
 	deploy.gh-docs \
+	lint \
+	format \
 	docker \
 	clean
@@ -752,6 +752,9 @@ pip install --upgrade pip
 # Install with pip
 pip install -e .
 
+# install development tooling (tests, docs, ruff)
+pip install -e '.[dev]'
+
 # if you want to use the fastapi / openapi server
 pip install -e '.[server]'
 
@@ -768,6 +771,17 @@ Now try running the tests
 pytest
 ```
 
+And check formatting / linting before opening a PR:
+
+```bash
+python -m ruff check llama_cpp tests
+python -m ruff format --check llama_cpp tests
+
+# or use the Makefile targets
+make lint
+make format
+```
+
 There's a `Makefile` available with useful targets.
 A typical workflow would look like this:
 
 
@@ -2,11 +2,11 @@
 
 This module provides a minimal interface for working with ggml tensors from llama-cpp-python
 """
+
 import os
 import pathlib
 
 import llama_cpp._ctypes_extensions as ctypes_ext
 
 libggml_base_path = pathlib.Path(os.path.abspath(os.path.dirname(__file__))) / "lib"
 libggml = ctypes_ext.load_shared_library("ggml", libggml_base_path)
-
@@ -355,7 +355,9 @@ def get_embeddings_seq(self, seq_id: int):
     # Sampling functions - deprecated, use LlamaSampler instead
 
     def set_rng_seed(self, seed: int):
-        raise NotImplementedError("set_rng_seed is deprecated, use LlamaSampler instead")
+        raise NotImplementedError(
+            "set_rng_seed is deprecated, use LlamaSampler instead"
+        )
 
     def sample_repetition_penalties(
         self,
@@ -366,30 +368,44 @@ def sample_repetition_penalties(
         penalty_freq: float,
         penalty_present: float,
     ):
-        raise NotImplementedError("sample_repetition_penalties is deprecated, use LlamaSampler instead")
+        raise NotImplementedError(
+            "sample_repetition_penalties is deprecated, use LlamaSampler instead"
+        )
 
     def sample_softmax(self, candidates: "_LlamaTokenDataArray"):
-        raise NotImplementedError("sample_softmax is deprecated, use LlamaSampler instead")
+        raise NotImplementedError(
+            "sample_softmax is deprecated, use LlamaSampler instead"
+        )
 
     def sample_top_k(self, candidates: "_LlamaTokenDataArray", k: int, min_keep: int):
-        raise NotImplementedError("sample_top_k is deprecated, use LlamaSampler instead")
+        raise NotImplementedError(
+            "sample_top_k is deprecated, use LlamaSampler instead"
+        )
 
     def sample_top_p(self, candidates: "_LlamaTokenDataArray", p: float, min_keep: int):
-        raise NotImplementedError("sample_top_p is deprecated, use LlamaSampler instead")
+        raise NotImplementedError(
+            "sample_top_p is deprecated, use LlamaSampler instead"
+        )
 
     def sample_min_p(self, candidates: "_LlamaTokenDataArray", p: float, min_keep: int):
-        raise NotImplementedError("sample_min_p is deprecated, use LlamaSampler instead")
+        raise NotImplementedError(
+            "sample_min_p is deprecated, use LlamaSampler instead"
+        )
 
     def sample_typical(
         self, candidates: "_LlamaTokenDataArray", p: float, min_keep: int
     ):
-        raise NotImplementedError("sample_typical is deprecated, use LlamaSampler instead")
+        raise NotImplementedError(
+            "sample_typical is deprecated, use LlamaSampler instead"
+        )
 
     def sample_temp(self, candidates: "_LlamaTokenDataArray", temp: float):
         raise NotImplementedError("sample_temp is deprecated, use LlamaSampler instead")
 
     def sample_grammar(self, candidates: "_LlamaTokenDataArray", grammar: LlamaGrammar):
-        raise NotImplementedError("sample_grammar is deprecated, use LlamaSampler instead")
+        raise NotImplementedError(
+            "sample_grammar is deprecated, use LlamaSampler instead"
+        )
 
     def sample_token_mirostat(
         self,
@@ -399,7 +415,9 @@ def sample_token_mirostat(
         m: int,
         mu: llama_cpp.CtypesPointerOrRef[ctypes.c_float],
     ) -> int:
-        raise NotImplementedError("sample_token_mirostat is deprecated, use LlamaSampler instead")
+        raise NotImplementedError(
+            "sample_token_mirostat is deprecated, use LlamaSampler instead"
+        )
 
     def sample_token_mirostat_v2(
         self,
@@ -408,17 +426,25 @@ def sample_token_mirostat_v2(
         eta: float,
         mu: llama_cpp.CtypesPointerOrRef[ctypes.c_float],
     ) -> int:
-        raise NotImplementedError("sample_token_mirostat_v2 is deprecated, use LlamaSampler instead")
+        raise NotImplementedError(
+            "sample_token_mirostat_v2 is deprecated, use LlamaSampler instead"
+        )
 
     def sample_token_greedy(self, candidates: "_LlamaTokenDataArray") -> int:
-        raise NotImplementedError("sample_token_greedy is deprecated, use LlamaSampler instead")
+        raise NotImplementedError(
+            "sample_token_greedy is deprecated, use LlamaSampler instead"
+        )
 
     def sample_token(self, candidates: "_LlamaTokenDataArray") -> int:
-        raise NotImplementedError("sample_token is deprecated, use LlamaSampler instead")
+        raise NotImplementedError(
+            "sample_token is deprecated, use LlamaSampler instead"
+        )
 
     # Grammar
     def grammar_accept_token(self, grammar: LlamaGrammar, token: int):
-        raise NotImplementedError("grammar_accept_token is deprecated, use LlamaSampler instead")
+        raise NotImplementedError(
+            "grammar_accept_token is deprecated, use LlamaSampler instead"
+        )
 
     def reset_timings(self):
         llama_cpp.llama_perf_context_reset(self.ctx)
@@ -602,16 +628,16 @@ def sample(
         logits_array: Optional[npt.NDArray[np.single]] = None,
     ):
         # This method is deprecated in favor of using LlamaSampler directly
-        raise NotImplementedError("LlamaSamplingContext.sample is deprecated, use LlamaSampler instead")
+        raise NotImplementedError(
+            "LlamaSamplingContext.sample is deprecated, use LlamaSampler instead"
+        )
 
     def accept(self, ctx_main: LlamaContext, id: int, apply_grammar: bool):
         self.prev.append(id)
 
 
 class CustomSampler:
-    def __init__(
-        self, apply_func: Callable[[llama_cpp.llama_token_data_array], None]
-    ):
+    def __init__(self, apply_func: Callable[[llama_cpp.llama_token_data_array], None]):
         self.apply_func = apply_func
 
         def apply_wrapper(
@@ -723,28 +749,28 @@ def add_grammar(self, model: LlamaModel, grammar: LlamaGrammar):
         llama_cpp.llama_sampler_chain_add(self.sampler, sampler)
 
     def add_grammar_lazy_patterns(
-        self, 
-        model: LlamaModel, 
+        self,
+        model: LlamaModel,
         grammar: LlamaGrammar,
         trigger_patterns: List[str],
-        trigger_tokens: List[int]
+        trigger_tokens: List[int],
     ):
         # Convert patterns to C array
         pattern_ptrs = (ctypes.c_char_p * len(trigger_patterns))()
         for i, pattern in enumerate(trigger_patterns):
             pattern_ptrs[i] = pattern.encode("utf-8")
-        
+
         # Convert tokens to C array
         token_array = (llama_cpp.llama_token * len(trigger_tokens))(*trigger_tokens)
-        
+
         sampler = llama_cpp.llama_sampler_init_grammar_lazy_patterns(
             model.vocab,
             grammar._grammar.encode("utf-8"),
             grammar._root.encode("utf-8"),
             pattern_ptrs,
             len(trigger_patterns),
             token_array,
-            len(trigger_tokens)
+            len(trigger_tokens),
         )
         llama_cpp.llama_sampler_chain_add(self.sampler, sampler)
 
@@ -771,13 +797,13 @@ def add_dry(
         dry_base: float,
         dry_allowed_length: int,
         dry_penalty_last_n: int,
-        seq_breakers: List[str]
+        seq_breakers: List[str],
     ):
         # Convert seq_breakers to C array
         breaker_ptrs = (ctypes.c_char_p * len(seq_breakers))()
         for i, breaker in enumerate(seq_breakers):
             breaker_ptrs[i] = breaker.encode("utf-8")
-        
+
         sampler = llama_cpp.llama_sampler_init_dry(
             model.vocab,
             n_ctx_train,
@@ -786,25 +812,19 @@ def add_dry(
             dry_allowed_length,
             dry_penalty_last_n,
             breaker_ptrs,
-            len(seq_breakers)
+            len(seq_breakers),
         )
         llama_cpp.llama_sampler_chain_add(self.sampler, sampler)
 
-    def add_logit_bias(
-        self, 
-        n_vocab: int, 
-        logit_bias: Dict[int, float]
-    ):
+    def add_logit_bias(self, n_vocab: int, logit_bias: Dict[int, float]):
         # Convert logit_bias dict to C array
         bias_array = (llama_cpp.llama_logit_bias * len(logit_bias))()
         for i, (token, bias) in enumerate(logit_bias.items()):
             bias_array[i].token = token
             bias_array[i].bias = bias
-        
+
         sampler = llama_cpp.llama_sampler_init_logit_bias(
-            n_vocab,
-            len(logit_bias),
-            bias_array
+            n_vocab, len(logit_bias), bias_array
         )
         llama_cpp.llama_sampler_chain_add(self.sampler, sampler)
 
@@ -838,15 +858,17 @@ def reset(self):
     def clone(self):
         # NOTE: Custom samplers cannot be cloned due to Python callback limitations
         if self.custom_samplers:
-            raise NotImplementedError("Cannot clone LlamaSampler that contains custom samplers")
-        
+            raise NotImplementedError(
+                "Cannot clone LlamaSampler that contains custom samplers"
+            )
+
         cloned_sampler = llama_cpp.llama_sampler_clone(self.sampler)
         # Create a new wrapper around the cloned sampler
         new_sampler = LlamaSampler.__new__(LlamaSampler)
         new_sampler.sampler = cloned_sampler
         new_sampler.custom_samplers = []
         new_sampler._exit_stack = ExitStack()
-        
+
         def free_sampler():
             if new_sampler.sampler is not None:
                 llama_cpp.llama_sampler_free(new_sampler.sampler)
 
@@ -25,6 +25,7 @@
 
 _last_log_level = GGML_LOG_LEVEL_TO_LOGGING_LEVEL[0]
 
+
 # typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);
 @llama_cpp.llama_log_callback
 def llama_log_callback(
@@ -34,7 +35,9 @@ def llama_log_callback(
 ):
     # TODO: Correctly implement continue previous log
     global _last_log_level
-    log_level = GGML_LOG_LEVEL_TO_LOGGING_LEVEL[level] if level != 5 else _last_log_level
+    log_level = (
+        GGML_LOG_LEVEL_TO_LOGGING_LEVEL[level] if level != 5 else _last_log_level
+    )
     if logger.level <= GGML_LOG_LEVEL_TO_LOGGING_LEVEL[level]:
         print(text.decode("utf-8"), end="", flush=True, file=sys.stderr)
     _last_log_level = log_level