# Patch summary (free-form preamble; patch tools skip text before the first "diff --git" header):
#   - CI: move workflow runners to ubuntu-22.04 / windows-2022 / macos-14 / macos-15.
#   - Metal wheels: drop the Windows install step, add -DCMAKE_CROSSCOMPILING=ON to CMAKE_ARGS.
#   - Llama.embed: call llama_kv_self_clear instead of llama_kv_cache_clear.
#   - Add an embeddings smoke test, add git to the simple Docker image, bump version to 0.3.11.
# NOTE(review): leading indentation, blank context lines and the width of removed
# whitespace-only lines were reconstructed from a whitespace-collapsed copy of this
# patch -- verify against the target files before applying.
diff --git a/.github/workflows/build-and-release.yaml b/.github/workflows/build-and-release.yaml
index 7307c85ab..29a6913c2 100644
--- a/.github/workflows/build-and-release.yaml
+++ b/.github/workflows/build-and-release.yaml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-20.04, windows-2019, macos-13]
+        os: [ubuntu-22.04, windows-2022, macos-14, macos-15]
 
     steps:
       - uses: actions/checkout@v4
diff --git a/.github/workflows/build-wheels-cuda.yaml b/.github/workflows/build-wheels-cuda.yaml
index 745b2e602..63b4c26ea 100644
--- a/.github/workflows/build-wheels-cuda.yaml
+++ b/.github/workflows/build-wheels-cuda.yaml
@@ -20,7 +20,7 @@ jobs:
         id: set-matrix
         run: |
           $matrix = @{
-              'os' = @('ubuntu-latest', 'windows-2019')
+              'os' = @('ubuntu-22.04', 'windows-2022')
               'pyver' = @("3.9", "3.10", "3.11", "3.12")
               'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1") #, "12.5.1", "12.6.1")
               'releasetag' = @("basic")
diff --git a/.github/workflows/build-wheels-metal.yaml b/.github/workflows/build-wheels-metal.yaml
index 9b97bf2f5..98f511e4a 100644
--- a/.github/workflows/build-wheels-metal.yaml
+++ b/.github/workflows/build-wheels-metal.yaml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [macos-13, macos-14, macos-15]
+        os: [macos-14, macos-15]
 
     steps:
       - uses: actions/checkout@v4
@@ -23,32 +23,21 @@ jobs:
         with:
           python-version: "3.12"
           cache: 'pip'
-          
+
       - name: Install dependencies (Linux/MacOS)
-        if: runner.os != 'Windows'
         run: |
           python -m pip install --upgrade pip
           python -m pip install uv
           RUST_LOG=trace python -m uv pip install -e .[all] --verbose
         shell: bash
 
-      - name: Install dependencies (Windows)
-        if: runner.os == 'Windows'
-        env:
-          RUST_LOG: trace
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install uv
-          python -m uv pip install -e .[all] --verbose
-        shell: cmd
-
       - name: Build wheels
         uses: pypa/cibuildwheel@v2.22.0
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""
           CIBW_ARCHS: "arm64"
-          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=on"
+          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=on -DCMAKE_CROSSCOMPILING=ON"
           CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
         with:
           package-dir: .
@@ -69,7 +58,7 @@ jobs:
         with:
           merge-multiple: true
           path: dist2
-          
+
       - uses: softprops/action-gh-release@v2
         with:
           files: dist2/*
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e08e52c10..6e336962f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.3.11]
+
+- fix: Update reference to `llama_kv_cache_clear` in Llama.embed. Closes #2037 by @abetlen in 9e5a4eaa84156084ed7bbb91e6efcc91dc6217bc
+
 ## [0.3.10]
 
 - feat: Update llama.cpp to ggerganov/llama.cpp@8846aace4934ad29651ea61b8c7e3f6b0556e3d2
diff --git a/docker/simple/Dockerfile b/docker/simple/Dockerfile
index 3594df1a5..06483d44e 100644
--- a/docker/simple/Dockerfile
+++ b/docker/simple/Dockerfile
@@ -9,6 +9,7 @@ ARG IMAGE
 
 # Update and upgrade the existing packages
 RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
+    git \
     python3 \
     python3-pip \
     ninja-build \
diff --git a/llama_cpp/__init__.py b/llama_cpp/__init__.py
index 11a511390..e35c5014e 100644
--- a/llama_cpp/__init__.py
+++ b/llama_cpp/__init__.py
@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.3.10"
+__version__ = "0.3.11"
diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index cdc05c7ad..2e93670e6 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -1041,7 +1041,7 @@ def embed(
         data: Union[List[List[float]], List[List[List[float]]]] = []
 
         def decode_batch(seq_sizes: List[int]):
-            llama_cpp.llama_kv_cache_clear(self._ctx.ctx)
+            llama_cpp.llama_kv_self_clear(self._ctx.ctx)
             self._ctx.decode(self._batch)
             self._batch.reset()
 
@@ -1112,7 +1112,7 @@ def decode_batch(seq_sizes: List[int]):
 
         output = data[0] if isinstance(input, str) else data
 
-        llama_cpp.llama_kv_cache_clear(self._ctx.ctx)
+        llama_cpp.llama_kv_self_clear(self._ctx.ctx)
         self.reset()
 
         if return_count:
diff --git a/tests/test_llama.py b/tests/test_llama.py
index fc182ae20..0a1a9f5ad 100644
--- a/tests/test_llama.py
+++ b/tests/test_llama.py
@@ -216,3 +216,19 @@ def logit_processor_func(input_ids, logits):
 
     assert number_1 != number_2
     assert number_1 == number_3
+
+
+def test_real_llama_embeddings(llama_cpp_model_path):
+    model = llama_cpp.Llama(
+        llama_cpp_model_path,
+        n_ctx=32,
+        n_batch=32,
+        n_ubatch=32,
+        n_threads=multiprocessing.cpu_count(),
+        n_threads_batch=multiprocessing.cpu_count(),
+        logits_all=False,
+        flash_attn=True,
+        embedding=True
+    )
+    # Smoke test for now
+    model.embed("Hello World")