Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build-and-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-20.04, windows-2019, macos-13]
os: [ubuntu-22.04, windows-2022, macos-14, macos-15]

steps:
- uses: actions/checkout@v4
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-wheels-cuda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
id: set-matrix
run: |
$matrix = @{
'os' = @('ubuntu-latest', 'windows-2019')
'os' = @('ubuntu-22.04', 'windows-2022')
'pyver' = @("3.9", "3.10", "3.11", "3.12")
'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1") #, "12.5.1", "12.6.1")
'releasetag' = @("basic")
Expand Down
19 changes: 4 additions & 15 deletions .github/workflows/build-wheels-metal.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [macos-13, macos-14, macos-15]
os: [macos-14, macos-15]

steps:
- uses: actions/checkout@v4
Expand All @@ -23,32 +23,21 @@ jobs:
with:
python-version: "3.12"
cache: 'pip'

- name: Install dependencies (Linux/MacOS)
if: runner.os != 'Windows'
run: |
python -m pip install --upgrade pip
python -m pip install uv
RUST_LOG=trace python -m uv pip install -e .[all] --verbose
shell: bash

- name: Install dependencies (Windows)
if: runner.os == 'Windows'
env:
RUST_LOG: trace
run: |
python -m pip install --upgrade pip
python -m pip install uv
python -m uv pip install -e .[all] --verbose
shell: cmd

- name: Build wheels
uses: pypa/cibuildwheel@v2.22.0
env:
# disable repair
CIBW_REPAIR_WHEEL_COMMAND: ""
CIBW_ARCHS: "arm64"
CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=on"
CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=on -DCMAKE_CROSSCOMPILING=ON"
CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
with:
package-dir: .
Expand All @@ -69,7 +58,7 @@ jobs:
with:
merge-multiple: true
path: dist2

- uses: softprops/action-gh-release@v2
with:
files: dist2/*
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.3.11]

- fix: Update reference to `llama_kv_cache_clear` in Llama.embed. Closes #2037 by @abetlen in 9e5a4eaa84156084ed7bbb91e6efcc91dc6217bc

## [0.3.10]

- feat: Update llama.cpp to ggerganov/llama.cpp@8846aace4934ad29651ea61b8c7e3f6b0556e3d2
Expand Down
1 change: 1 addition & 0 deletions docker/simple/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ ARG IMAGE

# Update and upgrade the existing packages
RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
git \
python3 \
python3-pip \
ninja-build \
Expand Down
2 changes: 1 addition & 1 deletion llama_cpp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .llama_cpp import *
from .llama import *

__version__ = "0.3.10"
__version__ = "0.3.11"
4 changes: 2 additions & 2 deletions llama_cpp/llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -1041,7 +1041,7 @@ def embed(
data: Union[List[List[float]], List[List[List[float]]]] = []

def decode_batch(seq_sizes: List[int]):
llama_cpp.llama_kv_cache_clear(self._ctx.ctx)
llama_cpp.llama_kv_self_clear(self._ctx.ctx)
self._ctx.decode(self._batch)
self._batch.reset()

Expand Down Expand Up @@ -1112,7 +1112,7 @@ def decode_batch(seq_sizes: List[int]):

output = data[0] if isinstance(input, str) else data

llama_cpp.llama_kv_cache_clear(self._ctx.ctx)
llama_cpp.llama_kv_self_clear(self._ctx.ctx)
self.reset()

if return_count:
Expand Down
16 changes: 16 additions & 0 deletions tests/test_llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,19 @@ def logit_processor_func(input_ids, logits):

assert number_1 != number_2
assert number_1 == number_3


def test_real_llama_embeddings(llama_cpp_model_path):
    """Smoke test: construct a real Llama model in embedding mode and embed one string.

    Only checks that construction and ``embed`` complete without raising; the
    returned embedding values are not inspected.
    """
    # Tiny context/batch sizes (32) keep memory use and runtime minimal;
    # presumably sized for CI — confirm against the fixture's model size.
    model = llama_cpp.Llama(
        llama_cpp_model_path,
        n_ctx=32,
        n_batch=32,
        n_ubatch=32,
        n_threads=multiprocessing.cpu_count(),
        n_threads_batch=multiprocessing.cpu_count(),
        logits_all=False,
        flash_attn=True,
        embedding=True  # required so Llama.embed() is usable
    )
    # Smoke test for now
    model.embed("Hello World")