Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 0 additions & 97 deletions .devops/cuda-new.Dockerfile

This file was deleted.

5 changes: 3 additions & 2 deletions .devops/nix/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
rocmPackages,
vulkan-headers,
vulkan-loader,
curl,
openssl,
shaderc,
useBlas ?
builtins.all (x: !x) [
Expand Down Expand Up @@ -160,7 +160,8 @@ effectiveStdenv.mkDerivation (finalAttrs: {
++ optionals useMpi [ mpi ]
++ optionals useRocm rocmBuildInputs
++ optionals useBlas [ blas ]
++ optionals useVulkan vulkanBuildInputs;
++ optionals useVulkan vulkanBuildInputs
++ [ openssl ];

cmakeFlags =
[
Expand Down
21 changes: 21 additions & 0 deletions .github/workflows/build-self-hosted.yml
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,27 @@ jobs:
vulkaninfo --summary
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

ggml-ci-win-intel-vulkan:
runs-on: [self-hosted, Windows, X64, Intel]

steps:
- name: Clone
id: checkout
uses: actions/checkout@v6

- name: Test
id: ggml-ci
shell: C:\msys64\usr\bin\bash.exe --noprofile --norc -eo pipefail "{0}"
env:
MSYSTEM: UCRT64
CHERE_INVOKING: 1
PATH: C:\msys64\ucrt64\bin;C:\msys64\usr\bin;C:\Windows\System32;${{ env.PATH }}
run: |
vulkaninfo --summary
# Skip python related tests with GG_BUILD_LOW_PERF=1 since Windows MSYS2 UCRT64 currently fails to create
# a valid python environment for testing
LLAMA_FATAL_WARNINGS=OFF GG_BUILD_NINJA=1 GG_BUILD_VULKAN=1 GG_BUILD_LOW_PERF=1 ./ci/run.sh ./results/llama.cpp ./mnt/llama.cpp

ggml-ci-intel-openvino-gpu-low-perf:
runs-on: [self-hosted, Linux, Intel, OpenVINO]

Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ jobs:
cmake -B build -S . \
-DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
-DGGML_HIP_ROCWMMA_FATTN=ON \
-DGPU_TARGETS="gfx1030" \
-DGGML_HIP=ON
cmake --build build --config Release -j $(nproc)
Expand Down Expand Up @@ -990,6 +991,7 @@ jobs:
-DROCM_DIR="${env:HIP_PATH}" `
-DGGML_HIP=ON `
-DGGML_HIP_ROCWMMA_FATTN=ON `
-DGPU_TARGETS="gfx1100" `
-DGGML_RPC=ON
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,10 @@ jobs:
{ "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" },
{ "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-arm" },
{ "tag": "cpu", "dockerfile": ".devops/s390x.Dockerfile", "platforms": "linux/s390x", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-s390x" },
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
{ "tag": "cuda13", "dockerfile": ".devops/cuda-new.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
{ "tag": "cuda13", "dockerfile": ".devops/cuda-new.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.9.1", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.9.1", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
{ "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.1.1", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
{ "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.1.1", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
{ "tag": "musa", "dockerfile": ".devops/musa.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
{ "tag": "intel", "dockerfile": ".devops/intel.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
{ "tag": "vulkan", "dockerfile": ".devops/vulkan.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" },
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/hip-quality-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:
run: |
cmake -B build -S . \
-DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
-DGPU_TARGETS=gfx908 \
-DGPU_TARGETS=gfx942 \
-DGGML_HIP=ON \
-DGGML_HIP_EXPORT_METRICS=Off \
-DCMAKE_HIP_FLAGS="-Werror -Wno-tautological-compare" \
Expand Down
37 changes: 35 additions & 2 deletions ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ if [ ! -z ${GG_BUILD_VULKAN} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=OFF -DGGML_BLAS=OFF"
fi

# Build shared libs on Windows
# to reduce binary size and avoid errors in library loading unit tests
if uname -s | grep -qi nt; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DBUILD_SHARED_LIBS=ON"
fi
fi

if [ ! -z ${GG_BUILD_WEBGPU} ]; then
Expand Down Expand Up @@ -221,7 +226,7 @@ function gg_run_ctest_debug {

set -e

# Check cmake and ctest are installed
# Check required binaries are installed
gg_check_build_requirements

(cmake -G "${CMAKE_GENERATOR}" -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
Expand Down Expand Up @@ -252,7 +257,7 @@ function gg_run_ctest_release {

set -e

# Check cmake and ctest are installed
# Check required binaries are installed
gg_check_build_requirements

(cmake -G "${CMAKE_GENERATOR}" -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
Expand Down Expand Up @@ -627,10 +632,38 @@ function gg_sum_rerank_tiny {
}

function gg_check_build_requirements {
if ! command -v git &> /dev/null; then
gg_printf 'git not found, please install'
fi

if ! command -v git-lfs &> /dev/null; then
gg_printf 'git-lfs not found, please install'
fi

if ! command -v wget &> /dev/null; then
gg_printf 'wget not found, please install'
fi

if ! command -v python3 &> /dev/null; then
gg_printf 'python3 not found, please install'
fi

if ! command -v pip3 &> /dev/null; then
gg_printf 'pip3 not found, please install'
fi

if ! python3 -m ensurepip --help &> /dev/null; then
gg_printf 'ensurepip not found, please install python3-venv package'
fi

if ! command -v cmake &> /dev/null; then
gg_printf 'cmake not found, please install'
fi

if ! command -v ccache &> /dev/null; then
gg_printf 'ccache not found, please consider installing for faster builds'
fi

if ! command -v ctest &> /dev/null; then
gg_printf 'ctest not found, please install'
fi
Expand Down
8 changes: 8 additions & 0 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1311,6 +1311,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.kv_unified = value;
}
).set_env("LLAMA_ARG_KV_UNIFIED").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_PERPLEXITY, LLAMA_EXAMPLE_BATCHED, LLAMA_EXAMPLE_BENCH, LLAMA_EXAMPLE_PARALLEL}));
add_opt(common_arg(
{"--clear-idle"},
{"--no-clear-idle"},
"save and clear idle slots on new task (default: enabled, requires unified KV and cache-ram)",
[](common_params & params, bool value) {
params.clear_idle = value;
}
).set_env("LLAMA_ARG_CLEAR_IDLE").set_examples({LLAMA_EXAMPLE_SERVER}));
add_opt(common_arg(
{"--context-shift"},
{"--no-context-shift"},
Expand Down
Loading
Loading