From bbdc8518720322ee4e805b9e375fd65a5948c9c9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 3 Jun 2026 01:40:49 -0700 Subject: [PATCH 1/9] chore(deps): bump pypa/cibuildwheel from 2.22.0 to 3.4.1 (#2249) Bumps [pypa/cibuildwheel](https://github.com/pypa/cibuildwheel) from 2.22.0 to 3.4.1. - [Release notes](https://github.com/pypa/cibuildwheel/releases) - [Changelog](https://github.com/pypa/cibuildwheel/blob/main/docs/changelog.md) - [Commits](https://github.com/pypa/cibuildwheel/compare/v2.22...v3.4.1) --- updated-dependencies: - dependency-name: pypa/cibuildwheel dependency-version: 3.4.1 dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build-and-release.yaml | 6 +++--- .github/workflows/build-wheels-metal.yaml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-and-release.yaml b/.github/workflows/build-and-release.yaml index 5cfb6469de..6318f73a1e 100644 --- a/.github/workflows/build-and-release.yaml +++ b/.github/workflows/build-and-release.yaml @@ -42,7 +42,7 @@ jobs: shell: cmd - name: Build wheels - uses: pypa/cibuildwheel@v2.22.0 + uses: pypa/cibuildwheel@v3.4.1 env: # Keep repair disabled by default for non-Linux platforms in this job. 
CIBW_REPAIR_WHEEL_COMMAND: "" @@ -80,7 +80,7 @@ jobs: submodules: "recursive" - name: Build wheels - uses: pypa/cibuildwheel@v2.22.0 + uses: pypa/cibuildwheel@v3.4.1 env: CIBW_SKIP: "pp*" CIBW_REPAIR_WHEEL_COMMAND: "LD_LIBRARY_PATH=$PWD/llama_cpp/lib auditwheel repair -w {dest_dir} {wheel}" @@ -133,7 +133,7 @@ jobs: platforms: linux/riscv64 - name: Build wheels - uses: pypa/cibuildwheel@v3.1.2 + uses: pypa/cibuildwheel@v3.4.1 env: CIBW_SKIP: "*musllinux* pp*" CIBW_REPAIR_WHEEL_COMMAND: "" diff --git a/.github/workflows/build-wheels-metal.yaml b/.github/workflows/build-wheels-metal.yaml index 98f511e4a6..5fd56a00d3 100644 --- a/.github/workflows/build-wheels-metal.yaml +++ b/.github/workflows/build-wheels-metal.yaml @@ -32,7 +32,7 @@ jobs: shell: bash - name: Build wheels - uses: pypa/cibuildwheel@v2.22.0 + uses: pypa/cibuildwheel@v3.4.1 env: # disable repair CIBW_REPAIR_WHEEL_COMMAND: "" From dad5d0aacb4bfb0080da3188050d4fabd97d8e50 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 3 Jun 2026 02:03:43 -0700 Subject: [PATCH 2/9] chore(deps): bump actions/cache from 4 to 5 (#2248) Bumps [actions/cache](https://github.com/actions/cache) from 4 to 5. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/cache dependency-version: '5' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/test.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index b8f5566bb0..83a91a8edd 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -32,7 +32,7 @@ jobs: hf download ${{ env.RECURRENT_REPO_ID }} ${{ env.RECURRENT_MODEL_FILE }} hf download ${{ env.HYBRID_REPO_ID }} ${{ env.HYBRID_MODEL_FILE }} - name: Cache model - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.cache/huggingface/hub key: ${{ runner.os }}-model-${{ env.MODEL_CACHE_KEY }} @@ -54,7 +54,7 @@ jobs: python-version: ${{ matrix.python-version }} cache: 'pip' - name: Restore model cache - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.cache/huggingface/hub key: ${{ runner.os }}-model-${{ env.MODEL_CACHE_KEY }} @@ -86,7 +86,7 @@ jobs: cache: 'pip' - name: Restore model cache - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.cache/huggingface/hub key: ${{ runner.os }}-model-${{ env.MODEL_CACHE_KEY }} @@ -126,7 +126,7 @@ jobs: python3 -c "import platform; print(platform.machine(), platform.architecture())" - name: Restore model cache - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.cache/huggingface/hub key: ${{ runner.os }}-model-${{ env.MODEL_CACHE_KEY }} @@ -162,7 +162,7 @@ jobs: python3 -c "import platform; print(platform.machine(), platform.architecture())" - name: Restore model cache - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.cache/huggingface/hub key: ${{ runner.os }}-model-${{ env.MODEL_CACHE_KEY }} From aa944e4cb09337f381281e5880e257f57fd7abc5 Mon Sep 17 00:00:00 2001 From: Andrei Date: Wed, 3 Jun 2026 02:26:38 -0700 Subject: [PATCH 3/9] ci: cache embedding test model (#2250) * ci: cache embedding test model * ci: restore previous model cache --- 
.github/workflows/test.yaml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 83a91a8edd..d798967675 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -14,7 +14,9 @@ env: RECURRENT_MODEL_FILE: mamba-130m-hf.Q2_K.gguf HYBRID_REPO_ID: tiiuae/Falcon-H1-Tiny-90M-Instruct-GGUF HYBRID_MODEL_FILE: Falcon-H1-Tiny-90M-Instruct-Q2_K.gguf - MODEL_CACHE_KEY: qwen35-q8-mamba130m-q2-falconh1tiny-q2 + EMBEDDING_REPO_ID: CompendiumLabs/bge-small-en-v1.5-gguf + EMBEDDING_MODEL_FILE: bge-small-en-v1.5-q4_k_m.gguf + MODEL_CACHE_KEY: qwen35-q8-mamba130m-q2-falconh1tiny-q2-bge-small-q4 jobs: download-model: @@ -26,16 +28,19 @@ jobs: python-version: "3.9" - name: Install huggingface-hub run: pip install huggingface-hub + - name: Restore model cache + uses: actions/cache@v5 + with: + path: ~/.cache/huggingface/hub + key: ${{ runner.os }}-model-${{ env.MODEL_CACHE_KEY }} + restore-keys: | + ${{ runner.os }}-model-qwen35-q8-mamba130m-q2-falconh1tiny-q2 - name: Download model run: | hf download ${{ env.REPO_ID }} ${{ env.MODEL_FILE }} hf download ${{ env.RECURRENT_REPO_ID }} ${{ env.RECURRENT_MODEL_FILE }} hf download ${{ env.HYBRID_REPO_ID }} ${{ env.HYBRID_MODEL_FILE }} - - name: Cache model - uses: actions/cache@v5 - with: - path: ~/.cache/huggingface/hub - key: ${{ runner.os }}-model-${{ env.MODEL_CACHE_KEY }} + hf download ${{ env.EMBEDDING_REPO_ID }} ${{ env.EMBEDDING_MODEL_FILE }} build-linux: needs: download-model From b439a84a61ea257538d0660afaf512425297cba3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 3 Jun 2026 02:56:25 -0700 Subject: [PATCH 4/9] chore(deps): bump actions/upload-artifact from 4 to 7 (#2245) Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4 to 7. 
- [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/v4...v7) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-version: '7' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build-and-release.yaml | 8 ++++---- .github/workflows/build-wheels-metal.yaml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-and-release.yaml b/.github/workflows/build-and-release.yaml index 6318f73a1e..9978cb66b2 100644 --- a/.github/workflows/build-and-release.yaml +++ b/.github/workflows/build-and-release.yaml @@ -66,7 +66,7 @@ jobs: package-dir: . output-dir: wheelhouse - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v7 with: name: wheels-${{ matrix.os }} path: ./wheelhouse/*.whl @@ -95,7 +95,7 @@ jobs: output-dir: wheelhouse - name: Upload wheels as artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: wheels_arm64 path: ./wheelhouse/*.whl @@ -148,7 +148,7 @@ jobs: output-dir: wheelhouse - name: Upload wheels as artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: ${{ matrix.shard.artifact }} path: ./wheelhouse/*.whl @@ -190,7 +190,7 @@ jobs: run: | python -m build --sdist - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v7 with: name: sdist path: ./dist/*.tar.gz diff --git a/.github/workflows/build-wheels-metal.yaml b/.github/workflows/build-wheels-metal.yaml index 5fd56a00d3..5f5eba6854 100644 --- a/.github/workflows/build-wheels-metal.yaml +++ b/.github/workflows/build-wheels-metal.yaml @@ -43,7 +43,7 @@ jobs: package-dir: . 
output-dir: wheelhouse2 - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v7 with: name: wheels-mac_${{ matrix.os }} path: ./wheelhouse2/*.whl From f8bd67df8a9844d6400383a8941ef129401586a6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 3 Jun 2026 02:57:00 -0700 Subject: [PATCH 5/9] chore(deps): bump docker/setup-buildx-action from 3 to 4 (#2246) Bumps [docker/setup-buildx-action](https://github.com/docker/setup-buildx-action) from 3 to 4. - [Release notes](https://github.com/docker/setup-buildx-action/releases) - [Commits](https://github.com/docker/setup-buildx-action/compare/v3...v4) --- updated-dependencies: - dependency-name: docker/setup-buildx-action dependency-version: '4' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Andrei --- .github/workflows/build-docker.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-docker.yaml b/.github/workflows/build-docker.yaml index c656958479..3dfef68008 100644 --- a/.github/workflows/build-docker.yaml +++ b/.github/workflows/build-docker.yaml @@ -29,7 +29,7 @@ jobs: uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v4 - name: Login to GitHub Container Registry uses: docker/login-action@v3 From 6e6c4e6db734c2b28e9ca11ade5527f2ba4f0c1e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 3 Jun 2026 03:05:10 -0700 Subject: [PATCH 6/9] chore(deps): bump actions/setup-python from 5 to 6 (#2247) Bumps [actions/setup-python](https://github.com/actions/setup-python) from 5 to 6. 
- [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v5...v6) --- updated-dependencies: - dependency-name: actions/setup-python dependency-version: '6' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Andrei --- .github/workflows/build-and-release.yaml | 4 ++-- .github/workflows/build-wheels-cuda.yaml | 2 +- .github/workflows/build-wheels-metal.yaml | 2 +- .github/workflows/lint.yaml | 2 +- .github/workflows/publish-to-test.yaml | 2 +- .github/workflows/publish.yaml | 2 +- .github/workflows/test-pypi.yaml | 6 +++--- .github/workflows/test.yaml | 10 +++++----- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/build-and-release.yaml b/.github/workflows/build-and-release.yaml index 9978cb66b2..81c9a961b4 100644 --- a/.github/workflows/build-and-release.yaml +++ b/.github/workflows/build-and-release.yaml @@ -19,7 +19,7 @@ jobs: submodules: "recursive" # Used to host cibuildwheel - - uses: actions/setup-python@v5 + - uses: actions/setup-python@v6 with: python-version: "3.9" @@ -162,7 +162,7 @@ jobs: with: submodules: "recursive" - - uses: actions/setup-python@v5 + - uses: actions/setup-python@v6 with: python-version: "3.9" diff --git a/.github/workflows/build-wheels-cuda.yaml b/.github/workflows/build-wheels-cuda.yaml index f1b2b8b6b5..3e67d14cba 100644 --- a/.github/workflows/build-wheels-cuda.yaml +++ b/.github/workflows/build-wheels-cuda.yaml @@ -67,7 +67,7 @@ jobs: with: submodules: "recursive" - - uses: actions/setup-python@v5 + - uses: actions/setup-python@v6 with: python-version: ${{ matrix.pyver }} cache: 'pip' diff --git a/.github/workflows/build-wheels-metal.yaml b/.github/workflows/build-wheels-metal.yaml index 5f5eba6854..bf2c8bc124 100644 --- a/.github/workflows/build-wheels-metal.yaml +++ 
b/.github/workflows/build-wheels-metal.yaml @@ -19,7 +19,7 @@ jobs: submodules: "recursive" # Used to host cibuildwheel - - uses: actions/setup-python@v5 + - uses: actions/setup-python@v6 with: python-version: "3.12" cache: 'pip' diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 8b3e6322dc..3c6f5ff45e 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -15,7 +15,7 @@ jobs: - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: "3.12" diff --git a/.github/workflows/publish-to-test.yaml b/.github/workflows/publish-to-test.yaml index de3ae42aae..54572bdad9 100644 --- a/.github/workflows/publish-to-test.yaml +++ b/.github/workflows/publish-to-test.yaml @@ -21,7 +21,7 @@ jobs: submodules: "recursive" - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: "3.11" cache: 'pip' diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index bb76f53941..3c2ea56d68 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -15,7 +15,7 @@ jobs: submodules: "recursive" - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: "3.9" diff --git a/.github/workflows/test-pypi.yaml b/.github/workflows/test-pypi.yaml index 335033bba6..416cd0cddf 100644 --- a/.github/workflows/test-pypi.yaml +++ b/.github/workflows/test-pypi.yaml @@ -12,7 +12,7 @@ jobs: steps: - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} cache: 'pip' @@ -48,7 +48,7 @@ jobs: steps: - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} cache: 'pip' @@ -84,7 +84,7 @@ jobs: steps: - name: Set up Python ${{ 
matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} cache: 'pip' diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index d798967675..ecba1b40b0 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -23,7 +23,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: "3.9" - name: Install huggingface-hub @@ -54,7 +54,7 @@ jobs: submodules: "recursive" - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} cache: 'pip' @@ -85,7 +85,7 @@ jobs: submodules: "recursive" - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} cache: 'pip' @@ -119,7 +119,7 @@ jobs: submodules: "recursive" - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} cache: 'pip' @@ -156,7 +156,7 @@ jobs: submodules: "recursive" - name: Set up Python 3.9 - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: "3.9" From ab7a9b0c864cc82bdcfb7cae911c0bfc606883ba Mon Sep 17 00:00:00 2001 From: Andrei Date: Wed, 3 Jun 2026 04:35:33 -0700 Subject: [PATCH 7/9] feat(ci): add Vulkan wheel builds (#2251) * feat(ci): add Vulkan wheel builds * docs: update changelog for Vulkan wheel builds * docs: add Vulkan wheel install instructions --- .github/workflows/build-wheels-vulkan.yaml | 123 ++++++++++++++++++ .../generate-index-from-release.yaml | 3 +- CHANGELOG.md | 1 + README.md | 9 ++ 4 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/build-wheels-vulkan.yaml diff --git 
a/.github/workflows/build-wheels-vulkan.yaml b/.github/workflows/build-wheels-vulkan.yaml
new file mode 100644
index 0000000000..760205c839
--- /dev/null
+++ b/.github/workflows/build-wheels-vulkan.yaml
@@ -0,0 +1,123 @@
+name: Build Wheels (Vulkan)
+
+on: workflow_dispatch
+
+permissions:
+  contents: write
+
+env:
+  VULKAN_SDK_VERSION: "1.4.341.0"
+  VULKAN_SDK_LINUX_SHA256: "ed66477d587a5587dc3601b1c2cdcc1fab5529c505f53a00171876cecd9b4fbe"
+
+jobs:
+  build_wheels:
+    name: Build Vulkan wheel on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - os: ubuntu-22.04
+            pyver: "3.9"
+            artifact: wheels-vulkan-ubuntu-22.04
+          - os: windows-2022
+            pyver: "3.9"
+            artifact: wheels-vulkan-windows-2022
+
+    steps:
+      - name: Set up MSVC
+        if: runner.os == 'Windows'
+        uses: ilammy/msvc-dev-cmd@v1
+        with:
+          arch: x64
+
+      - uses: actions/checkout@v4
+        with:
+          submodules: "recursive"
+
+      - uses: actions/setup-python@v6
+        with:
+          python-version: ${{ matrix.pyver }}
+          cache: "pip"
+
+      - name: Install Vulkan SDK
+        if: runner.os == 'Linux'
+        run: |
+          curl -fL \
+            "https://sdk.lunarg.com/sdk/download/${VULKAN_SDK_VERSION}/linux/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.xz" \
+            -o vulkan-sdk.tar.xz
+          echo "${VULKAN_SDK_LINUX_SHA256} vulkan-sdk.tar.xz" | sha256sum -c -
+          mkdir -p "$RUNNER_TEMP/vulkan-sdk"
+          tar -xf vulkan-sdk.tar.xz -C "$RUNNER_TEMP/vulkan-sdk"
+          source "$RUNNER_TEMP/vulkan-sdk/${VULKAN_SDK_VERSION}/setup-env.sh"
+          {
+            echo "VULKAN_SDK=$VULKAN_SDK"
+            echo "LD_LIBRARY_PATH=$VULKAN_SDK/lib:${LD_LIBRARY_PATH:-}"
+          } >> "$GITHUB_ENV"
+          echo "$VULKAN_SDK/bin" >> "$GITHUB_PATH"
+          "$VULKAN_SDK/bin/glslc" --version
+
+      - name: Install Vulkan SDK
+        if: runner.os == 'Windows'
+        shell: pwsh
+        run: |
+          choco install vulkan-sdk --version="$env:VULKAN_SDK_VERSION" --no-progress -y
+          $vulkanSdk = Join-Path 'C:\VulkanSDK' $env:VULKAN_SDK_VERSION
+          if (-not (Test-Path $vulkanSdk)) {
+            throw "Failed to find Vulkan SDK at $vulkanSdk"
+          }
+          "VULKAN_SDK=$vulkanSdk" >> $env:GITHUB_ENV
+          "$vulkanSdk\Bin" >> $env:GITHUB_PATH
+          & "$vulkanSdk\Bin\glslc.exe" --version
+
+      - name: Install build dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install build wheel
+
+      - name: Install Windows build dependencies
+        if: runner.os == 'Windows'
+        run: python -m pip install ninja
+
+      - name: Build Vulkan wheel
+        if: runner.os == 'Linux'
+        run: |
+          export CMAKE_ARGS="-DGGML_NATIVE=off -DGGML_METAL=OFF -DGGML_VULKAN=on"
+          python -m build --wheel
+          mkdir -p wheelhouse
+          cp dist/*.whl wheelhouse/
+
+      - name: Build Vulkan wheel
+        if: runner.os == 'Windows'
+        shell: pwsh
+        run: |
+          $env:CMAKE_GENERATOR = 'Ninja'
+          $env:CMAKE_ARGS = '-DGGML_NATIVE=off -DGGML_VULKAN=on'
+          python -m build --wheel
+          New-Item -ItemType Directory -Force wheelhouse | Out-Null
+          Copy-Item dist/*.whl wheelhouse/
+
+      - uses: actions/upload-artifact@v7
+        with:
+          name: ${{ matrix.artifact }}
+          path: ./wheelhouse/*.whl
+
+  release:
+    name: Release
+    needs: [build_wheels]
+    if: startsWith(github.ref, 'refs/tags/')
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          merge-multiple: true
+          path: dist
+
+      - uses: softprops/action-gh-release@v2
+        with:
+          files: dist/*
+          # Publish under the "<ref>-vulkan" tag; the wheels index workflow selects releases by this suffix.
+          tag_name: ${{ github.ref_name }}-vulkan
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/generate-index-from-release.yaml b/.github/workflows/generate-index-from-release.yaml
index 255ee67d6f..7fc19e697f 100644
--- a/.github/workflows/generate-index-from-release.yaml
+++ b/.github/workflows/generate-index-from-release.yaml
@@ -3,7 +3,7 @@ name: Wheels Index
 on:
   # Trigger on new release
   workflow_run:
-    workflows: ["Release", "Build Wheels (CUDA)", "Build Wheels (Metal)"]
+    workflows: ["Release", "Build Wheels (CUDA)", "Build Wheels (Metal)", "Build Wheels (Vulkan)"]
     types:
       - completed
 
@@ -46,6 +46,7 @@ jobs:
           ./scripts/releases-to-pep-503.sh index/whl/cu124 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu124$'
           # ./scripts/releases-to-pep-503.sh index/whl/cu125 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu124$'
           # ./scripts/releases-to-pep-503.sh index/whl/cu126 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu124$'
+          ./scripts/releases-to-pep-503.sh index/whl/vulkan '^[v]?[0-9]+\.[0-9]+\.[0-9]+-vulkan$'
           ./scripts/releases-to-pep-503.sh index/whl/metal '^[v]?[0-9]+\.[0-9]+\.[0-9]+-metal$'
       - name: Upload artifact
         uses: actions/upload-pages-artifact@v3
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6677fc8fe0..1e2a180fd9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+- feat(ci): add Vulkan wheel builds by @abetlen in #2251
 - fix: handle additional `from_pretrained` files in subfolders by @TNing in #2085
 
 ## [0.3.25]
diff --git a/README.md b/README.md
index 7db3e27448..57ede08768 100644
--- a/README.md
+++ b/README.md
@@ -196,6 +196,15 @@ To install with Vulkan support, set the `GGML_VULKAN=on` environment variable be
 CMAKE_ARGS="-DGGML_VULKAN=on" pip install llama-cpp-python
 ```
 
+**Pre-built Wheel (New)**
+
+It is also possible to install a pre-built wheel with Vulkan support for Linux or Windows:
+
+```bash
+pip install llama-cpp-python \
+  --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/vulkan
+```
+
From 3754c04a168ce35a3e5bbe8f600014fc70970415 Mon Sep 17 00:00:00 2001
From: Andrei
Date: Wed, 3 Jun 2026 04:37:24 -0700
Subject: [PATCH 8/9] feat(ci): add ROCm wheel builds (#2252)

* feat(ci): add ROCm wheel builds

* docs: update changelog for ROCm wheel builds

* docs: add ROCm wheel install instructions
---
Note: edited by hand after `git format-patch` (the release jobs now filter the
artifacts they download); the blob id on the build-wheels-rocm.yaml index line
below predates that edit.

 .github/workflows/build-wheels-rocm.yaml      | 245 ++++++++++++++++++
 .../generate-index-from-release.yaml          |   4 +-
 CHANGELOG.md                                  |   1 +
 README.md                                     |  16 ++
 4 files changed, 265 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/build-wheels-rocm.yaml

diff --git a/.github/workflows/build-wheels-rocm.yaml b/.github/workflows/build-wheels-rocm.yaml
new file mode 100644
index 0000000000..0971953886
--- /dev/null
+++ b/.github/workflows/build-wheels-rocm.yaml
@@ -0,0 +1,245 @@
+# Builds llama-cpp-python wheels with ROCm (Linux) and HIP (Windows) on manual
+# dispatch; when dispatched from a tag, each set is attached to its own release.
+name: Build Wheels (ROCm)
+
+on: workflow_dispatch
+
+permissions:
+  contents: write
+
+jobs:
+  build_wheels:
+    name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ROCm ${{ matrix.rocm }}
+    runs-on: ${{ matrix.os }}
+    container:
+      image: rocm/dev-ubuntu-22.04:${{ matrix.rocm }}-complete
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - os: ubuntu-22.04
+            pyver: "3.9"
+            rocm: "7.2.4"
+            amdgpu_targets: gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201
+
+    steps:
+      - name: Install system dependencies
+        run: |
+          apt-get update
+          apt-get install -y --no-install-recommends git cmake lsb-release ninja-build
+
+      - uses: actions/checkout@v4
+        with:
+          submodules: "recursive"
+
+      - uses: actions/setup-python@v6
+        with:
+          python-version: ${{ matrix.pyver }}
+          cache: "pip"
+
+      - name: Install build dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install build wheel
+
+      - name: Build ROCm wheel
+        run: |
+          export ROCM_PATH="${ROCM_PATH:-/opt/rocm}"
+          export HIP_PATH="${HIP_PATH:-$ROCM_PATH}"
+          export PATH="$ROCM_PATH/bin:$ROCM_PATH/llvm/bin:$PATH"
+          export LD_LIBRARY_PATH="$ROCM_PATH/lib:$ROCM_PATH/lib64:${LD_LIBRARY_PATH:-}"
+          export CC="$ROCM_PATH/llvm/bin/clang"
+          export CXX="$ROCM_PATH/llvm/bin/clang++"
+          export HIPCXX="$ROCM_PATH/llvm/bin/clang"
+          export CMAKE_GENERATOR=Ninja
+
+          hipconfig --version
+          hipcc --version
+
+          rocm_tag="$(hipconfig --version | sed -E 's/^([0-9]+)\.([0-9]+).*/\1\2/')"
+          echo "ROCM_VERSION=$rocm_tag" >> "$GITHUB_ENV"
+
+          amdgpu_targets="${{ matrix.amdgpu_targets }}"
+          export CMAKE_ARGS="-DGGML_HIP=on -DGGML_NATIVE=off -DGGML_AVX=off -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off -DAMDGPU_TARGETS=$amdgpu_targets -DCMAKE_HIP_ARCHITECTURES=$amdgpu_targets"
+          python -m build --wheel
+          mkdir -p wheelhouse
+          cp dist/*.whl wheelhouse/
+
+      - uses: actions/upload-artifact@v7
+        with:
+          name: wheels-rocm${{ env.ROCM_VERSION }}-${{ matrix.os }}
+          path: ./wheelhouse/*.whl
+
+  build_wheels_windows_hip:
+    name: Build Wheel windows-2022 ${{ matrix.pyver }} HIP ${{ matrix.name }}
+    runs-on: windows-2022
+    env:
+      HIPSDK_INSTALLER_VERSION: "26.Q1"
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: radeon
+            pyver: "3.9"
+            amdgpu_targets: gfx1150;gfx1151;gfx1200;gfx1201;gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: "recursive"
+
+      - uses: actions/setup-python@v6
+        with:
+          python-version: ${{ matrix.pyver }}
+          cache: "pip"
+
+      - name: Set up MSVC
+        uses: ilammy/msvc-dev-cmd@v1
+        with:
+          arch: x64
+
+      - name: Install build dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install build wheel
+
+      - name: Grab rocWMMA package
+        run: |
+          curl -fL -o rocwmma.deb "https://repo.radeon.com/rocm/apt/7.2.1/pool/main/r/rocwmma-dev/rocwmma-dev_2.2.0.70201-81~24.04_amd64.deb"
+          7z x rocwmma.deb
+          7z x data.tar
+
+      - name: Cache ROCm installation
+        id: cache-rocm
+        uses: actions/cache@v5
+        with:
+          path: C:\Program Files\AMD\ROCm
+          key: cache-gha-rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}
+
+      - name: Install ROCm
+        if: steps.cache-rocm.outputs.cache-hit != 'true'
+        run: |
+          $ErrorActionPreference = "Stop"
+          Invoke-WebRequest `
+            -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ env.HIPSDK_INSTALLER_VERSION }}-Win11-For-HIP.exe" `
+            -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          $proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
+          $completed = $proc.WaitForExit(1800000)
+          if (-not $completed) {
+            $proc.Kill()
+            throw "ROCm installation timed out after 30 minutes"
+          }
+          if ($proc.ExitCode -ne 0) {
+            throw "ROCm installation failed with exit code $($proc.ExitCode)"
+          }
+
+      - name: Verify ROCm
+        run: |
+          $clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1
+          if (-not $clangPath) {
+            throw "ROCm installation not found"
+          }
+          & $clangPath.FullName --version
+
+      - name: Build HIP wheel
+        run: |
+          $ErrorActionPreference = "Stop"
+          $hipPath = Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Split-Path | Split-Path
+          $rocwmmaInclude = (Join-Path $PWD 'opt\rocm-7.2.1\include').Replace('\', '/')
+          $amdgpuTargets = "${{ matrix.amdgpu_targets }}"
+
+          $env:HIP_PATH = $hipPath
+          $env:ROCM_PATH = $hipPath
+          $env:CMAKE_PREFIX_PATH = $hipPath
+          $env:HIP_PLATFORM = 'amd'
+          $env:PATH = "$hipPath\bin;$env:PATH"
+          $env:CC = "$hipPath\bin\clang.exe"
+          $env:CXX = "$hipPath\bin\clang++.exe"
+          $env:HIPCXX = "$hipPath\bin\clang.exe"
+          $env:CMAKE_GENERATOR = 'Unix Makefiles'
+          $env:CXXFLAGS = "-I$rocwmmaInclude -Wno-ignored-attributes -Wno-nested-anon-types"
+          $env:CMAKE_ARGS = "-DGGML_HIP=ON -DGGML_HIP_ROCWMMA_FATTN=ON -DGGML_NATIVE=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DGPU_TARGETS=$amdgpuTargets"
+
+          python -m build --wheel
+
+      - name: Bundle ROCm runtime DLLs
+        run: |
+          $ErrorActionPreference = "Stop"
+          $hipPath = Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Split-Path | Split-Path
+          $wheel = Get-ChildItem dist\*.whl | Select-Object -First 1
+          python -m wheel unpack $wheel.FullName -d wheel-unpacked
+          $wheelRoot = Get-ChildItem wheel-unpacked -Directory | Select-Object -First 1
+          $libDir = Join-Path $wheelRoot.FullName 'llama_cpp\lib'
+          New-Item -ItemType Directory -Force $libDir | Out-Null
+
+          $dllPatterns = @(
+            'amdhip64.dll',
+            'hiprtc*.dll',
+            'libhipblas.dll',
+            'libhipblaslt.dll',
+            'rocblas.dll'
+          )
+          foreach ($pattern in $dllPatterns) {
+            Copy-Item (Join-Path $hipPath "bin\$pattern") $libDir -ErrorAction SilentlyContinue
+          }
+
+          New-Item -ItemType Directory -Force (Join-Path $libDir 'rocblas\library') | Out-Null
+          New-Item -ItemType Directory -Force (Join-Path $libDir 'hipblaslt\library') | Out-Null
+          Copy-Item "$hipPath\bin\rocblas\library\*" (Join-Path $libDir 'rocblas\library') -Recurse -Force
+          Copy-Item "$hipPath\bin\hipblaslt\library\*" (Join-Path $libDir 'hipblaslt\library') -Recurse -Force
+
+          Remove-Item dist\*.whl
+          python -m wheel pack $wheelRoot.FullName -d dist
+          New-Item -ItemType Directory -Force wheelhouse | Out-Null
+          Copy-Item dist/*.whl wheelhouse/
+
+      - uses: actions/upload-artifact@v7
+        with:
+          name: wheels-hip-${{ matrix.name }}-windows-2022
+          path: ./wheelhouse/*.whl
+
+  release_rocm:
+    name: Release ROCm
+    needs: [build_wheels]
+    if: startsWith(github.ref, 'refs/tags/')
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          # Fetch only the Linux ROCm wheels. Without a pattern every artifact
+          # uploaded so far in this run (e.g. the Windows HIP wheels) is pulled in.
+          pattern: wheels-rocm*
+          merge-multiple: true
+          path: dist
+
+      - uses: softprops/action-gh-release@v2
+        with:
+          files: dist/*
+          # Publish under the "<ref>-rocm72" tag (matches the rocm72 wheel index).
+          tag_name: ${{ github.ref_name }}-rocm72
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+  release_hip:
+    name: Release HIP
+    needs: [build_wheels_windows_hip]
+    if: startsWith(github.ref, 'refs/tags/')
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          # Fetch only the Windows HIP wheels. Without a pattern every artifact
+          # uploaded so far in this run (e.g. the Linux ROCm wheels) is pulled in.
+          pattern: wheels-hip-*
+          merge-multiple: true
+          path: dist
+
+      - uses: softprops/action-gh-release@v2
+        with:
+          files: dist/*
+          # Publish under the "<ref>-hip-radeon" tag (matches the hip-radeon wheel index).
+          tag_name: ${{ github.ref_name }}-hip-radeon
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/generate-index-from-release.yaml b/.github/workflows/generate-index-from-release.yaml
index 7fc19e697f..c93e0be351 100644
--- a/.github/workflows/generate-index-from-release.yaml
+++ b/.github/workflows/generate-index-from-release.yaml
@@ -3,7 +3,7 @@ name: Wheels Index
 on:
   # Trigger on new release
   workflow_run:
-    workflows: ["Release", "Build Wheels (CUDA)", "Build Wheels (Metal)", "Build Wheels (Vulkan)"]
+    workflows: ["Release", "Build Wheels (CUDA)", "Build Wheels (Metal)", "Build Wheels (Vulkan)", "Build Wheels (ROCm)"]
     types:
       - completed
 
@@ -46,6 +46,8 @@ jobs:
           ./scripts/releases-to-pep-503.sh index/whl/cu124 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu124$'
           # ./scripts/releases-to-pep-503.sh index/whl/cu125 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu124$'
           # ./scripts/releases-to-pep-503.sh index/whl/cu126 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu124$'
+          ./scripts/releases-to-pep-503.sh index/whl/rocm72 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-rocm72$'
+          ./scripts/releases-to-pep-503.sh index/whl/hip-radeon '^[v]?[0-9]+\.[0-9]+\.[0-9]+-hip-radeon$'
           ./scripts/releases-to-pep-503.sh index/whl/vulkan '^[v]?[0-9]+\.[0-9]+\.[0-9]+-vulkan$'
           ./scripts/releases-to-pep-503.sh index/whl/metal '^[v]?[0-9]+\.[0-9]+\.[0-9]+-metal$'
       - name: Upload artifact
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1e2a180fd9..f46b697c66 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+- feat(ci): add ROCm wheel builds by @abetlen in #2252
 - feat(ci): add Vulkan wheel builds by @abetlen in #2251
 - fix: handle additional `from_pretrained` files in subfolders by @TNing in #2085
 
diff --git a/README.md b/README.md
index 57ede08768..dd0024676a 100644
--- a/README.md
+++ b/README.md
@@ -185,6 +185,22 @@ To install with HIP / ROCm support for AMD cards, set the `GGML_HIP=on` environm
 CMAKE_ARGS="-DGGML_HIP=on" pip install llama-cpp-python
 ```
 
+**Pre-built Wheel (New)**
+
+It is also possible to install a pre-built wheel with ROCm support for Linux:
+
+```bash
+pip install llama-cpp-python \
+  --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/rocm72
+```
+
+Or a pre-built wheel with HIP Radeon support for Windows:
+
+```powershell
+pip install llama-cpp-python `
+  --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/hip-radeon
+```
+
From ddaac1033590941c25ce367d3af0768c4f5d5a0b Mon Sep 17 00:00:00 2001 From: Andrei Date: Wed, 3 Jun 2026 04:50:45 -0700 Subject: [PATCH 9/9] feat: update llama.cpp (#2253) * feat: update llama.cpp * docs: update changelog for llama.cpp update --- CHANGELOG.md | 1 + vendor/llama.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f46b697c66..462f0d0c18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- feat: update llama.cpp to ggml-org/llama.cpp@3571fa543 by @abetlen in #2253 - feat(ci): add ROCm wheel builds by @abetlen in #2252 - feat(ci): add Vulkan wheel builds by @abetlen in #2251 - fix: handle additional `from_pretrained` files in subfolders by @TNing in #2085 diff --git a/vendor/llama.cpp b/vendor/llama.cpp index 210a6570ce..3571fa5435 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit 210a6570ceda20c5d6439172c09ada08c3754cc9 +Subproject commit 3571fa5435ac9ff243662b1caabc407e8d433c9d