Skip to content

Commit 04b779b

Browse files
committed
feat(llama.cpp): add turboquant support
This PR adds the patchset from the great work of @TheTom in https://github.com/TheTom/llama-cpp-turboquant and creates a pipeline that automatically updates the patches against upstream. It also adds the scaffolding needed to do the same with other patch sources. Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent 6c635e8 commit 04b779b

3 files changed

Lines changed: 70 additions & 12 deletions

File tree

.github/workflows/bump_deps.yaml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,3 @@ jobs:
6363
branch: "update/${{ matrix.variable }}"
6464
body: ${{ steps.bump.outputs.message }}
6565
signoff: true
66-
67-
68-
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Patch sources for the llama-cpp backend.
2+
# Each source declares a fork whose commits are extracted as patches
3+
# and applied on top of upstream llama.cpp during the build.
4+
# See scripts/patch_utils/upgrade_patches.sh for the rebase/regenerate tool.
5+
sources:
6+
- name: turboquant
7+
repo: https://github.com/TheTom/llama-cpp-turboquant.git
8+
branch: feature/turboquant-kv-cache
9+
fork_base: ded446b34c0cd803a0122446b848619adbb458cf
10+
upstream_repo: https://github.com/ggml-org/llama.cpp.git
11+
version_file: Makefile
12+
version_var: LLAMA_VERSION

backend/cpp/llama-cpp/prepare.sh

Lines changed: 58 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,66 @@
11
#!/bin/bash
2+
set -e
23

3-
## Patches
4+
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
45

5-
## Apply patches from the `patches` directory
6-
if [ -d "patches" ]; then
7-
for patch in $(ls patches); do
8-
echo "Applying patch $patch"
9-
patch -d llama.cpp/ -p1 < patches/$patch
10-
done
6+
## Generate patches from fork sources if not already present.
7+
## If patches/<name>/ already contains .patch files (vendored), skip fetching.
8+
## Otherwise, clone the fork and generate them on the fly.
9+
if [ -f "patches/sources.yaml" ] && command -v yq &>/dev/null; then
10+
SOURCE_COUNT=$(yq '.sources | length' patches/sources.yaml)
11+
12+
for i in $(seq 0 $((SOURCE_COUNT - 1))); do
13+
NAME=$(yq ".sources[$i].name" patches/sources.yaml)
14+
REPO=$(yq ".sources[$i].repo" patches/sources.yaml)
15+
BRANCH=$(yq ".sources[$i].branch" patches/sources.yaml)
16+
FORK_BASE=$(yq ".sources[$i].fork_base" patches/sources.yaml)
17+
18+
SOURCE_DIR="patches/$NAME"
19+
EXISTING=$(ls "$SOURCE_DIR"/*.patch 2>/dev/null | wc -l)
20+
21+
if [ "$EXISTING" -gt 0 ]; then
22+
echo "Source '$NAME': $EXISTING patches already present — skipping fetch."
23+
else
24+
echo "Source '$NAME': no patches found — fetching from $REPO ($BRANCH)"
25+
26+
TMPDIR=$(mktemp -d)
27+
28+
if git clone --single-branch -b "$BRANCH" --depth=500 "$REPO" "$TMPDIR/fork" 2>&1; then
29+
cd "$TMPDIR/fork"
30+
PATCH_COUNT=$(git rev-list --count "$FORK_BASE"..HEAD 2>/dev/null || echo "0")
31+
echo " $PATCH_COUNT commits to extract"
32+
33+
if [ "$PATCH_COUNT" -gt 0 ]; then
34+
mkdir -p "$SCRIPT_DIR/$SOURCE_DIR"
35+
git format-patch "$FORK_BASE"..HEAD -o "$SCRIPT_DIR/$SOURCE_DIR/" >/dev/null 2>&1
36+
echo " Generated $PATCH_COUNT patches in $SOURCE_DIR/"
37+
fi
38+
cd "$SCRIPT_DIR"
39+
else
40+
echo "WARNING: Failed to clone $REPO — skipping source '$NAME'"
41+
fi
42+
43+
rm -rf "$TMPDIR"
44+
fi
45+
done
46+
elif [ -f "patches/sources.yaml" ]; then
47+
echo "WARNING: yq not found — skipping source-based patch generation. Install yq to enable."
1148
fi
1249

13-
set -e
50+
## Apply patches from source subdirectories (alphabetical order)
51+
# Apply every existing patch file passed as an argument, in the given order,
# to the llama.cpp/ tree. Aborts the whole script on the first failure.
_apply_patch_files() {
  local p
  for p in "$@"; do
    # An unmatched glob is passed through literally; ignore it.
    [ -e "$p" ] || continue
    echo "Applying: $p"
    patch -d llama.cpp/ -p1 < "$p" || { echo "FAILED: $p"; exit 1; }
  done
}

# Apply patches/<source>/*.patch for each source subdirectory, then the
# top-level patches/*.patch (manual patches). Does nothing when there is no
# patches/ directory.
#
# Shell globs are used instead of `$(find … | sort)` / `$(ls … | sort)`:
# glob results are already sorted and are never word-split, so names
# containing whitespace are handled correctly.
# NOTE(review): unlike `find`, globs skip dot-directories under patches/.
apply_all_patches() {
  local dir

  [ -d "patches" ] || return 0

  for dir in patches/*/; do
    [ -d "$dir" ] || continue
    # `find -type d` did not follow symlinks; keep ignoring them.
    [ ! -L "${dir%/}" ] || continue
    _apply_patch_files "$dir"*.patch
  done

  # Apply any top-level .patch files (manual patches)
  _apply_patch_files patches/*.patch
}

apply_all_patches
1464

1565
for file in $(ls llama.cpp/tools/server/); do
1666
cp -rfv llama.cpp/tools/server/$file llama.cpp/tools/grpc-server/
@@ -28,4 +78,3 @@ else
2878
echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
2979
fi
3080
set -e
31-

0 commit comments

Comments
 (0)