Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions .github/workflows/bump_deps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ jobs:
variable: "LLAMA_VERSION"
branch: "master"
file: "backend/cpp/llama-cpp/Makefile"
- repository: "TheTom/llama-cpp-turboquant"
variable: "TURBOQUANT_VERSION"
branch: "feature/turboquant-kv-cache"
file: "backend/cpp/llama-cpp/Makefile"
- repository: "ggml-org/whisper.cpp"
variable: "WHISPER_CPP_VERSION"
branch: "master"
Expand Down Expand Up @@ -63,6 +67,3 @@ jobs:
branch: "update/${{ matrix.variable }}"
body: ${{ steps.bump.outputs.message }}
signoff: true



2 changes: 2 additions & 0 deletions backend/cpp/llama-cpp/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
LLAMA_VERSION?=0fcb3760b2b9a3a496ef14621a7e4dad7a8df90f
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp

TURBOQUANT_VERSION?=8ad0f00e9a38df6c29fc10363341dde300f92ae4

CMAKE_ARGS?=
BUILD_TYPE?=
NATIVE?=false
Expand Down
14 changes: 14 additions & 0 deletions backend/cpp/llama-cpp/patches/sources.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Patch sources for the llama-cpp backend.
# Each source declares a fork whose commits are extracted as patches
# and applied on top of upstream llama.cpp during the build.
# See scripts/patch_utils/apply_patches.sh for the generic patch engine.
#
# version_var: Makefile variable with the pinned fork commit SHA
# base_var: Makefile variable with the upstream base commit SHA
# Both are read from version_file (relative to backend dir) to compute the diff.
sources:

Check warning on line 9 in backend/cpp/llama-cpp/patches/sources.yaml

View workflow job for this annotation

GitHub Actions / Yamllint

9:1 [document-start] missing document start "---"
- name: turboquant
repo: https://github.com/TheTom/llama-cpp-turboquant.git
version_var: TURBOQUANT_VERSION
base_var: LLAMA_VERSION
version_file: Makefile
17 changes: 6 additions & 11 deletions backend/cpp/llama-cpp/prepare.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
#!/bin/bash
set -e

## Patches
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$SCRIPT_DIR/../../.."

## Apply patches from the `patches` directory
if [ -d "patches" ]; then
for patch in $(ls patches); do
echo "Applying patch $patch"
patch -d llama.cpp/ -p1 < patches/$patch
done
fi

set -e
## Apply patches from sources and/or local .patch files
"$REPO_ROOT/scripts/patch_utils/apply_patches.sh" "$SCRIPT_DIR" llama.cpp

## Copy server files into grpc-server build directory
for file in $(ls llama.cpp/tools/server/); do
cp -rfv llama.cpp/tools/server/$file llama.cpp/tools/grpc-server/
done
Expand All @@ -28,4 +24,3 @@ else
echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
fi
set -e

151 changes: 151 additions & 0 deletions scripts/patch_utils/apply_patches.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#!/bin/bash
# apply_patches.sh — Generic patch fetcher and applier for any backend.
#
# Usage: ./apply_patches.sh <source-dir> <target-dir>
#
# <source-dir> Directory containing a patches/ folder (with optional sources.yaml)
# <target-dir> The cloned upstream repo to patch (e.g., llama.cpp/)
#
# Behavior (idempotent):
# 1. If patches/sources.yaml exists and yq is available, for each source:
# - If patches/<name>/ already has .patch files: skip fetching (vendored)
# - Otherwise: clone the fork at a pinned SHA, diff against the pinned
# upstream SHA, and generate patches
# 2. Apply all patches (skips already-applied ones)
# 3. Fails fast on any patch application error
#
# sources.yaml fields:
# name — subdirectory name for this source's patches
# repo — fork git URL
# version_var — Makefile variable holding the pinned fork commit SHA
# base_var — Makefile variable holding the pinned upstream commit SHA
# version_file — Makefile path (relative to backend dir)

# Abort the script as soon as any unguarded command fails.
set -o errexit

# Temp files go under /tmp (or $TMPDIR_OVERRIDE when provided) to avoid
# macOS long-path issues with the default per-user temp directory.
TMPDIR="${TMPDIR_OVERRIDE:-/tmp}"
export TMPDIR

#######################################
# Print the value of the first `NAME?=value` assignment found in a Makefile.
#
# Arguments:
#   $1 - variable name (matched literally at the start of a line)
#   $2 - path to the Makefile
# Outputs:
#   The value on stdout, with surrounding blanks (and a trailing CR) removed.
#   Nothing is printed when the variable or the file is missing.
# Returns:
#   Always 0, so callers running under `set -e` can test for an empty result
#   instead of being aborted.
#######################################
read_makefile_var() {
    local var_name="$1"
    local makefile="$2"

    if [ ! -r "$makefile" ]; then
        echo "read_makefile_var: cannot read $makefile" >&2
        return 0
    fi

    # awk instead of grep|cut:
    #  - the name is compared literally (no regex interpretation),
    #  - optional blanks around `?=` are accepted,
    #  - everything after the operator is kept, even if it contains `=`.
    awk -v name="$var_name" '
        index($0, name) == 1 {
            rest = substr($0, length(name) + 1)
            if (rest ~ /^[ \t]*[?]=/) {
                sub(/^[ \t]*[?]=[ \t]*/, "", rest)
                sub(/[ \t\r]*$/, "", rest)
                print rest
                exit
            }
        }
    ' "$makefile"
}

#######################################
# Apply a single patch file to a source tree unless it is already applied.
#
# Arguments:
#   $1 - directory the patch is applied in (passed to `patch -d`)
#   $2 - path of the patch file
#   $3 - human-readable label used in log lines
# Outputs:
#   Progress lines on stdout.
# Returns:
#   0 when the patch was applied or was already present; terminates the
#   whole script with status 1 when `patch` fails.
#######################################
apply_one_patch() {
    local dest="$1" pfile="$2" tag="$3"

    # Probe first: if the patch does NOT reverse cleanly it has not been
    # applied yet, so apply it now.
    if ! patch -d "$dest" -p1 --reverse --dry-run < "$pfile" >/dev/null 2>&1; then
        echo " Applying: $tag"
        if ! patch -d "$dest" -p1 --forward < "$pfile"; then
            echo "FAILED: $pfile"
            exit 1
        fi
        return 0
    fi

    echo " Already applied, skipping: $tag"
    return 0
}

#######################################
# Print one field of entry <index> in a sources.yaml file.
#
# Arguments: $1 - sources.yaml path, $2 - zero-based index, $3 - field name
# Outputs:   the field value; an empty line when the field is missing/null.
#            Surrounding double quotes (emitted by some yq flavours) are
#            stripped.
#######################################
_patch_source_field() {
    local value
    value=$(yq ".sources[$2].$3 // \"\"" "$1") || return 1
    value=${value#\"}
    value=${value%\"}
    if [ "$value" = "null" ]; then
        value=""
    fi
    printf '%s\n' "$value"
}

#######################################
# Clone a fork and write one patch per commit of <base>..<head> to <out-dir>.
#
# Arguments:
#   $1 - fork git URL
#   $2 - pinned fork commit (head of the range)
#   $3 - pinned upstream commit (base of the range)
#   $4 - optional upstream URL, fetched when the base commit is not in the fork
#   $5 - directory that receives the generated .patch files
#   $6 - source name (for log lines only)
# Returns:
#   0 on success (including "no commits in range"),
#   2 when the clone itself failed (caller may skip this source),
#   1 on any other failure.
# The work happens in a subshell so the caller's working directory is never
# changed, and the temporary clone is removed on every exit path.
#######################################
_generate_fork_patches() {
    local repo="$1" fork_sha="$2" base_sha="$3" upstream_url="$4"
    local out_dir="$5" name="$6"
    local clone_root count rc=0
    local -a generated

    clone_root=$(mktemp -d) || return 1

    (
        git clone "$repo" "$clone_root/fork" 2>&1 || exit 2
        cd "$clone_root/fork" || exit 1

        # The pinned fork commit may not be reachable from the cloned refs;
        # try to fetch it explicitly (best effort — checkout decides).
        git fetch origin "$fork_sha" 2>&1 || true
        if ! git checkout "$fork_sha" 2>&1; then
            echo "ERROR: cannot check out $fork_sha from $repo" >&2
            exit 1
        fi

        # The base commit must be present to compute the range. A real
        # GitHub fork shares history with upstream; otherwise fetch it from
        # the optional upstream_repo.
        if ! git cat-file -e "$base_sha" 2>/dev/null; then
            echo " Base commit not in fork history — fetching from upstream"
            if [ -n "$upstream_url" ]; then
                git remote add upstream "$upstream_url" 2>/dev/null || true
                if ! git fetch upstream 2>&1; then
                    echo "ERROR: cannot fetch upstream $upstream_url" >&2
                    exit 1
                fi
            fi
        fi

        count=$(git rev-list --count "$base_sha".."$fork_sha" 2>/dev/null || echo "0")
        echo " $count commits in diff"

        if [ "$count" -gt 0 ]; then
            # Generate into the temp dir first: a failed run must not leave a
            # partial patch set that a later run would treat as vendored.
            mkdir -p "$clone_root/out" || exit 1
            if ! git format-patch "$base_sha".."$fork_sha" -o "$clone_root/out" >/dev/null 2>&1; then
                echo "ERROR: git format-patch failed for source '$name'" >&2
                exit 1
            fi
            generated=("$clone_root/out"/*.patch)
            if [ -e "${generated[0]}" ]; then
                mkdir -p "$out_dir" || exit 1
                mv -- "${generated[@]}" "$out_dir"/ || exit 1
                echo " Generated ${#generated[@]} patches in patches/$name/"
            fi
        fi
    ) || rc=$?

    rm -rf -- "$clone_root"
    return "$rc"
}

#######################################
# Generate (when needed) and apply all patches of a backend.
#
# Arguments:
#   $1 - backend directory containing patches/ (optionally patches/sources.yaml)
#   $2 - checkout to patch; may be relative to the caller's working directory
# Behavior:
#   Phase 1: for each entry of patches/sources.yaml (only when yq is
#            installed) generate patches/<name>/*.patch from the fork unless
#            that directory already holds .patch files.
#   Phase 2: apply patches/<subdir>/*.patch (subdirectories in sorted order),
#            then patches/*.patch, through apply_one_patch.
# Returns:
#   0 on success or when there is no patches/ directory; 1 when the source
#   directory is invalid or patch generation fails.
#######################################
apply_patches() {
    local SOURCE_DIR TARGET_DIR PATCHES_DIR
    # Declaration and assignment are split so a failing `cd` is not masked.
    if ! SOURCE_DIR="$(cd "$1" 2>/dev/null && pwd)"; then
        echo "ERROR: source directory not found: $1" >&2
        return 1
    fi
    TARGET_DIR="$2"
    PATCHES_DIR="$SOURCE_DIR/patches"

    if [ ! -d "$PATCHES_DIR" ]; then
        return 0
    fi

    # Phase 1: Generate missing patches from fork sources
    local SOURCES_FILE="$PATCHES_DIR/sources.yaml"
    if [ -f "$SOURCES_FILE" ] && command -v yq >/dev/null 2>&1; then
        local SOURCE_COUNT i rc
        local NAME REPO VERSION_VAR BASE_VAR VERSION_FILE UPSTREAM_URL
        local MAKEFILE FORK_SHA BASE_SHA SOURCE_PATCH_DIR
        local -a existing

        if ! SOURCE_COUNT=$(yq '.sources | length' "$SOURCES_FILE"); then
            echo "ERROR: cannot parse $SOURCES_FILE" >&2
            return 1
        fi
        case "$SOURCE_COUNT" in
            '' | *[!0-9]*) SOURCE_COUNT=0 ;;
        esac

        # Arithmetic loop rather than `seq 0 N-1`: BSD seq counts DOWN for
        # `seq 0 -1`, which would run bogus iterations on an empty list.
        for ((i = 0; i < SOURCE_COUNT; i++)); do
            NAME=$(_patch_source_field "$SOURCES_FILE" "$i" name)
            REPO=$(_patch_source_field "$SOURCES_FILE" "$i" repo)
            VERSION_VAR=$(_patch_source_field "$SOURCES_FILE" "$i" version_var)
            BASE_VAR=$(_patch_source_field "$SOURCES_FILE" "$i" base_var)
            VERSION_FILE=$(_patch_source_field "$SOURCES_FILE" "$i" version_file)

            if [ -z "$NAME" ] || [ -z "$REPO" ] || [ -z "$VERSION_VAR" ] ||
                [ -z "$BASE_VAR" ] || [ -z "$VERSION_FILE" ]; then
                echo "WARNING: source #$i in $SOURCES_FILE is missing a required field — skipping" >&2
                continue
            fi

            MAKEFILE="$SOURCE_DIR/$VERSION_FILE"
            FORK_SHA=$(read_makefile_var "$VERSION_VAR" "$MAKEFILE")
            BASE_SHA=$(read_makefile_var "$BASE_VAR" "$MAKEFILE")

            if [ -z "$FORK_SHA" ] || [ -z "$BASE_SHA" ]; then
                echo "WARNING: Could not read $VERSION_VAR or $BASE_VAR from $MAKEFILE — skipping '$NAME'" >&2
                continue
            fi

            # Vendored patches win: count them with a glob, not `ls | wc -l`.
            SOURCE_PATCH_DIR="$PATCHES_DIR/$NAME"
            existing=("$SOURCE_PATCH_DIR"/*.patch)
            if [ ! -e "${existing[0]}" ]; then
                existing=()
            fi
            if [ "${#existing[@]}" -gt 0 ]; then
                echo "Patches [$NAME]: ${#existing[@]} patches already present — skipping fetch."
                continue
            fi

            echo "Patches [$NAME]: generating from $REPO"
            echo " base (upstream): ${BASE_SHA:0:12}"
            echo " head (fork): ${FORK_SHA:0:12}"

            UPSTREAM_URL=$(_patch_source_field "$SOURCES_FILE" "$i" upstream_repo)

            rc=0
            _generate_fork_patches "$REPO" "$FORK_SHA" "$BASE_SHA" \
                "$UPSTREAM_URL" "$SOURCE_PATCH_DIR" "$NAME" || rc=$?
            if [ "$rc" -eq 2 ]; then
                echo "WARNING: Failed to clone $REPO — skipping source '$NAME'" >&2
            elif [ "$rc" -ne 0 ]; then
                echo "ERROR: patch generation failed for source '$NAME'" >&2
                return 1
            fi
        done
    elif [ -f "$SOURCES_FILE" ]; then
        echo "WARNING: yq not found — skipping source-based patch generation." >&2
    fi

    # Phase 2: Apply patches (subdirectories first, then top-level).
    # Globs expand in sorted order and keep paths with spaces intact.
    local dir p
    for dir in "$PATCHES_DIR"/*/; do
        dir=${dir%/}
        # Real directories only (an unmatched glob or a symlink is skipped).
        if [ ! -d "$dir" ] || [ -L "$dir" ]; then
            continue
        fi
        for p in "$dir"/*.patch; do
            [ -e "$p" ] || continue
            apply_one_patch "$TARGET_DIR" "$p" "$(basename "$dir")/$(basename "$p")"
        done
    done
    for p in "$PATCHES_DIR"/*.patch; do
        [ -e "$p" ] || continue
        apply_one_patch "$TARGET_DIR" "$p" "$(basename "$p")"
    done

    return 0
}

# Entry point: both <source-dir> and <target-dir> are mandatory; anything
# less prints the usage line and exits with status 1.
if [ $# -ge 2 ]; then
    apply_patches "$1" "$2"
else
    echo "Usage: $0 <source-dir> <target-dir>"
    exit 1
fi
Loading