Skip to content

Commit 4b66c45

Browse files
authored
feat: update llama.cpp to 210a6570c (abetlen#2242)
1 parent a9b480f commit 4b66c45

3 files changed

Lines changed: 6 additions & 1 deletion

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7 7

8 8
## [Unreleased]
9 9

10 +
- feat: Update llama.cpp to ggml-org/llama.cpp@210a6570c by @abetlen in #2242
10 11
- feat: add Gemma 4 multimodal chat support by @abetlen in #2241
11 12
- feat(ci): add CUDA 13.0 and 13.2 wheel builds by @abetlen in #2239
12 13
- feat(ci): add CUDA 11.8 wheel builds by @abetlen in #2238

llama_cpp/llama_cpp.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -903,6 +903,7 @@ class llama_sampler_seq_config(ctypes.Structure):
903 903
# uint32_t n_ubatch; // physical maximum batch size
904 904
# uint32_t n_seq_max; // max number of sequences (i.e. distinct states for recurrent models)
905 905
# uint32_t n_rs_seq; // number of recurrent-state snapshots per seq for rollback (0 = no rollback) [EXPERIMENTAL]
906 +
# uint32_t n_outputs_max; // max outputs in a ubatch (0 = n_batch)
906 907
# int32_t n_threads; // number of threads to use for generation
907 908
# int32_t n_threads_batch; // number of threads to use for batch processing
908 909

@@ -958,6 +959,7 @@ class llama_context_params(ctypes.Structure):
958 959
n_ubatch (int): physical maximum batch size
959 960
n_seq_max (int): max number of sequences (i.e. distinct states for recurrent models)
960 961
n_rs_seq (int): number of recurrent-state snapshots per sequence for rollback
962 +
n_outputs_max (int): max outputs in a ubatch, 0 = n_batch
961 963
n_threads (int): number of threads to use for generation
962 964
n_threads_batch (int): number of threads to use for batch processing
963 965
ctx_type (int): context type, from `enum llama_context_type`
@@ -995,6 +997,7 @@ class llama_context_params(ctypes.Structure):
995 997
n_ubatch: int
996 998
n_seq_max: int
997 999
n_rs_seq: int
1000 +
n_outputs_max: int
998 1001
n_threads: int
999 1002
n_threads_batch: int
1000 1003
ctx_type: int
@@ -1031,6 +1034,7 @@ class llama_context_params(ctypes.Structure):
1031 1034
("n_ubatch", ctypes.c_uint32),
1032 1035
("n_seq_max", ctypes.c_uint32),
1033 1036
("n_rs_seq", ctypes.c_uint32),
1037 +
("n_outputs_max", ctypes.c_uint32),
1034 1038
("n_threads", ctypes.c_int32),
1035 1039
("n_threads_batch", ctypes.c_int32),
1036 1040
("ctx_type", ctypes.c_int),

vendor/llama.cpp

Submodule llama.cpp updated 63 files

0 commit comments

Comments (0)