feat: update llama.cpp to ggml-org/llama.cpp@f05cf4676

abetlen · abetlen · commit 7d78e7575947 · 2026-06-13T09:59:48.000-07:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 
 - feat(example): support server video inputs and Gemma text tool calls by @abetlen in #2291
-- feat: update llama.cpp to ggml-org/llama.cpp@3e7bd4f39
+- feat: update llama.cpp to ggml-org/llama.cpp@f05cf4676
 - fix(example): support multi-step Responses tool streaming by @abetlen in #2288
 - fix(ci): Repair Linux accelerator wheels for manylinux publishing
 
diff --git a/llama_cpp/mtmd_cpp.py b/llama_cpp/mtmd_cpp.py
@@ -76,6 +76,9 @@
 mtmd_input_chunks_p = NewType("mtmd_input_chunks_p", int)
 mtmd_input_chunks_p_ctypes = c_void_p
 
+mtmd_batch_p = NewType("mtmd_batch_p", int)  # static-typing alias for an mtmd_batch * handle
+mtmd_batch_p_ctypes = c_void_p  # ctypes type used for mtmd_batch * in argtypes/restype
+
 # Enums
 MTMD_INPUT_CHUNK_TYPE_TEXT = 0
 MTMD_INPUT_CHUNK_TYPE_IMAGE = 1
@@ -102,6 +105,7 @@ class mtmd_context_params(Structure):
         image_max_tokens: int
         cb_eval: llama_cpp.ggml_backend_sched_eval_callback
         cb_eval_user_data: c_void_p
+        batch_max_tokens: int
 
     _fields_ = [
         ("use_gpu", c_bool),
@@ -115,6 +119,7 @@ class mtmd_context_params(Structure):
         ("image_max_tokens", c_int),
         ("cb_eval", llama_cpp.ggml_backend_sched_eval_callback),
         ("cb_eval_user_data", c_void_p),
+        ("batch_max_tokens", c_int),
     ]
 
 
@@ -596,7 +601,7 @@ def mtmd_image_tokens_get_decoder_pos(
     c_int,
 )
 def mtmd_encode(ctx: mtmd_context_p, image_tokens: mtmd_image_tokens_p, /) -> int:
-    """Run an MTMD encode pass for image tokens."""
+    """Run a deprecated MTMD encode pass for image tokens."""
     ...
 
 
@@ -618,6 +623,55 @@ def mtmd_get_output_embd(ctx: mtmd_context_p, /) -> Optional[CtypesArray[c_float
     ...
 
 
+# MTMD_API mtmd_batch * mtmd_batch_init(mtmd_context * ctx);
+@ctypes_function("mtmd_batch_init", [mtmd_context_p_ctypes], mtmd_batch_p_ctypes)
+def mtmd_batch_init(ctx: mtmd_context_p, /) -> Optional[mtmd_batch_p]:
+    """Initialize an MTMD media chunk batch for a context; callers must handle a None result."""
+    ...
+
+
+# MTMD_API void mtmd_batch_free(mtmd_batch * batch);
+@ctypes_function("mtmd_batch_free", [mtmd_batch_p_ctypes], None)
+def mtmd_batch_free(batch: mtmd_batch_p, /): ...  # Free an MTMD batch; the C function returns void.
+
+
+# MTMD_API int32_t mtmd_batch_add_chunk(mtmd_batch * batch, const mtmd_input_chunk * chunk);
+@ctypes_function(
+    "mtmd_batch_add_chunk",
+    [mtmd_batch_p_ctypes, mtmd_input_chunk_p_ctypes],
+    c_int,
+)
+def mtmd_batch_add_chunk(
+    batch: mtmd_batch_p,
+    chunk: mtmd_input_chunk_p,
+    /,
+) -> int:
+    """Add a media chunk to an MTMD batch and return the C call's int32_t result."""
+    ...
+
+
+# MTMD_API int32_t mtmd_batch_encode(mtmd_batch * batch);
+@ctypes_function("mtmd_batch_encode", [mtmd_batch_p_ctypes], c_int)
+def mtmd_batch_encode(batch: mtmd_batch_p, /) -> int:
+    """Run an MTMD encode pass for all chunks in a batch and return the C call's int32_t result."""
+    ...
+
+
+# MTMD_API float * mtmd_batch_get_output_embd(mtmd_batch * batch, const mtmd_input_chunk * chunk);
+@ctypes_function(
+    "mtmd_batch_get_output_embd",
+    [mtmd_batch_p_ctypes, mtmd_input_chunk_p_ctypes],
+    POINTER(c_float),
+)
+def mtmd_batch_get_output_embd(
+    batch: mtmd_batch_p,
+    chunk: mtmd_input_chunk_p,
+    /,
+) -> Optional[CtypesArray[c_float]]:
+    """Get the output embeddings for ``chunk`` from the last encode pass on ``batch``."""
+    ...
+
+
 # MTMD_API struct mtmd_caps mtmd_get_cap_from_file(const char * mmproj_fname);
 @ctypes_function("mtmd_get_cap_from_file", [c_char_p], mtmd_caps)
 def mtmd_get_cap_from_file(mmproj_fname: bytes, /) -> mtmd_caps:
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 3e7bd4f39ac59167f82103e1fc22dc4585c489d3
+Subproject commit f05cf4676af46c2f017c0e6ba25b6e20204f700e