From e1079999f8a339d39c60dee4d751a3c311b2d72d Mon Sep 17 00:00:00 2001 From: Andrei Date: Mon, 8 Jun 2026 08:14:59 -0700 Subject: [PATCH] feat: update llama.cpp to 8f83d6c27 (#2290) --- CHANGELOG.md | 1 + llama_cpp/mtmd_cpp.py | 217 ++++++++++++++++++++++++++++++++++++++++-- vendor/llama.cpp | 2 +- 3 files changed, 213 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ac792bc2a..10209aacf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- feat: update llama.cpp to ggml-org/llama.cpp@8f83d6c27 - fix(example): support multi-step Responses tool streaming by @abetlen in #2288 - fix(ci): Repair Linux accelerator wheels for manylinux publishing diff --git a/llama_cpp/mtmd_cpp.py b/llama_cpp/mtmd_cpp.py index abe4c7a67..919cefb35 100644 --- a/llama_cpp/mtmd_cpp.py +++ b/llama_cpp/mtmd_cpp.py @@ -3,9 +3,11 @@ import os import warnings from ctypes import ( + CFUNCTYPE, c_bool, c_char_p, c_int, + c_int64, c_uint8, c_uint32, c_size_t, @@ -62,6 +64,9 @@ mtmd_bitmap_p = NewType("mtmd_bitmap_p", int) mtmd_bitmap_p_ctypes = c_void_p +mtmd_helper_video_p = NewType("mtmd_helper_video_p", int) +mtmd_helper_video_p_ctypes = c_void_p + mtmd_image_tokens_p = NewType("mtmd_image_tokens_p", int) mtmd_image_tokens_p_ctypes = c_void_p @@ -151,6 +156,60 @@ class mtmd_caps(Structure): ] +mtmd_bitmap_lazy_callback = CFUNCTYPE( + c_int, + c_size_t, + c_void_p, + POINTER(mtmd_bitmap_p_ctypes), + POINTER(c_char_p), +) + + +class mtmd_helper_bitmap_wrapper(Structure): + """Bitmap wrapper returned by MTMD helper media loaders.""" + + if TYPE_CHECKING: + bitmap: Optional[mtmd_bitmap_p] + video_ctx: Optional[mtmd_helper_video_p] + + _fields_ = [ + ("bitmap", mtmd_bitmap_p_ctypes), + ("video_ctx", mtmd_helper_video_p_ctypes), + ] + + +class mtmd_helper_video_info(Structure): + """Metadata for a decoded video stream.""" + + if TYPE_CHECKING: + width: int + height: int + fps: float + n_frames: int + + _fields_ = [ + ("width", c_uint32), + ("height", c_uint32), + ("fps", c_float), + ("n_frames", c_int), + ] + + +class mtmd_helper_video_init_params(Structure): + """Parameters for initializing an MTMD helper video stream.""" + + if TYPE_CHECKING: + fps_target: float + ffmpeg_bin_dir: Optional[bytes] + timestamp_interval_ms: int + + _fields_ = [ + ("fps_target", c_float), + ("ffmpeg_bin_dir", c_char_p), + ("timestamp_interval_ms", c_int64), + ] + + ################################################ # mtmd.h functions ################################################ @@ -234,6 +293,13 @@ def mtmd_get_audio_sample_rate(ctx: mtmd_context_p, /) -> int: ... +# MTMD_API const char * mtmd_get_marker(const mtmd_context * ctx); +@ctypes_function("mtmd_get_marker", [mtmd_context_p_ctypes], c_char_p) +def mtmd_get_marker(ctx: mtmd_context_p, /) -> Optional[bytes]: + """Get the current media marker string.""" + ... + + # Deprecated compatibility wrapper for the renamed mtmd_get_audio_sample_rate(). def mtmd_get_audio_bitrate(ctx: mtmd_context_p, /) -> int: warnings.warn( @@ -325,6 +391,26 @@ def mtmd_bitmap_set_id(bitmap: mtmd_bitmap_p, id: Optional[bytes], /): ... +# MTMD_API mtmd_bitmap * mtmd_bitmap_init_lazy(mtmd_context * ctx, +# const char * id, +# void * user_data, +# mtmd_bitmap_lazy_callback callback); +@ctypes_function( + "mtmd_bitmap_init_lazy", + [mtmd_context_p_ctypes, c_char_p, c_void_p, mtmd_bitmap_lazy_callback], + mtmd_bitmap_p_ctypes, +) +def mtmd_bitmap_init_lazy( + ctx: mtmd_context_p, + id: Optional[bytes], + user_data: c_void_p, + callback: mtmd_bitmap_lazy_callback, + /, +) -> Optional[mtmd_bitmap_p]: + """Initialize a lazy MTMD bitmap.""" + ... + + # MTMD_API mtmd_input_chunks * mtmd_input_chunks_init(void); @ctypes_function("mtmd_input_chunks_init", [], mtmd_input_chunks_p_ctypes) def mtmd_input_chunks_init() -> Optional[mtmd_input_chunks_p]: ... @@ -551,32 +637,63 @@ def mtmd_test_create_input_chunks() -> Optional[mtmd_input_chunks_p]: ################################################ -# MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname, bool placeholder); +# MTMD_API bool mtmd_helper_support_video(mtmd_context * ctx); +@ctypes_function( + "mtmd_helper_support_video", + [mtmd_context_p_ctypes], + c_bool, +) +def mtmd_helper_support_video(ctx: mtmd_context_p, /) -> bool: + """Check whether MTMD helper video support is available.""" + ... + + +# MTMD_API struct mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname, bool placeholder); @ctypes_function( "mtmd_helper_bitmap_init_from_file", [mtmd_context_p_ctypes, c_char_p, c_bool], - mtmd_bitmap_p_ctypes, + mtmd_helper_bitmap_wrapper, ) +def mtmd_helper_bitmap_init_from_file_wrapper( + ctx: mtmd_context_p, fname: bytes, placeholder: Union[c_bool, bool], / +) -> mtmd_helper_bitmap_wrapper: + """Initialize an MTMD bitmap wrapper from a file.""" + ... + + def mtmd_helper_bitmap_init_from_file( ctx: mtmd_context_p, fname: bytes, placeholder: Union[c_bool, bool], / ) -> Optional[mtmd_bitmap_p]: """Initialize an MTMD bitmap from a file.""" - ... + return mtmd_helper_bitmap_init_from_file_wrapper(ctx, fname, placeholder).bitmap -# MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx, const unsigned char * buf, size_t len, bool placeholder); +# MTMD_API struct mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx, const unsigned char * buf, size_t len, bool placeholder); @ctypes_function( "mtmd_helper_bitmap_init_from_buf", [mtmd_context_p_ctypes, POINTER(c_uint8), c_size_t, c_bool], - mtmd_bitmap_p_ctypes, + mtmd_helper_bitmap_wrapper, ) +def mtmd_helper_bitmap_init_from_buf_wrapper( + ctx: mtmd_context_p, + buf: CtypesArray[c_uint8], + length: Union[c_size_t, int], + placeholder: Union[c_bool, bool], + /, +) -> mtmd_helper_bitmap_wrapper: ... + + def mtmd_helper_bitmap_init_from_buf( ctx: mtmd_context_p, buf: CtypesArray[c_uint8], length: Union[c_size_t, int], placeholder: Union[c_bool, bool], /, -) -> Optional[mtmd_bitmap_p]: ... +) -> Optional[mtmd_bitmap_p]: + """Initialize an MTMD bitmap from a buffer.""" + return mtmd_helper_bitmap_init_from_buf_wrapper( + ctx, buf, length, placeholder + ).bitmap # MTMD_API size_t mtmd_helper_get_n_tokens(const mtmd_input_chunks * chunks); @@ -719,6 +836,94 @@ def mtmd_helper_decode_image_chunk( ... +# MTMD_API struct mtmd_helper_video_init_params mtmd_helper_video_init_params_default(void); +@ctypes_function( + "mtmd_helper_video_init_params_default", [], mtmd_helper_video_init_params +) +def mtmd_helper_video_init_params_default() -> mtmd_helper_video_init_params: + """Return the default MTMD helper video initialization parameters.""" + ... + + +# MTMD_API mtmd_helper_video * mtmd_helper_video_init( +# struct mtmd_context * mctx, +# const char * path, +# struct mtmd_helper_video_init_params params); +@ctypes_function( + "mtmd_helper_video_init", + [mtmd_context_p_ctypes, c_char_p, mtmd_helper_video_init_params], + mtmd_helper_video_p_ctypes, +) +def mtmd_helper_video_init( + ctx: mtmd_context_p, + path: bytes, + params: mtmd_helper_video_init_params, + /, +) -> Optional[mtmd_helper_video_p]: + """Initialize an MTMD helper video stream from a file path.""" + ... + + +# MTMD_API mtmd_helper_video * mtmd_helper_video_init_from_buf( +# struct mtmd_context * mctx, +# const unsigned char * buf, size_t len, +# struct mtmd_helper_video_init_params params); +@ctypes_function( + "mtmd_helper_video_init_from_buf", + [mtmd_context_p_ctypes, POINTER(c_uint8), c_size_t, mtmd_helper_video_init_params], + mtmd_helper_video_p_ctypes, +) +def mtmd_helper_video_init_from_buf( + ctx: mtmd_context_p, + buf: CtypesArray[c_uint8], + length: Union[c_size_t, int], + params: mtmd_helper_video_init_params, + /, +) -> Optional[mtmd_helper_video_p]: + """Initialize an MTMD helper video stream from a buffer.""" + ... + + +# MTMD_API void mtmd_helper_video_free(mtmd_helper_video * ctx); +@ctypes_function("mtmd_helper_video_free", [mtmd_helper_video_p_ctypes], None) +def mtmd_helper_video_free(ctx: mtmd_helper_video_p, /): + """Free an MTMD helper video stream.""" + ... + + +# MTMD_API struct mtmd_helper_video_info mtmd_helper_video_get_info(const mtmd_helper_video * ctx); +@ctypes_function( + "mtmd_helper_video_get_info", + [mtmd_helper_video_p_ctypes], + mtmd_helper_video_info, +) +def mtmd_helper_video_get_info(ctx: mtmd_helper_video_p, /) -> mtmd_helper_video_info: + """Get metadata for an MTMD helper video stream.""" + ... + + +# MTMD_API int32_t mtmd_helper_video_read_next(mtmd_helper_video * ctx, +# mtmd_bitmap ** out_bitmap, +# char ** out_text); +@ctypes_function( + "mtmd_helper_video_read_next", + [ + mtmd_helper_video_p_ctypes, + POINTER(mtmd_bitmap_p_ctypes), + POINTER(c_char_p), + ], + c_int, +) +def mtmd_helper_video_read_next( + ctx: mtmd_helper_video_p, + out_bitmap: "_Pointer[mtmd_bitmap_p_ctypes]", + out_text: "_Pointer[c_char_p]", + /, +) -> int: + """Read the next bitmap or text chunk from an MTMD helper video stream.""" + ... + + # MTMD_API void mtmd_log_set(ggml_log_callback log_callback, void * user_data); @ctypes_function( "mtmd_log_set", diff --git a/vendor/llama.cpp b/vendor/llama.cpp index 9e3b928fd..8f83d6c27 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit 9e3b928fd8c9d14dbf15a8768b9fdd7e5c721d66 +Subproject commit 8f83d6c271d194bde2d410145a0ce73bc42e85cd