|
3 | 3 | import os |
4 | 4 | import warnings |
5 | 5 | from ctypes import ( |
| 6 | + CFUNCTYPE, |
6 | 7 | c_bool, |
7 | 8 | c_char_p, |
8 | 9 | c_int, |
| 10 | + c_int64, |
9 | 11 | c_uint8, |
10 | 12 | c_uint32, |
11 | 13 | c_size_t, |
|
# Each handle comes as a pair: a NewType over int for static type checking,
# and the ctypes type (a void pointer) used in argtypes/restype declarations.

# Handle to a C `mtmd_bitmap`.
mtmd_bitmap_p = NewType("mtmd_bitmap_p", int)
mtmd_bitmap_p_ctypes = c_void_p

# Handle to a C `mtmd_helper_video`.
mtmd_helper_video_p = NewType("mtmd_helper_video_p", int)
mtmd_helper_video_p_ctypes = c_void_p

# Handle for image tokens.
mtmd_image_tokens_p = NewType("mtmd_image_tokens_p", int)
mtmd_image_tokens_p_ctypes = c_void_p
67 | 72 |
|
@@ -151,6 +156,60 @@ class mtmd_caps(Structure): |
151 | 156 | ] |
152 | 157 |
|
153 | 158 |
|
# ctypes prototype for the callback accepted by mtmd_bitmap_init_lazy().
# NOTE(review): the C typedef is not visible here; the two trailing pointer
# arguments have the same ctypes as the out_bitmap/out_text parameters of
# mtmd_helper_video_read_next() -- confirm the parameter meanings in mtmd.h.
mtmd_bitmap_lazy_callback = CFUNCTYPE(
    c_int,  # return type
    c_size_t,  # first argument
    c_void_p,  # second argument
    POINTER(mtmd_bitmap_p_ctypes),  # pointer to a bitmap handle
    POINTER(c_char_p),  # pointer to a C string pointer
)
| 166 | + |
| 167 | + |
class mtmd_helper_bitmap_wrapper(Structure):
    """Struct returned by value from the MTMD helper bitmap loaders.

    Holds two pointer-sized fields: ``bitmap`` (a bitmap handle) and
    ``video_ctx`` (a helper-video handle). Both are void-pointer fields, so
    ctypes reports a NULL pointer as ``None`` on attribute access.
    """

    # Field order and types must match the C struct layout.
    _fields_ = [
        ("bitmap", mtmd_bitmap_p_ctypes),
        ("video_ctx", mtmd_helper_video_p_ctypes),
    ]

    # Typing-only view of the fields above; not evaluated at runtime.
    if TYPE_CHECKING:
        bitmap: Optional[mtmd_bitmap_p]
        video_ctx: Optional[mtmd_helper_video_p]
| 179 | + |
| 180 | + |
class mtmd_helper_video_info(Structure):
    """Video stream metadata struct (width, height, fps, frame count).

    Returned by value from ``mtmd_helper_video_get_info``.
    """

    # Field order and types must match the C struct layout.
    _fields_ = [
        ("width", c_uint32),
        ("height", c_uint32),
        ("fps", c_float),
        ("n_frames", c_int),
    ]

    # Typing-only view of the fields above; not evaluated at runtime.
    if TYPE_CHECKING:
        width: int
        height: int
        fps: float
        n_frames: int
| 196 | + |
| 197 | + |
class mtmd_helper_video_init_params(Structure):
    """Options struct passed by value when opening an MTMD helper video stream.

    Fields: ``fps_target`` (float), ``ffmpeg_bin_dir`` (C string, ``None``
    when the pointer is NULL) and ``timestamp_interval_ms`` (64-bit integer).
    """

    # Field order and types must match the C struct layout.
    _fields_ = [
        ("fps_target", c_float),
        ("ffmpeg_bin_dir", c_char_p),
        ("timestamp_interval_ms", c_int64),
    ]

    # Typing-only view of the fields above; not evaluated at runtime.
    if TYPE_CHECKING:
        fps_target: float
        ffmpeg_bin_dir: Optional[bytes]
        timestamp_interval_ms: int
| 211 | + |
| 212 | + |
154 | 213 | ################################################ |
155 | 214 | # mtmd.h functions |
156 | 215 | ################################################ |
@@ -234,6 +293,13 @@ def mtmd_get_audio_sample_rate(ctx: mtmd_context_p, /) -> int: |
234 | 293 | ... |
235 | 294 |
|
236 | 295 |
|
# MTMD_API const char * mtmd_get_marker(const mtmd_context * ctx);
@ctypes_function(
    "mtmd_get_marker",
    [mtmd_context_p_ctypes],
    c_char_p,
)
def mtmd_get_marker(ctx: mtmd_context_p, /) -> Optional[bytes]:
    """Return the current media marker string for the given MTMD context.

    The value is a ``bytes`` object, or ``None`` if the C function returns a
    NULL pointer.
    """
    ...
| 301 | + |
| 302 | + |
237 | 303 | # Deprecated compatibility wrapper for the renamed mtmd_get_audio_sample_rate(). |
238 | 304 | def mtmd_get_audio_bitrate(ctx: mtmd_context_p, /) -> int: |
239 | 305 | warnings.warn( |
@@ -325,6 +391,26 @@ def mtmd_bitmap_set_id(bitmap: mtmd_bitmap_p, id: Optional[bytes], /): |
325 | 391 | ... |
326 | 392 |
|
327 | 393 |
|
# MTMD_API mtmd_bitmap * mtmd_bitmap_init_lazy(mtmd_context * ctx,
#                                              const char * id,
#                                              void * user_data,
#                                              mtmd_bitmap_lazy_callback callback);
@ctypes_function(
    "mtmd_bitmap_init_lazy",
    [
        mtmd_context_p_ctypes,
        c_char_p,
        c_void_p,
        mtmd_bitmap_lazy_callback,
    ],
    mtmd_bitmap_p_ctypes,
)
def mtmd_bitmap_init_lazy(
    ctx: mtmd_context_p,
    id: Optional[bytes],
    user_data: c_void_p,
    callback: mtmd_bitmap_lazy_callback,
    /,
) -> Optional[mtmd_bitmap_p]:
    """Create an MTMD bitmap in lazy mode.

    Args:
        ctx: MTMD context handle.
        id: Bitmap id as bytes, or ``None`` to pass a NULL pointer.
        user_data: Opaque pointer passed to the C function alongside
            ``callback``.
        callback: A ``mtmd_bitmap_lazy_callback`` instance. As with any
            ctypes callback, keep a Python reference to it for as long as
            C code may still call it.

    Returns:
        The bitmap handle, or ``None`` if the C function returns NULL.
    """
    ...
| 412 | + |
| 413 | + |
# MTMD_API mtmd_input_chunks * mtmd_input_chunks_init(void);
@ctypes_function(
    "mtmd_input_chunks_init",
    [],
    mtmd_input_chunks_p_ctypes,
)
def mtmd_input_chunks_init() -> Optional[mtmd_input_chunks_p]:
    """Initialize an MTMD input chunks object and return its handle.

    Takes no arguments; the annotated return type allows ``None``
    (presumably when the C function returns NULL).
    """
    ...
@@ -551,32 +637,63 @@ def mtmd_test_create_input_chunks() -> Optional[mtmd_input_chunks_p]: |
551 | 637 | ################################################ |
552 | 638 |
|
553 | 639 |
|
554 | | -# MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname, bool placeholder); |
# MTMD_API bool mtmd_helper_support_video(mtmd_context * ctx);
@ctypes_function("mtmd_helper_support_video", [mtmd_context_p_ctypes], c_bool)
def mtmd_helper_support_video(ctx: mtmd_context_p, /) -> bool:
    """Report whether MTMD helper video support is available for ``ctx``.

    Returns:
        The boolean result of the C function.
    """
    ...
| 649 | + |
| 650 | + |
# MTMD_API struct mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname, bool placeholder);
@ctypes_function(
    "mtmd_helper_bitmap_init_from_file",
    [
        mtmd_context_p_ctypes,
        c_char_p,
        c_bool,
    ],
    mtmd_helper_bitmap_wrapper,
)
def mtmd_helper_bitmap_init_from_file_wrapper(
    ctx: mtmd_context_p,
    fname: bytes,
    placeholder: Union[c_bool, bool],
    /,
) -> mtmd_helper_bitmap_wrapper:
    """Load media from a file and return the full bitmap wrapper struct.

    Binds the C function ``mtmd_helper_bitmap_init_from_file``, which
    returns a ``mtmd_helper_bitmap_wrapper`` by value.

    Args:
        ctx: MTMD context handle.
        fname: File name as bytes.
        placeholder: Boolean flag forwarded to the C function.

    Returns:
        The wrapper struct with its ``bitmap`` and ``video_ctx`` fields.
    """
    ...
| 662 | + |
| 663 | + |
def mtmd_helper_bitmap_init_from_file(
    ctx: mtmd_context_p,
    fname: bytes,
    placeholder: Union[c_bool, bool],
    /,
) -> Optional[mtmd_bitmap_p]:
    """Load an MTMD bitmap from a file and return only the bitmap handle.

    Calls ``mtmd_helper_bitmap_init_from_file_wrapper`` with the same
    arguments and returns the ``bitmap`` field of the result.

    Returns:
        The bitmap handle, or ``None`` when the ``bitmap`` field is NULL.
    """
    # NOTE(review): the wrapper's `video_ctx` field is discarded here --
    # confirm whether it must be freed or kept alive when it is non-NULL.
    result = mtmd_helper_bitmap_init_from_file_wrapper(ctx, fname, placeholder)
    return result.bitmap
565 | 669 |
|
566 | 670 |
|
567 | | -# MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx, const unsigned char * buf, size_t len, bool placeholder); |
# MTMD_API struct mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx, const unsigned char * buf, size_t len, bool placeholder);
@ctypes_function(
    "mtmd_helper_bitmap_init_from_buf",
    [
        mtmd_context_p_ctypes,
        POINTER(c_uint8),
        c_size_t,
        c_bool,
    ],
    mtmd_helper_bitmap_wrapper,
)
def mtmd_helper_bitmap_init_from_buf_wrapper(
    ctx: mtmd_context_p,
    buf: CtypesArray[c_uint8],
    length: Union[c_size_t, int],
    placeholder: Union[c_bool, bool],
    /,
) -> mtmd_helper_bitmap_wrapper:
    """Load media from a memory buffer and return the full bitmap wrapper struct.

    Binds the C function ``mtmd_helper_bitmap_init_from_buf``, which
    returns a ``mtmd_helper_bitmap_wrapper`` by value.

    Args:
        ctx: MTMD context handle.
        buf: Array of unsigned bytes holding the media data.
        length: Value passed as the C ``len`` argument (``size_t``).
        placeholder: Boolean flag forwarded to the C function.

    Returns:
        The wrapper struct with its ``bitmap`` and ``video_ctx`` fields.
    """
    ...
| 684 | + |
| 685 | + |
def mtmd_helper_bitmap_init_from_buf(
    ctx: mtmd_context_p,
    buf: CtypesArray[c_uint8],
    length: Union[c_size_t, int],
    placeholder: Union[c_bool, bool],
    /,
) -> Optional[mtmd_bitmap_p]:
    """Load an MTMD bitmap from a memory buffer and return only the bitmap handle.

    Calls ``mtmd_helper_bitmap_init_from_buf_wrapper`` with the same
    arguments and returns the ``bitmap`` field of the result.

    Returns:
        The bitmap handle, or ``None`` when the ``bitmap`` field is NULL.
    """
    # NOTE(review): the wrapper's `video_ctx` field is discarded here --
    # confirm whether it must be freed or kept alive when it is non-NULL.
    result = mtmd_helper_bitmap_init_from_buf_wrapper(ctx, buf, length, placeholder)
    return result.bitmap
580 | 697 |
|
581 | 698 |
|
582 | 699 | # MTMD_API size_t mtmd_helper_get_n_tokens(const mtmd_input_chunks * chunks); |
@@ -719,6 +836,94 @@ def mtmd_helper_decode_image_chunk( |
719 | 836 | ... |
720 | 837 |
|
721 | 838 |
|
# MTMD_API struct mtmd_helper_video_init_params mtmd_helper_video_init_params_default(void);
@ctypes_function(
    "mtmd_helper_video_init_params_default",
    [],
    mtmd_helper_video_init_params,
)
def mtmd_helper_video_init_params_default() -> mtmd_helper_video_init_params:
    """Return the library's default video initialization parameters.

    The struct is returned by value and can be passed to
    ``mtmd_helper_video_init`` or ``mtmd_helper_video_init_from_buf``.
    """
    ...
| 846 | + |
| 847 | + |
# MTMD_API mtmd_helper_video * mtmd_helper_video_init(
#     struct mtmd_context * mctx,
#     const char * path,
#     struct mtmd_helper_video_init_params params);
@ctypes_function(
    "mtmd_helper_video_init",
    [
        mtmd_context_p_ctypes,
        c_char_p,
        mtmd_helper_video_init_params,
    ],
    mtmd_helper_video_p_ctypes,
)
def mtmd_helper_video_init(
    ctx: mtmd_context_p,
    path: bytes,
    params: mtmd_helper_video_init_params,
    /,
) -> Optional[mtmd_helper_video_p]:
    """Open an MTMD helper video stream from a file path.

    Args:
        ctx: MTMD context handle.
        path: Path of the video file, as bytes.
        params: Initialization parameters, passed to C by value.

    Returns:
        The video stream handle, or ``None`` if the C function returns NULL.
    """
    ...
| 865 | + |
| 866 | + |
# MTMD_API mtmd_helper_video * mtmd_helper_video_init_from_buf(
#     struct mtmd_context * mctx,
#     const unsigned char * buf, size_t len,
#     struct mtmd_helper_video_init_params params);
@ctypes_function(
    "mtmd_helper_video_init_from_buf",
    [
        mtmd_context_p_ctypes,
        POINTER(c_uint8),
        c_size_t,
        mtmd_helper_video_init_params,
    ],
    mtmd_helper_video_p_ctypes,
)
def mtmd_helper_video_init_from_buf(
    ctx: mtmd_context_p,
    buf: CtypesArray[c_uint8],
    length: Union[c_size_t, int],
    params: mtmd_helper_video_init_params,
    /,
) -> Optional[mtmd_helper_video_p]:
    """Open an MTMD helper video stream from an in-memory buffer.

    Args:
        ctx: MTMD context handle.
        buf: Array of unsigned bytes holding the video data.
        length: Value passed as the C ``len`` argument (``size_t``).
        params: Initialization parameters, passed to C by value.

    Returns:
        The video stream handle, or ``None`` if the C function returns NULL.
    """
    ...
| 885 | + |
| 886 | + |
# MTMD_API void mtmd_helper_video_free(mtmd_helper_video * ctx);
@ctypes_function(
    "mtmd_helper_video_free",
    [mtmd_helper_video_p_ctypes],
    None,
)
def mtmd_helper_video_free(ctx: mtmd_helper_video_p, /):
    """Release an MTMD helper video stream.

    Args:
        ctx: Video stream handle to free. The C function returns nothing.
    """
    ...
| 892 | + |
| 893 | + |
# MTMD_API struct mtmd_helper_video_info mtmd_helper_video_get_info(const mtmd_helper_video * ctx);
@ctypes_function(
    "mtmd_helper_video_get_info", [mtmd_helper_video_p_ctypes], mtmd_helper_video_info
)
def mtmd_helper_video_get_info(ctx: mtmd_helper_video_p, /) -> mtmd_helper_video_info:
    """Return the metadata struct for an MTMD helper video stream.

    Args:
        ctx: Video stream handle.

    Returns:
        A ``mtmd_helper_video_info`` struct, returned by value.
    """
    ...
| 903 | + |
| 904 | + |
# MTMD_API int32_t mtmd_helper_video_read_next(mtmd_helper_video * ctx,
#                                              mtmd_bitmap ** out_bitmap,
#                                              char ** out_text);
@ctypes_function(
    "mtmd_helper_video_read_next",
    [mtmd_helper_video_p_ctypes, POINTER(mtmd_bitmap_p_ctypes), POINTER(c_char_p)],
    c_int,
)
def mtmd_helper_video_read_next(
    ctx: mtmd_helper_video_p,
    out_bitmap: "_Pointer[mtmd_bitmap_p_ctypes]",
    out_text: "_Pointer[c_char_p]",
    /,
) -> int:
    """Fetch the next bitmap or text chunk from an MTMD helper video stream.

    Args:
        ctx: Video stream handle.
        out_bitmap: Pointer to a bitmap handle slot that the C function
            may fill in.
        out_text: Pointer to a C string slot that the C function may fill in.

    Returns:
        The integer status returned by the C function.
        NOTE(review): the meaning of each status value, and who owns the
        returned bitmap and text, are not visible here -- confirm against
        the C header.
    """
    ...
| 925 | + |
| 926 | + |
722 | 927 | # MTMD_API void mtmd_log_set(ggml_log_callback log_callback, void * user_data); |
723 | 928 | @ctypes_function( |
724 | 929 | "mtmd_log_set", |
|
0 commit comments