Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions tools/mtmd/clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1005,6 +1005,8 @@ struct clip_model_loader {
hparams.minicpmv_query_num = 64;
} else if (hparams.minicpmv_version == 6) {
hparams.minicpmv_query_num = 64;
} else if (hparams.minicpmv_version == 100045) {
hparams.minicpmv_query_num = 64;
} else {
hparams.minicpmv_query_num = 96;
}
Expand Down Expand Up @@ -3209,6 +3211,9 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im
} else if (params.minicpmv_version == 6) {
// MiniCPM-V 4.5
n_patches = 64;
} else if (params.minicpmv_version == 100045) {
// MiniCPM-o 4.5
n_patches = 64;
} else {
GGML_ABORT("Unknown minicpmv version");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,7 @@ def bytes_to_unicode():
default_image_std = [0.5, 0.5, 0.5]
ap.add_argument('--image-mean', type=float, nargs='+', help='Mean of the images for normalization (overrides processor) ', default=None)
ap.add_argument('--image-std', type=float, nargs='+', help='Standard deviation of the images for normalization (overrides processor)', default=None)
ap.add_argument('--minicpmv_version', type=int, help='minicpmv_version: MiniCPM-V-2 use 1; MiniCPM-V-2.5 use 2; MiniCPM-V-2.6 use 3; MiniCPM-o-2.6 use 4; MiniCPM-V 4.0 use 5; MiniCPM-o-4.0 use 6', default=2)
ap.add_argument('--minicpmv_version', type=int, help='minicpmv_version: MiniCPM-V-2 use 1; MiniCPM-V-2.5 use 2; MiniCPM-V-2.6 use 3; MiniCPM-o-2.6 use 4; MiniCPM-V 4.0 use 5; MiniCPM-o-4.0 use 6; MiniCPM-o-4.5 use 100045', default=2)

# with proper
args = ap.parse_args()
Expand Down Expand Up @@ -610,6 +610,9 @@ def bytes_to_unicode():
elif minicpmv_version == 6:
emb_dim = 4096
block_count = 27
elif minicpmv_version == 100045:
emb_dim = 4096
block_count = 27

default_vision_config = {
"hidden_size": 1152,
Expand Down Expand Up @@ -637,6 +640,10 @@ def bytes_to_unicode():
default_vision_config["model_type"] = "siglip_vision_model"
vision_config = SiglipVisionConfig(**default_vision_config)
model = SiglipVisionTransformer(vision_config)
elif minicpmv_version == 100045:
default_vision_config["model_type"] = "siglip_vision_model"
vision_config = SiglipVisionConfig(**default_vision_config)
model = SiglipVisionTransformer(vision_config)

processor = None
# if model.attn_pool is not None:
Expand Down
2 changes: 1 addition & 1 deletion tools/mtmd/mtmd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ struct mtmd_context {
tok_row_end_trail = false; // no trailing end-of-row token
ov_img_first = true;

} else if (minicpmv_version == 3 || minicpmv_version == 4 || minicpmv_version == 5 || minicpmv_version == 6) {
} else if (minicpmv_version == 3 || minicpmv_version == 4 || minicpmv_version == 5 || minicpmv_version == 6 || minicpmv_version == 100045) {
// minicpmv 2.6 format:
// <image> (overview) </image><slice> (slice) </slice><slice> (slice) </slice>\n ...
slice_tmpl = MTMD_SLICE_TMPL_MINICPMV_2_6;
Expand Down
Loading