File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -1552,6 +1552,14 @@ struct clip_model_loader {
15521552 model.projection = get_tensor (TN_MM_PROJECTOR);
15531553 } break ;
15541554 case PROJECTOR_TYPE_LFM2:
1555+ {
1556+ model.mm_input_norm_w = get_tensor (TN_MM_INP_NORM, false );
1557+ model.mm_input_norm_b = get_tensor (TN_MM_INP_NORM_B, false );
1558+ model.mm_1_w = get_tensor (string_format (TN_LLAVA_PROJ, 1 , " weight" ));
1559+ model.mm_1_b = get_tensor (string_format (TN_LLAVA_PROJ, 1 , " bias" ));
1560+ model.mm_2_w = get_tensor (string_format (TN_LLAVA_PROJ, 2 , " weight" ));
1561+ model.mm_2_b = get_tensor (string_format (TN_LLAVA_PROJ, 2 , " bias" ));
1562+ } break ;
15551563 case PROJECTOR_TYPE_KIMIVL:
15561564 {
15571565 model.mm_input_norm_w = get_tensor (TN_MM_INP_NORM);
Original file line number Diff line number Diff line change @@ -50,10 +50,15 @@ ggml_cgraph * clip_graph_siglip::build() {
5050 const int scale_factor = model.hparams .n_merge ;
5151 cur = build_patch_merge_permute (cur, scale_factor);
5252
53- // projection
54- cur = ggml_norm (ctx0, cur, 1e-5 ); // default nn.LayerNorm
55- cur = ggml_mul (ctx0, cur, model.mm_input_norm_w );
56- cur = ggml_add (ctx0, cur, model.mm_input_norm_b );
53+ // projection, in LFM2-VL input norm is optional
54+ if (model.mm_input_norm_w ) {
55+ cur = ggml_norm (ctx0, cur, 1e-5 ); // default nn.LayerNorm
56+ cur = ggml_mul (ctx0, cur, model.mm_input_norm_w );
57+ }
58+
59+ if (model.mm_input_norm_b ) {
60+ cur = ggml_add (ctx0, cur, model.mm_input_norm_b );
61+ }
5762
5863 cur = build_ffn (cur,
5964 model.mm_1_w , model.mm_1_b ,
You can’t perform that action at this time.
0 commit comments