From d4d1522b20809a350ffb094db20f40f17d3ab80f Mon Sep 17 00:00:00 2001
From: Molly Sophia <mollysophia379@gmail.com>
Date: Tue, 22 Jul 2025 23:01:29 +0800
Subject: [PATCH 1/3] llama : add model type detection for rwkv7 7B&14B
 (#14816)

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
---
 src/llama-model.cpp | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 2d90ec1ac68..35e718aa989 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -1544,7 +1544,11 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 ml.get_key(LLM_KV_TOKEN_SHIFT_COUNT,                      hparams.token_shift_count, false);
 
                 switch (hparams.n_layer) {
-                    case 12: type = LLM_TYPE_190M; break;
+                    case 12:
+                        switch (hparams.n_embd) {
+                            case 768: type = LLM_TYPE_190M; break;
+                            default: type = LLM_TYPE_UNKNOWN;
+                        } break;
                     case 24:
                         switch (hparams.n_embd) {
                             case 1024: type = LLM_TYPE_450M; break;
@@ -1557,7 +1561,17 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                             case 3584: type = LLM_TYPE_7B; break;
                             default: type = LLM_TYPE_UNKNOWN;
                         } break;
-                    case 32: type = LLM_TYPE_2_9B; break; // RWKV-7-World
+                    case 32:
+                        switch (hparams.n_embd) {
+                            case 2560: type = LLM_TYPE_2_9B; break;
+                            case 4096: type = LLM_TYPE_7B; break;
+                            default: type = LLM_TYPE_UNKNOWN;
+                        } break;
+                    case 61:
+                        switch (hparams.n_embd) {
+                            case 4096: type = LLM_TYPE_14B; break;
+                            default: type = LLM_TYPE_UNKNOWN;
+                        } break;
                     default: type = LLM_TYPE_UNKNOWN;
                 }
             } break;

From 84712b60439453fb393c2ca753cee682a4ad41f5 Mon Sep 17 00:00:00 2001
From: Jeff Bolz <jbolz@nvidia.com>
Date: Tue, 22 Jul 2025 10:35:21 -0500
Subject: [PATCH 2/3] vulkan: fix rms_norm_mul to handle broadcasting dim0
 (#14817)

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp              |  2 +-
 ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp | 10 ++++++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index c3f1369b663..1a7a381ce59 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -10248,7 +10248,7 @@ static bool ggml_vk_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, st
         }
         // if rms_norm is the B operand, then we don't handle broadcast
         if (rms_norm == mul->src[1] &&
-            mul->src[0]->ne[1] != rms_norm->ne[1]) {
+            !ggml_are_same_shape(mul->src[0], rms_norm)) {
             return false;
         }
         // rms_norm shader assumes contiguous rows
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp
index 6428ca7ba33..bdd7db2d698 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp
@@ -50,8 +50,14 @@ void main() {
     const FLOAT_TYPE scale = inversesqrt(mean + FLOAT_TYPE(p.param1));
 
     if (do_multiply) {
-        [[unroll]] for (uint col = tid; col < ncols; col += BLOCK_SIZE) {
-            data_d[d_offset + col] = D_TYPE(scale * FLOAT_TYPE(data_a[a_offset + col]) * FLOAT_TYPE(data_b[b_offset + col]));
+        if (ncols > p.ne10) {
+            [[unroll]] for (uint col = tid; col < ncols; col += BLOCK_SIZE) {
+                data_d[d_offset + col] = D_TYPE(scale * FLOAT_TYPE(data_a[a_offset + col]) * FLOAT_TYPE(data_b[b_offset + fastmod(col, p.ne10)]));
+            }
+        } else {
+            [[unroll]] for (uint col = tid; col < ncols; col += BLOCK_SIZE) {
+                data_d[d_offset + col] = D_TYPE(scale * FLOAT_TYPE(data_a[a_offset + col]) * FLOAT_TYPE(data_b[b_offset + col]));
+            }
         }
     } else {
         [[unroll]] for (uint col = tid; col < ncols; col += BLOCK_SIZE) {

From acd6cb1c41676f6bbb25c2a76fa5abeb1719301e Mon Sep 17 00:00:00 2001
From: Csaba Kecskemeti <csaba.kecskemeti@gmail.com>
Date: Tue, 22 Jul 2025 09:29:43 -0700
Subject: [PATCH 3/3] ggml : model card yaml tab->2xspace (#14819)

---
 gguf-py/gguf/metadata.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py
index e807f434689..67efedbdbc5 100644
--- a/gguf-py/gguf/metadata.py
+++ b/gguf-py/gguf/metadata.py
@@ -144,6 +144,10 @@ def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]:
         # Quick hack to fix the Norway problem
         # https://hitchdev.com/strictyaml/why/implicit-typing-removed/
         yaml_content = yaml_content.replace("- no\n", "- \"no\"\n")
+        # YAML must be indented with spaces, not tabs, so replace each tab with 2 spaces
+        # this issue came up with the Qwen/Qwen3-235B-A22B-Instruct-2507 model card
+        #    (I've also sent a PR to fix the model card)
+        yaml_content = yaml_content.replace("\t", "  ")
 
         if yaml_content:
             data = yaml.safe_load(yaml_content)