From 3fd76cdb9b1c26a8fcdf6022e334a4d2462d2e15 Mon Sep 17 00:00:00 2001
From: rmatif <rmatif@proton.me>
Date: Wed, 4 Mar 2026 23:27:28 +0100
Subject: [PATCH 1/2] enhance sd.cpp GGUF loader compatibility

---
 loader.py | 57 +++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 51 insertions(+), 6 deletions(-)

diff --git a/loader.py b/loader.py
index 7cefb11..d776dca 100644
--- a/loader.py
+++ b/loader.py
@@ -97,12 +97,23 @@ def gguf_sd_loader(path, handle_prefix="model.diffusion_model.", is_text_model=F
         if is_text_model:
             raise ValueError(f"This gguf file is incompatible with llama.cpp!\nConsider using safetensors or a compatible gguf file\n({path})")
         compat = "sd.cpp" if arch_str is None else arch_str
-        # import here to avoid changes to convert.py breaking regular models
-        from .tools.convert import detect_arch
-        try:
-            arch_str = detect_arch(set(val[0] for val in tensors)).arch
-        except Exception as e:
-            raise ValueError(f"This model is not currently supported - ({e})")
+        tensor_keys = set(val[0] for val in tensors)
+        # stable-diffusion.cpp qwen-image tensors overlap some legacy flux/sd3 markers,
+        # so we detect qwen-image directly before generic fallback detection.
+        if {
+            "img_in.weight",
+            "proj_out.weight",
+            "time_text_embed.timestep_embedder.linear_1.weight",
+            "norm_out.linear.weight",
+        }.issubset(tensor_keys):
+            arch_str = "qwen_image"
+        else:
+            # import here to avoid changes to convert.py breaking regular models
+            from .tools.convert import detect_arch
+            try:
+                arch_str = detect_arch(tensor_keys).arch
+            except Exception as e:
+                raise ValueError(f"This model is not currently supported - ({e})")
     elif arch_str not in TXT_ARCH_LIST and is_text_model:
         if type_str not in VIS_TYPE_LIST:
             raise ValueError(f"Unexpected text model architecture type in GGUF file: {arch_str!r}")
@@ -112,6 +123,17 @@ def gguf_sd_loader(path, handle_prefix="model.diffusion_model.", is_text_model=F
     if compat:
         logging.warning(f"Warning: This gguf model file is loaded in compatibility mode '{compat}' [arch:{arch_str}]")
 
+    wan_dim = None
+    if compat == "sd.cpp" and arch_str == "wan":
+        # Used to restore collapsed Conv3d patch embedding shape in sd.cpp exports
+        head_mod = next((t for k, t in tensors if k == "head.modulation"), None)
+        if head_mod is not None:
+            mod_shape = get_orig_shape(reader, head_mod.name)
+            if mod_shape is None:
+                mod_shape = torch.Size(tuple(int(v) for v in reversed(head_mod.shape)))
+            if len(mod_shape) >= 1:
+                wan_dim = int(mod_shape[-1])
+
     # main loading loop
     state_dict = {}
     qtype_dict = {}
@@ -132,6 +154,23 @@ def gguf_sd_loader(path, handle_prefix="model.diffusion_model.", is_text_model=F
                 if any([tensor_name.endswith(x) for x in (".proj_in.weight", ".proj_out.weight")]):
                     while len(shape) > 2 and shape[-1] == 1:
                         shape = shape[:-1]
+            # Workaround for stable-diffusion.cpp Lumina2 pad token shape
+            if compat == "sd.cpp" and arch_str == "lumina2":
+                if len(shape) == 1 and sd_key in {"x_pad_token", "cap_pad_token"}:
+                    shape = torch.Size((1, shape[0]))
+            # Workaround for stable-diffusion.cpp Wan 2.1 shape collapse
+            if compat == "sd.cpp" and arch_str == "wan":
+                if len(shape) == 2 and sd_key.endswith(".modulation"):
+                    shape = torch.Size((1, shape[0], shape[1]))
+                if (
+                    len(shape) == 4
+                    and sd_key.endswith("patch_embedding.weight")
+                    and shape[1] == 1
+                    and wan_dim is not None
+                    and shape[0] % wan_dim == 0
+                ):
+                    in_dim = shape[0] // wan_dim
+                    shape = torch.Size((wan_dim, in_dim, 1, shape[2], shape[3]))
 
         # add to state dict
         if tensor.tensor_type in {gguf.GGMLQuantizationType.F32, gguf.GGMLQuantizationType.F16}:
@@ -142,6 +181,12 @@ def gguf_sd_loader(path, handle_prefix="model.diffusion_model.", is_text_model=F
         if len(shape) <= 1 and tensor.tensor_type == gguf.GGMLQuantizationType.BF16:
             state_dict[sd_key] = dequantize_tensor(state_dict[sd_key], dtype=torch.float32)
 
+        if compat == "sd.cpp" and len(shape) <= 1 and is_quantized(state_dict[sd_key]):
+            state_dict[sd_key] = dequantize_tensor(state_dict[sd_key], dtype=torch.float32)
+        if compat == "sd.cpp" and arch_str == "wan":
+            if sd_key.endswith(".modulation") and is_quantized(state_dict[sd_key]):
+                state_dict[sd_key] = dequantize_tensor(state_dict[sd_key], dtype=torch.float32)
+
         # keep track of loaded tensor types
         tensor_type_str = getattr(tensor.tensor_type, "name", repr(tensor.tensor_type))
         qtype_dict[tensor_type_str] = qtype_dict.get(tensor_type_str, 0) + 1

From d8bda72df97562eecf884783e7f3a9ace83231e2 Mon Sep 17 00:00:00 2001
From: rmatif <rmatif@proton.me>
Date: Thu, 5 Mar 2026 01:43:04 +0100
Subject: [PATCH 2/2] add anima

---
 loader.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/loader.py b/loader.py
index d776dca..87ed8f2 100644
--- a/loader.py
+++ b/loader.py
@@ -76,9 +76,14 @@ def gguf_sd_loader(path, handle_prefix="model.diffusion_model.", is_text_model=F
     # filter and strip prefix
     has_prefix = False
     if handle_prefix is not None:
-        prefix_len = len(handle_prefix)
         tensor_names = set(tensor.name for tensor in reader.tensors)
+        prefix_len = len(handle_prefix)
         has_prefix = any(s.startswith(handle_prefix) for s in tensor_names)
+        # Some stable-diffusion.cpp exports (anima) use a "net." prefix
+        if (not has_prefix) and (not is_text_model) and tensor_names and all(s.startswith("net.") for s in tensor_names):
+            handle_prefix = "net."
+            prefix_len = len(handle_prefix)
+            has_prefix = True
 
     tensors = []
     for tensor in reader.tensors:
@@ -98,8 +103,7 @@ def gguf_sd_loader(path, handle_prefix="model.diffusion_model.", is_text_model=F
             raise ValueError(f"This gguf file is incompatible with llama.cpp!\nConsider using safetensors or a compatible gguf file\n({path})")
         compat = "sd.cpp" if arch_str is None else arch_str
         tensor_keys = set(val[0] for val in tensors)
-        # stable-diffusion.cpp qwen-image tensors overlap some legacy flux/sd3 markers,
-        # so we detect qwen-image directly before generic fallback detection.
+        # stable-diffusion.cpp qwen-image tensors overlap some legacy flux/sd3 markers
         if {
             "img_in.weight",
             "proj_out.weight",