leejet · leejet · Jun 26, 2026 · Jun 26, 2026 · Jun 26, 2026 · Jun 26, 2026
diff --git a/ggml b/ggml
diff --git a/src/model/diffusion/anima.hpp b/src/model/diffusion/anima.hpp
@@ -227,7 +227,6 @@ namespace Anima {
             k4 = k_norm->forward(ctx, k4);
 
             ggml_tensor* attn_out = nullptr;
-            float scale           = (sd_backend_is(ctx->backend, "Vulkan") && ctx->flash_attn_enabled) ? 1.0f / 32.0f : 1.0f;
             if (pe_q != nullptr || pe_k != nullptr) {
                 if (pe_q == nullptr) {
                     pe_q = pe_k;
@@ -245,8 +244,7 @@ namespace Anima {
                                                      num_heads,
                                                      nullptr,
                                                      true,
-                                                     ctx->flash_attn_enabled,
-                                                     scale);
+                                                     ctx->flash_attn_enabled);
             } else {
                 auto q_flat = ggml_reshape_3d(ctx->ggml_ctx, q4, head_dim * num_heads, L_q, N);
                 auto k_flat = ggml_reshape_3d(ctx->ggml_ctx, k4, head_dim * num_heads, L_k, N);
@@ -258,8 +256,7 @@ namespace Anima {
                                                      num_heads,
                                                      nullptr,
                                                      false,
-                                                     ctx->flash_attn_enabled,
-                                                     scale);
+                                                     ctx->flash_attn_enabled);
             }
 
             return out_proj->forward(ctx, attn_out);

diff --git a/src/model/diffusion/ernie_image.hpp b/src/model/diffusion/ernie_image.hpp
@@ -162,8 +162,6 @@ namespace ErnieImage {
             int64_t S = x->ne[1];
             int64_t N = x->ne[2];
 
-            float scale = (sd_backend_is(ctx->backend, "Vulkan") && ctx->flash_attn_enabled) ? 1.0f / 32.0f : 1.0f;
-
             auto q = to_q->forward(ctx, x);
             auto k = to_k->forward(ctx, x);
             auto v = to_v->forward(ctx, x);
@@ -184,7 +182,7 @@ namespace ErnieImage {
             k = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, k, 0, 2, 1, 3));  // [N, heads, S, head_dim]
             k = ggml_reshape_3d(ctx->ggml_ctx, k, k->ne[0], k->ne[1], k->ne[2] * k->ne[3]);
 
-            x = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, q, k, v, num_heads, attention_mask, true, ctx->flash_attn_enabled, scale);  // [N, S, hidden_size]
+            x = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, q, k, v, num_heads, attention_mask, true, ctx->flash_attn_enabled);  // [N, S, hidden_size]
             x = to_out_0->forward(ctx, x);
             return x;
         }