From e7bb79e7fc6f4f4055c722f5317b04fb7179274b Mon Sep 17 00:00:00 2001 From: Daniele <57776841+daniandtheweb@users.noreply.github.com> Date: Fri, 26 Jun 2026 16:59:19 +0200 Subject: [PATCH 1/3] sync: update ggml --- ggml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml b/ggml index 3af5f5760..eced84c86 160000 --- a/ggml +++ b/ggml @@ -1 +1 @@ -Subproject commit 3af5f5760e19a96427f5f7a93b79cbdf3d4b265b +Subproject commit eced84c86f8b012c752c016f7fe789adea168e1e From 0f85060272ff86b75312129b1ae1a12990a58295 Mon Sep 17 00:00:00 2001 From: Daniele <57776841+daniandtheweb@users.noreply.github.com> Date: Fri, 26 Jun 2026 17:01:45 +0200 Subject: [PATCH 2/3] fix: revert workaround for anima on vulkan fa --- src/model/diffusion/anima.hpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/model/diffusion/anima.hpp b/src/model/diffusion/anima.hpp index 504904d41..6042516a9 100644 --- a/src/model/diffusion/anima.hpp +++ b/src/model/diffusion/anima.hpp @@ -227,7 +227,6 @@ namespace Anima { k4 = k_norm->forward(ctx, k4); ggml_tensor* attn_out = nullptr; - float scale = (sd_backend_is(ctx->backend, "Vulkan") && ctx->flash_attn_enabled) ? 1.0f / 32.0f : 1.0f; if (pe_q != nullptr || pe_k != nullptr) { if (pe_q == nullptr) { pe_q = pe_k; @@ -245,8 +244,7 @@ namespace Anima { num_heads, nullptr, true, - ctx->flash_attn_enabled, - scale); + ctx->flash_attn_enabled); } else { auto q_flat = ggml_reshape_3d(ctx->ggml_ctx, q4, head_dim * num_heads, L_q, N); auto k_flat = ggml_reshape_3d(ctx->ggml_ctx, k4, head_dim * num_heads, L_k, N); @@ -258,8 +256,7 @@ namespace Anima { num_heads, nullptr, false, - ctx->flash_attn_enabled, - scale); + ctx->flash_attn_enabled); } return out_proj->forward(ctx, attn_out); From 20a7163b916b7ec9f3bb67fc54c191b5cfb4cb38 Mon Sep 17 00:00:00 2001 From: Daniele <57776841+daniandtheweb@users.noreply.github.com> Date: Fri, 26 Jun 2026 17:02:30 +0200 Subject: [PATCH 3/3] fix: revert workaround for ernie on vulkan fa --- src/model/diffusion/ernie_image.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/model/diffusion/ernie_image.hpp b/src/model/diffusion/ernie_image.hpp index 0427b3b38..12fcada59 100644 --- a/src/model/diffusion/ernie_image.hpp +++ b/src/model/diffusion/ernie_image.hpp @@ -162,8 +162,6 @@ namespace ErnieImage { int64_t S = x->ne[1]; int64_t N = x->ne[2]; - float scale = (sd_backend_is(ctx->backend, "Vulkan") && ctx->flash_attn_enabled) ? 1.0f / 32.0f : 1.0f; - auto q = to_q->forward(ctx, x); auto k = to_k->forward(ctx, x); auto v = to_v->forward(ctx, x); @@ -184,7 +182,7 @@ namespace ErnieImage { k = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, k, 0, 2, 1, 3)); // [N, heads, S, head_dim] k = ggml_reshape_3d(ctx->ggml_ctx, k, k->ne[0], k->ne[1], k->ne[2] * k->ne[3]); - x = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, q, k, v, num_heads, attention_mask, true, ctx->flash_attn_enabled, scale); // [N, S, hidden_size] + x = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, q, k, v, num_heads, attention_mask, true, ctx->flash_attn_enabled); // [N, S, hidden_size] x = to_out_0->forward(ctx, x); return x; }