From 3bcc3016b95682eabf76f61cf506b0fdf2193b11 Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Fri, 26 Jun 2026 13:49:23 -0300 Subject: [PATCH] fix: correct TAEHV encoding for image models --- src/model/vae/tae.hpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/model/vae/tae.hpp b/src/model/vae/tae.hpp index 7c6e1d35c..a78e5e96b 100644 --- a/src/model/vae/tae.hpp +++ b/src/model/vae/tae.hpp @@ -548,7 +548,7 @@ class TAEHV : public GGMLBlock { } auto result = decoder->forward(ctx, z); if (sd_version_is_wan(version) || sd_version_is_ltxav(version)) { - // (W, H, C, T) -> (W, H, T, C) + // (W, H, T, C) -> (W, H, C, T) result = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, result, 0, 1, 3, 2)); } return result; @@ -556,8 +556,10 @@ class TAEHV : public GGMLBlock { ggml_tensor* encode(GGMLRunnerContext* ctx, ggml_tensor* x) { auto encoder = std::dynamic_pointer_cast(blocks["encoder"]); - // (W, H, T, C) -> (W, H, C, T) - x = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, x, 0, 1, 3, 2)); + if (sd_version_is_wan(version) || sd_version_is_ltxav(version)) { + // (W, H, T, C) -> (W, H, C, T) + x = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, x, 0, 1, 3, 2)); + } int64_t num_frames = x->ne[3]; if (num_frames % encoder->t_downscale) { // pad to multiple of encoder->t_downscale at the end @@ -567,7 +569,10 @@ class TAEHV : public GGMLBlock { } } x = encoder->forward(ctx, x); - x = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, x, 0, 1, 3, 2)); + if (sd_version_is_wan(version) || sd_version_is_ltxav(version)) { + // (W, H, C, T) -> (W, H, T, C) + x = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, x, 0, 1, 3, 2)); + } return x; } };