diff --git a/src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2i.py b/src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2i.py
index 244db7300767..0a8382d6031f 100644
--- a/src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2i.py
+++ b/src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2i.py
@@ -544,6 +544,7 @@ def prepare_latents(
 
             # Reshape to match latent dimensions [batch, 1, height, width, channels]
             image_latents = image_latents.permute(0, 2, 3, 4, 1)  # [batch, 1, H, W, C]
+            image_latents = image_latents.to(device=latents.device, dtype=latents.dtype)
             latents = torch.cat([latents, image_latents, torch.ones_like(latents[..., :1])], -1)
 
         return latents
diff --git a/src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2v.py b/src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2v.py
index ad4bb182d248..e82dc737f1a9 100644
--- a/src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2v.py
+++ b/src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2v.py
@@ -704,6 +704,7 @@ def prepare_latents(
 
             # Reshape to match latent dimensions [batch, frames, height, width, channels]
             image_latents = image_latents.permute(0, 2, 3, 4, 1)  # [batch, 1, H, W, C]
+            image_latents = image_latents.to(device=latents.device, dtype=latents.dtype)
 
             # Replace first frame with encoded image
             latents[:, 0:1] = image_latents