diff --git a/src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2i.py b/src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2i.py index 244db7300767..0a8382d6031f 100644 --- a/src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2i.py +++ b/src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2i.py @@ -544,6 +544,7 @@ def prepare_latents( # Reshape to match latent dimensions [batch, 1, height, width, channels] image_latents = image_latents.permute(0, 2, 3, 4, 1) # [batch, 1, H, W, C] + image_latents = image_latents.to(device=latents.device, dtype=latents.dtype) latents = torch.cat([latents, image_latents, torch.ones_like(latents[..., :1])], -1) return latents diff --git a/src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2v.py b/src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2v.py index ad4bb182d248..e82dc737f1a9 100644 --- a/src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2v.py +++ b/src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2v.py @@ -704,6 +704,7 @@ def prepare_latents( # Reshape to match latent dimensions [batch, frames, height, width, channels] image_latents = image_latents.permute(0, 2, 3, 4, 1) # [batch, 1, H, W, C] + image_latents = image_latents.to(device=latents.device, dtype=latents.dtype) # Replace first frame with encoded image latents[:, 0:1] = image_latents