diff --git a/docs/source/en/api/pipelines/animatediff.md b/docs/source/en/api/pipelines/animatediff.md
index f0188f3c36fb..51d674a68b83 100644
--- a/docs/source/en/api/pipelines/animatediff.md
+++ b/docs/source/en/api/pipelines/animatediff.md
@@ -172,7 +172,7 @@ Here are some sample outputs:
raccoon playing a guitar
-
+
|
a panda, playing a guitar, sitting in a pink boat, in the ocean, mountains in background, realistic, high quality
@@ -491,7 +491,7 @@ Here are some sample outputs:
raccoon playing a guitar
|
diff --git a/docs/source/en/api/pipelines/bria_fibo.md b/docs/source/en/api/pipelines/bria_fibo.md
index 96c6b0317e1b..52e463500847 100644
--- a/docs/source/en/api/pipelines/bria_fibo.md
+++ b/docs/source/en/api/pipelines/bria_fibo.md
@@ -16,7 +16,7 @@ Text-to-image models have mastered imagination - but not control. FIBO changes t
FIBO is trained on structured JSON captions up to 1,000+ words and designed to understand and control different visual parameters such as lighting, composition, color, and camera settings, enabling precise and reproducible outputs.
-With only 8 billion parameters, FIBO provides a new level of image quality, prompt adherence and proffesional control.
+With only 8 billion parameters, FIBO provides a new level of image quality, prompt adherence and professional control.
FIBO is trained exclusively on a structured prompt and will not work with freeform text prompts.
you can use the [FIBO-VLM-prompt-to-JSON](https://huggingface.co/briaai/FIBO-VLM-prompt-to-JSON) model or the [FIBO-gemini-prompt-to-JSON](https://huggingface.co/briaai/FIBO-gemini-prompt-to-JSON) to convert your freeform text prompt to a structured JSON prompt.
diff --git a/docs/source/en/api/pipelines/kandinsky5_video.md b/docs/source/en/api/pipelines/kandinsky5_video.md
index 733e2481732a..ea561e8c887e 100644
--- a/docs/source/en/api/pipelines/kandinsky5_video.md
+++ b/docs/source/en/api/pipelines/kandinsky5_video.md
@@ -54,7 +54,7 @@ Kandinsky 5.0 T2V Lite:
### Basic Text-to-Video Generation
#### Pro
-**⚠️ Warning!** all Pro models should be infered with pipeline.enable_model_cpu_offload()
+**⚠️ Warning!** All Pro models should be run with `pipeline.enable_model_cpu_offload()`.
```python
import torch
from diffusers import Kandinsky5T2VPipeline
@@ -65,7 +65,7 @@ model_id = "kandinskylab/Kandinsky-5.0-T2V-Pro-sft-5s-Diffusers"
pipe = Kandinsky5T2VPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
pipe = pipe.to("cuda")
-pipeline.transformer.set_attention_backend("flex") # <--- Set attention bakend to Flex
+pipeline.transformer.set_attention_backend("flex") # <--- Set attention backend to Flex
pipeline.enable_model_cpu_offload() # <--- Enable cpu offloading for single GPU inference
pipeline.transformer.compile(mode="max-autotune-no-cudagraphs", dynamic=True) # <--- Compile with max-autotune-no-cudagraphs
@@ -126,7 +126,7 @@ pipe = pipe.to("cuda")
pipe.transformer.set_attention_backend(
"flex"
-) # <--- Set attention bakend to Flex
+) # <--- Set attention backend to Flex
pipe.transformer.compile(
mode="max-autotune-no-cudagraphs",
dynamic=True
@@ -149,7 +149,7 @@ export_to_video(output, "output.mp4", fps=24, quality=9)
```
### Diffusion Distilled model
-**⚠️ Warning!** all nocfg and diffusion distilled models should be infered wothout CFG (```guidance_scale=1.0```):
+**⚠️ Warning!** All nocfg and diffusion-distilled models should be run without CFG (`guidance_scale=1.0`):
```python
model_id = "kandinskylab/Kandinsky-5.0-T2V-Lite-distilled16steps-5s-Diffusers"
@@ -167,7 +167,7 @@ export_to_video(output, "output.mp4", fps=24, quality=9)
### Basic Image-to-Video Generation
-**⚠️ Warning!** all Pro models should be infered with pipeline.enable_model_cpu_offload()
+**⚠️ Warning!** All Pro models should be run with `pipeline.enable_model_cpu_offload()`.
```python
import torch
from diffusers import Kandinsky5T2VPipeline
@@ -178,7 +178,7 @@ model_id = "kandinskylab/Kandinsky-5.0-I2V-Pro-sft-5s-Diffusers"
pipe = Kandinsky5T2VPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
pipe = pipe.to("cuda")
-pipeline.transformer.set_attention_backend("flex") # <--- Set attention bakend to Flex
+pipeline.transformer.set_attention_backend("flex") # <--- Set attention backend to Flex
pipeline.enable_model_cpu_offload() # <--- Enable cpu offloading for single GPU inference
pipeline.transformer.compile(mode="max-autotune-no-cudagraphs", dynamic=True) # <--- Compile with max-autotune-no-cudagraphs
diff --git a/docs/source/en/api/pipelines/stable_diffusion/sdxl_turbo.md b/docs/source/en/api/pipelines/stable_diffusion/sdxl_turbo.md
index fb4f7dbbc18c..e5cbeaa8bf7c 100644
--- a/docs/source/en/api/pipelines/stable_diffusion/sdxl_turbo.md
+++ b/docs/source/en/api/pipelines/stable_diffusion/sdxl_turbo.md
@@ -75,14 +75,14 @@ import torch
pipeline_text2image = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16")
pipeline_text2image = pipeline_text2image.to("cuda")
-prompt = "A cinematic shot of a baby racoon wearing an intricate italian priest robe."
+prompt = "A cinematic shot of a baby raccoon wearing an intricate italian priest robe."
image = pipeline_text2image(prompt=prompt, guidance_scale=0.0, num_inference_steps=1).images[0]
image
```
- 
+
## Image-to-image
diff --git a/docs/source/en/api/pipelines/wan.md b/docs/source/en/api/pipelines/wan.md
index d5fdbbfe0f95..2c3b00aef690 100644
--- a/docs/source/en/api/pipelines/wan.md
+++ b/docs/source/en/api/pipelines/wan.md
@@ -531,7 +531,7 @@ export_to_video(output, "animated_advanced.mp4", fps=30)
- Try lower `shift` values (`2.0` to `5.0`) for lower resolution videos and higher `shift` values (`7.0` to `12.0`) for higher resolution images.
-- Wan 2.1 and 2.2 support using [LightX2V LoRAs](https://huggingface.co/Kijai/WanVideo_comfy/tree/main/Lightx2v) to speed up inference. Using them on Wan 2.2 is slightly more involed. Refer to [this code snippet](https://github.com/huggingface/diffusers/pull/12040#issuecomment-3144185272) to learn more.
+- Wan 2.1 and 2.2 support using [LightX2V LoRAs](https://huggingface.co/Kijai/WanVideo_comfy/tree/main/Lightx2v) to speed up inference. Using them on Wan 2.2 is slightly more involved. Refer to [this code snippet](https://github.com/huggingface/diffusers/pull/12040#issuecomment-3144185272) to learn more.
- Wan 2.2 has two denoisers. By default, LoRAs are only loaded into the first denoiser. One can set `load_into_transformer_2=True` to load LoRAs into the second denoiser. Refer to [this](https://github.com/huggingface/diffusers/pull/12074#issue-3292620048) and [this](https://github.com/huggingface/diffusers/pull/12074#issuecomment-3155896144) examples to learn more.
diff --git a/src/diffusers/pipelines/bria/pipeline_bria.py b/src/diffusers/pipelines/bria/pipeline_bria.py
index 9b80278af21e..2dff6a9189c7 100644
--- a/src/diffusers/pipelines/bria/pipeline_bria.py
+++ b/src/diffusers/pipelines/bria/pipeline_bria.py
@@ -604,7 +604,7 @@ def __call__(
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
# 5. Prepare latent variables
- num_channels_latents = self.transformer.config.in_channels // 4 # due to patch=2, we devide by 4
+ num_channels_latents = self.transformer.config.in_channels // 4 # due to patch=2, we divide by 4
latents, latent_image_ids = self.prepare_latents(
batch_size * num_images_per_prompt,
num_channels_latents,
diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py
index f526dc419cea..8d4b88bb2dba 100644
--- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py
+++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py
@@ -320,10 +320,10 @@ def __call__(
Args:
prompt (`str` or `list[str]`):
The prompt or prompts to guide the image generation.
- image (`nd.ndarray` or `PIL.Image.Image`):
+ image (`np.ndarray` or `PIL.Image.Image`):
`Image`, or tensor representing an image batch, that will be used as the starting point for the
process. This is the image whose masked region will be inpainted.
- mask_image (`nd.ndarray` or `PIL.Image.Image`):
+ mask_image (`np.ndarray` or `PIL.Image.Image`):
`Image`, or tensor representing an image batch, to mask `image`. White pixels in the mask will be
replaced by noise and therefore repainted, while black pixels will be preserved. If `mask_image` is a
PIL image, it will be converted to a single channel (luminance) before use. If it's a tensor, it should
diff --git a/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py b/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py
index f74bf1e14900..9892199355e3 100644
--- a/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py
+++ b/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py
@@ -150,7 +150,7 @@ def prepare_mask_and_masked_image(image, mask, height, width):
ValueError: ``torch.Tensor`` images should be in the ``[-1, 1]`` range. ValueError: ``torch.Tensor`` mask
should be in the ``[0, 1]`` range. ValueError: ``mask`` and ``image`` should have the same spatial dimensions.
TypeError: ``mask`` is a ``torch.Tensor`` but ``image`` is not
- (ot the other way around).
+ (or the other way around).
Returns:
tuple[torch.Tensor]: The pair (mask, image) as ``torch.Tensor`` with 4
diff --git a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py
index 796ab94b33a6..4e7d6edb1e6f 100644
--- a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py
+++ b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py
@@ -148,7 +148,7 @@ def prepare_mask_and_masked_image(image, mask, height, width):
ValueError: ``torch.Tensor`` images should be in the ``[-1, 1]`` range. ValueError: ``torch.Tensor`` mask
should be in the ``[0, 1]`` range. ValueError: ``mask`` and ``image`` should have the same spatial dimensions.
TypeError: ``mask`` is a ``torch.Tensor`` but ``image`` is not
- (ot the other way around).
+ (or the other way around).
Returns:
tuple[torch.Tensor]: The pair (mask, image) as ``torch.Tensor`` with 4
diff --git a/src/diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py b/src/diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py
index f640fddc2bc5..ec7a85f46673 100644
--- a/src/diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py
+++ b/src/diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py
@@ -223,7 +223,7 @@ def check_inputs(
f"got {type(task_prompt)} and {type(content_prompt)}"
)
if len(content_prompt) != len(task_prompt):
- raise ValueError("`task_prompt` and `content_prompt` must have the same length whe they are lists.")
+ raise ValueError("`task_prompt` and `content_prompt` must have the same length when they are lists.")
for sample in image:
if not isinstance(sample, list) or not isinstance(sample[0], list):
diff --git a/src/diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py b/src/diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py
index dd5d0603d6d0..bd84adcf6425 100644
--- a/src/diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py
+++ b/src/diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py
@@ -443,7 +443,7 @@ def check_inputs(
f"got {type(task_prompt)} and {type(content_prompt)}"
)
if len(content_prompt) != len(task_prompt):
- raise ValueError("`task_prompt` and `content_prompt` must have the same length whe they are lists.")
+ raise ValueError("`task_prompt` and `content_prompt` must have the same length when they are lists.")
for sample in image:
if not isinstance(sample, list) or not isinstance(sample[0], list):
diff --git a/src/diffusers/quantizers/gguf/utils.py b/src/diffusers/quantizers/gguf/utils.py
index c7d9ec89bee6..0d29d52222b8 100644
--- a/src/diffusers/quantizers/gguf/utils.py
+++ b/src/diffusers/quantizers/gguf/utils.py
@@ -518,7 +518,7 @@ def dequantize_gguf_tensor(tensor):
block_size, type_size = GGML_QUANT_SIZES[quant_type]
- # Conver to plain tensor to avoid unnecessary __torch_function__ overhead.
+ # Convert to plain tensor to avoid unnecessary __torch_function__ overhead.
tensor = tensor.as_tensor()
tensor = tensor.view(torch.uint8)
|