diff --git a/02_ml_inference/02_text_to_image/.env.example b/02_ml_inference/02_text_to_image/.env.example
new file mode 100644
index 0000000..91af5f2
--- /dev/null
+++ b/02_ml_inference/02_text_to_image/.env.example
@@ -0,0 +1,4 @@
+# RUNPOD_API_KEY=your_api_key_here
+# FLASH_HOST=localhost
+# FLASH_PORT=8888
+# LOG_LEVEL=INFO
diff --git a/02_ml_inference/02_text_to_image/.flashignore b/02_ml_inference/02_text_to_image/.flashignore
new file mode 100644
index 0000000..10ffb6d
--- /dev/null
+++ b/02_ml_inference/02_text_to_image/.flashignore
@@ -0,0 +1,43 @@
+# Flash Build Ignore Patterns
+
+# Python cache
+__pycache__/
+*.pyc
+
+# Virtual environments
+venv/
+.venv/
+env/
+
+# IDE
+.vscode/
+.idea/
+
+# Environment files
+.env
+.env.local
+
+# Git
+.git/
+.gitignore
+
+# Build artifacts
+dist/
+build/
+*.egg-info/
+
+# Flash resources
+.flash_resources.pkl
+
+# Tests
+tests/
+test_*.py
+*_test.py
+
+# Documentation
+docs/
+*.md
+!README.md
+
+# Demo output
+generated.png
diff --git a/02_ml_inference/02_text_to_image/.gitignore b/02_ml_inference/02_text_to_image/.gitignore
new file mode 100644
index 0000000..4ea30c5
--- /dev/null
+++ b/02_ml_inference/02_text_to_image/.gitignore
@@ -0,0 +1,27 @@
+# Python
+__pycache__/
+*.pyc
+*.pyo
+*.egg-info/
+dist/
+build/
+
+# Virtual environments
+.venv/
+venv/
+env/
+
+# Environment
+.env
+.env.local
+
+# Flash
+.flash_resources.pkl
+.tetra_resources.pkl
+
+# IDE
+.vscode/
+.idea/
+
+# Demo output
+generated.png
diff --git a/02_ml_inference/02_text_to_image/__init__.py b/02_ml_inference/02_text_to_image/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/02_ml_inference/02_text_to_image/demo.py b/02_ml_inference/02_text_to_image/demo.py
new file mode 100755
index 0000000..ede93a6
--- /dev/null
+++ b/02_ml_inference/02_text_to_image/demo.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python3
+"""
+Flash Demo — Generate an image with Flux and display it in your terminal.
+
+Usage:
+    1. Start the server:   cd 02_ml_inference/02_text_to_image && flash run
+    2. Run this script:    python demo.py
+    3. Or with a prompt:   python demo.py "a cat astronaut on mars"
+"""
+
+import base64
+import io
+import json
+import os
+import shutil
+import subprocess
+import sys
+import time
+import urllib.error
+import urllib.request
+
+API_URL = "http://localhost:8888/gpu/generate"
+DEFAULT_PROMPT = "a tiny astronaut floating above earth, watercolor style"
+OUTPUT_FILE = "generated.png"
+
+# ── Terminal image rendering ─────────────────────────────────────────
+
+
+def render_in_terminal(image_bytes: bytes, max_width: int | None = None) -> None:
+    """Print an image to stdout as ANSI true-color half-block characters.
+
+    Each character cell shows two vertically stacked pixels: the foreground
+    color paints the upper half ("▀") and the background color the lower half.
+    Needs a terminal with 24-bit color support.
+
+    Args:
+        image_bytes: Encoded image data in any format Pillow can open.
+        max_width: Output width in columns. When omitted (or 0), the terminal
+            width is used, capped at 80 columns.
+    """
+    from PIL import Image
+
+    img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+    w = max_width or min(shutil.get_terminal_size().columns, 80)
+    # Two pixel rows per text row: round the height up to an even number and
+    # keep at least one text row so very wide images do not resize to height 0.
+    h = max(2, int(w * (img.height / img.width)))
+    h += h % 2
+    px = img.resize((w, h), Image.LANCZOS).load()
+
+    lines = []
+    for y in range(0, h, 2):
+        cells = []
+        for x in range(w):
+            (r1, g1, b1), (r2, g2, b2) = px[x, y], px[x, y + 1]
+            cells.append(f"\033[38;2;{r1};{g1};{b1}m\033[48;2;{r2};{g2};{b2}m▀")
+        lines.append("".join(cells) + "\033[0m")
+
+    print("\n".join(lines))
+
+
+def try_imgcat(image_bytes: bytes) -> bool:
+    """Pipe the image to the first working viewer among imgcat, chafa and viu.
+
+    Returns True as soon as one viewer exits with status 0. A viewer that is
+    not on PATH, cannot be started, times out (5 s) or exits non-zero is
+    skipped in favor of the next one; False means none of them worked.
+    """
+    for cmd in filter(shutil.which, ("imgcat", "chafa", "viu")):
+        try:
+            if subprocess.run([cmd, "-"], input=image_bytes, timeout=5).returncode == 0:
+                return True
+        except (OSError, subprocess.SubprocessError):
+            continue  # broken or hung viewer: fall through to the next one
+    return False
+
+
+def display_image(image_bytes: bytes):
+    """Show the image in the terminal, preferring an external viewer.
+
+    The ANSI half-block renderer is used only when no viewer tool succeeded.
+    """
+    shown_by_tool = try_imgcat(image_bytes)
+    if not shown_by_tool:
+        render_in_terminal(image_bytes)
+
+
+# ── Main ─────────────────────────────────────────────────────────────
+
+
+def main():
+    """Send one prompt to the local Flash server, then save and display the image.
+
+    The prompt comes from the command-line arguments (default: DEFAULT_PROMPT).
+    Exits with status 1 on connection failure, HTTP error, or a non-success reply.
+    """
+    prompt = " ".join(sys.argv[1:]) if len(sys.argv) > 1 else DEFAULT_PROMPT
+
+    print()
+    print("  ⚡ Flash Demo — Flux Text-to-Image")
+    print("  ─────────────────────────────────────")
+    print(f'  Prompt:  "{prompt}"')
+    print(f"  Server:  {API_URL}")
+    print()
+
+    # The token is optional; the server falls back to its own HF_TOKEN when it is empty.
+    hf_token = os.environ.get("HF_TOKEN", "")
+    payload = json.dumps({"prompt": prompt, "hf_token": hf_token}).encode()
+    req = urllib.request.Request(
+        API_URL, data=payload, headers={"Content-Type": "application/json"}
+    )
+
+    print("  Sending to RunPod GPU worker...", end="", flush=True)
+    t0 = time.time()
+    try:
+        with urllib.request.urlopen(req, timeout=300) as resp:
+            result = json.loads(resp.read())
+    except urllib.error.HTTPError as e:  # the server answered: show its detail
+        detail = e.read().decode("utf-8", errors="replace")
+        print(f"\n\n  Error from worker: HTTP {e.code} {detail}")
+        sys.exit(1)
+    except OSError as e:  # URLError, refused connections and socket timeouts
+        print(f"\n\n  Error: Could not connect to {API_URL}")
+        print("  Make sure the Flash server is running:  flash run")
+        print(f"  ({e})")
+        sys.exit(1)
+    elapsed = time.time() - t0
+
+    if result.get("status") != "success":
+        print(f"\n\n  Error from worker: {result}")
+        sys.exit(1)
+
+    image_bytes = base64.b64decode(result["image_base64"])
+    size_kb = len(image_bytes) / 1024
+    print(f" done! ({elapsed:.1f}s)")
+    print(f"  Image:   {result.get('width')}x{result.get('height')}px, {size_kb:.0f}KB")
+    print()
+
+    with open(OUTPUT_FILE, "wb") as f:
+        f.write(image_bytes)
+    print(f"  Saved to {OUTPUT_FILE}")
+    print()
+
+    display_image(image_bytes)
+    print()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/02_ml_inference/02_text_to_image/gpu_worker.py b/02_ml_inference/02_text_to_image/gpu_worker.py
new file mode 100644
index 0000000..4cd283f
--- /dev/null
+++ b/02_ml_inference/02_text_to_image/gpu_worker.py
@@ -0,0 +1,137 @@
+"""Flux Text-to-Image — GPU Worker
+
+One warm worker. Cached FLUX pipeline.
+"""
+
+import os
+
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel, Field
+from runpod_flash import GpuGroup, LiveServerless, remote
+
+# ── GPU Configuration ────────────────────────────────────────────────
+# FLUX.1-schnell is a fast distilled model (~12GB VRAM).
+# ADA_24 gives us an RTX 4090-class GPU with 24GB — plenty of room.
+gpu_config = LiveServerless(
+    name="02_02_flux_schnell",
+    gpus=[GpuGroup.ADA_24],
+    workersMin=1,  # matches the "One warm worker" note in the module docstring
+    workersMax=3,
+    idleTimeout=5,  # NOTE(review): unit not shown here — confirm in runpod_flash docs
+)
+
+
+@remote(
+    resource_config=gpu_config,
+    dependencies=["diffusers", "torch", "transformers", "accelerate", "sentencepiece", "protobuf"],
+)
+class FluxWorker:
+    """Text-to-image worker that loads FLUX.1-schnell once and reuses the pipeline."""
+
+    def __init__(self):
+        import torch
+
+        self._torch = torch
+        self._model_name = "black-forest-labs/FLUX.1-schnell"
+        self._pipe = None  # built lazily by _ensure_pipeline on the first request
+
+    def _ensure_pipeline(self, hf_token: str):
+        """Load the pipeline on first use; later calls return immediately.
+
+        A non-empty ``hf_token`` is passed to ``huggingface_hub.login`` first.
+        """
+        if self._pipe is not None:
+            return  # checked before the imports so warm calls skip them
+
+        from diffusers import FluxPipeline
+        from huggingface_hub import login
+
+        if hf_token:
+            login(token=hf_token)
+
+        pipe = FluxPipeline.from_pretrained(self._model_name, torch_dtype=self._torch.bfloat16)
+        pipe.enable_model_cpu_offload()
+        # Cache only after setup finished, so a failed offload setup is retried too.
+        self._pipe = pipe
+
+    async def generate(self, input_data: dict) -> dict:
+        """Generate one image and return it as a base64-encoded PNG.
+
+        Args:
+            input_data: Mapping with optional keys ``prompt``, ``width``, ``height``,
+                ``num_steps`` and ``hf_token``; missing keys fall back to defaults.
+
+        Returns:
+            A dict with ``status`` ``"success"`` plus ``image_base64``, ``prompt``, ``width``
+            and ``height``, or ``{"status": "error", "error": <message>}`` on failure.
+        """
+        import base64
+        import io
+
+        prompt = input_data.get("prompt", "a lightning flash above a datacenter")
+        try:
+            # Parsed inside the try so malformed numbers also yield an error dict.
+            width = int(input_data.get("width", 512))
+            height = int(input_data.get("height", 512))
+            num_steps = int(input_data.get("num_steps", 4))
+            self._ensure_pipeline(hf_token=input_data.get("hf_token", ""))
+            image = self._pipe(
+                prompt, num_inference_steps=num_steps,
+                width=width, height=height, guidance_scale=0.0,
+            ).images[0]
+        except Exception as exc:
+            return {"status": "error", "error": f"Image generation failed: {exc}"}
+
+        buf = io.BytesIO()
+        image.save(buf, format="PNG")
+        return {
+            "status": "success",
+            "image_base64": base64.b64encode(buf.getvalue()).decode(),
+            "prompt": prompt,
+            "width": width,
+            "height": height,
+        }
+
+
+# ── FastAPI Router ───────────────────────────────────────────────────
+gpu_router = APIRouter()
+worker: FluxWorker | None = None
+
+
+def get_worker() -> FluxWorker:
+    """Return the shared FluxWorker, creating it on the first call."""
+    global worker
+    worker = FluxWorker() if worker is None else worker
+    return worker
+
+
+class ImageRequest(BaseModel):  # lower bounds give a 422 before any GPU work starts
+    prompt: str = Field(
+        default="a tiny astronaut floating in space, watercolor style",
+        description="Text prompt describing the image to generate",
+    )
+    width: int = Field(default=512, gt=0, description="Image width in pixels")
+    height: int = Field(default=512, gt=0, description="Image height in pixels")
+    num_steps: int = Field(default=4, ge=1, description="Number of diffusion steps (1-8)")
+    hf_token: str = Field(
+        default="",
+        description="Optional Hugging Face token. Uses HF_TOKEN env var when omitted.",
+    )
+
+
+@gpu_router.post("/generate")
+async def generate(request: ImageRequest):
+    """Run the FLUX worker for this request; a failed run becomes an HTTP 400."""
+    # A blank hf_token in the request falls back to the HF_TOKEN env var.
+    job_input = {
+        "prompt": request.prompt,
+        "width": request.width,
+        "height": request.height,
+        "num_steps": request.num_steps,
+        "hf_token": request.hf_token.strip() or os.environ.get("HF_TOKEN", ""),
+    }
+
+    result = await get_worker().generate(job_input)
+    if result.get("status") == "success":
+        return result
+    raise HTTPException(status_code=400, detail=result.get("error", "Image generation failed"))
diff --git a/02_ml_inference/02_text_to_image/main.py b/02_ml_inference/02_text_to_image/main.py
new file mode 100644
index 0000000..2cb1f50
--- /dev/null
+++ b/02_ml_inference/02_text_to_image/main.py
@@ -0,0 +1,38 @@
+import logging
+import os
+
+from fastapi import FastAPI
+from gpu_worker import gpu_router
+
+logger = logging.getLogger(__name__)
+
+app = FastAPI(
+    title="Flux Text-to-Image",
+    description="Generate images from text prompts with FLUX.1-schnell on RunPod serverless GPUs",
+    version="1.0.0",
+)
+
+app.include_router(gpu_router, prefix="/gpu", tags=["Text-to-Image"])
+
+
+@app.get("/")
+def home():
+    """Return an index document pointing at the docs and the generate route."""
+    return dict(
+        message="Flux Text-to-Image API", docs="/docs",
+        endpoints=dict(generate="/gpu/generate"),
+    )
+
+
+@app.get("/ping")
+def ping():
+    return dict(status="healthy")  # fixed payload; no checks are performed
+
+
+if __name__ == "__main__":
+    import uvicorn
+    # Configure logging first: the root logger defaults to WARNING and would drop the INFO line.
+    logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO").upper())
+    host, port = os.getenv("FLASH_HOST", "localhost"), int(os.getenv("FLASH_PORT", "8888"))
+    logger.info("Starting Flash server on %s:%s", host, port)
+    uvicorn.run(app, host=host, port=port)
diff --git a/02_ml_inference/02_text_to_image/mothership.py b/02_ml_inference/02_text_to_image/mothership.py
new file mode 100644
index 0000000..7cb8059
--- /dev/null
+++ b/02_ml_inference/02_text_to_image/mothership.py
@@ -0,0 +1,7 @@
+"""Mothership Endpoint Configuration"""
+
+from runpod_flash import CpuLiveLoadBalancer
+
+mothership = CpuLiveLoadBalancer(  # NOTE(review): not imported by main.py — confirm usage
+    name="02_02_text_to_image-mothership",
+)
diff --git a/02_ml_inference/02_text_to_image/pyproject.toml b/02_ml_inference/02_text_to_image/pyproject.toml
new file mode 100644
index 0000000..beb9159
--- /dev/null
+++ b/02_ml_inference/02_text_to_image/pyproject.toml
@@ -0,0 +1,10 @@
+[project]
+name = "flash-flux-text-to-image"
+version = "0.1.0"
+description = "Generate images with FLUX.1-schnell via RunPod Flash"
+requires-python = ">=3.10"
+dependencies = [
+    "runpod-flash",
+    "fastapi>=0.104.0",
+    "pillow>=10.0.0",
+]
diff --git a/02_ml_inference/02_text_to_image/requirements.txt b/02_ml_inference/02_text_to_image/requirements.txt
new file mode 100644
index 0000000..a73ed1a
--- /dev/null
+++ b/02_ml_inference/02_text_to_image/requirements.txt
@@ -0,0 +1 @@
+runpod-flash
diff --git a/02_ml_inference/03_image_to_image/.env.example b/02_ml_inference/03_image_to_image/.env.example
new file mode 100644
index 0000000..8360712
--- /dev/null
+++ b/02_ml_inference/03_image_to_image/.env.example
@@ -0,0 +1,4 @@
+# FLASH_HOST=localhost
+# FLASH_PORT=8888
+# LOG_LEVEL=INFO
+# RUNPOD_API_KEY=your_api_key_here
diff --git a/02_ml_inference/03_image_to_image/.flashignore b/02_ml_inference/03_image_to_image/.flashignore
new file mode 100644
index 0000000..6c8e627
--- /dev/null
+++ b/02_ml_inference/03_image_to_image/.flashignore
@@ -0,0 +1,43 @@
+# Flash Build Ignore Patterns
+
+# Python cache
+__pycache__/
+*.pyc
+
+# Virtual environments
+venv/
+.venv/
+env/
+
+# IDE
+.vscode/
+.idea/
+
+# Environment files
+.env
+.env.local
+
+# Git
+.git/
+.gitignore
+
+# Build artifacts
+dist/
+build/
+*.egg-info/
+
+# Flash resources
+.runpod/
+
+# Tests
+tests/
+test_*.py
+*_test.py
+
+# Documentation
+docs/
+*.md
+!README.md
+
+# Demo output
+transformed.png
diff --git a/02_ml_inference/03_image_to_image/.gitignore b/02_ml_inference/03_image_to_image/.gitignore
new file mode 100644
index 0000000..cf5cbb3
--- /dev/null
+++ b/02_ml_inference/03_image_to_image/.gitignore
@@ -0,0 +1,28 @@
+# Python
+__pycache__/
+*.pyc
+*.pyo
+*.egg-info/
+dist/
+build/
+
+# Virtual environments
+.venv/
+venv/
+env/
+
+# Environment
+.env
+.env.local
+
+# Flash
+.flash_resources.pkl
+.tetra_resources.pkl
+.runpod/
+
+# IDE
+.vscode/
+.idea/
+
+# Demo output
+transformed.png
diff --git a/02_ml_inference/03_image_to_image/README.md b/02_ml_inference/03_image_to_image/README.md
new file mode 100644
index 0000000..2b6a52e
--- /dev/null
+++ b/02_ml_inference/03_image_to_image/README.md
@@ -0,0 +1,71 @@
+# Image-to-Image with Stable Diffusion
+
+Serverless image-to-image API built with Runpod Flash and Stable Diffusion v1.5.
+
+## What this example does
+
+- Accepts an input image as base64
+- Applies prompt-guided transformation with `StableDiffusionImg2ImgPipeline`
+- Returns a transformed image as base64 PNG
+
+## Quick Start
+
+```bash
+cd 02_ml_inference/03_image_to_image
+pip install -r requirements.txt
+cp .env.example .env
+# Add RUNPOD_API_KEY in .env
+flash run
+```
+
+Open docs at `http://localhost:8888/docs`.
+
+## Endpoint
+
+### POST `/gpu/transform`
+
+Request body:
+
+```json
+{
+  "image_base64": "<base64-encoded-image-or-omit-for-default-poddy.jpg>",
+  "prompt": "turn this portrait into a cinematic oil painting",
+  "negative_prompt": "blurry, low quality",
+  "strength": 0.65,
+  "guidance_scale": 7.5,
+  "num_steps": 25,
+  "seed": 42
+}
+```
+
+Response:
+
+```json
+{
+  "status": "success",
+  "image_base64": "<base64-encoded-output-image>",
+  "model": "runwayml/stable-diffusion-v1-5",
+  "prompt": "...",
+  "negative_prompt": "...",
+  "strength": 0.65,
+  "guidance_scale": 7.5,
+  "num_steps": 25,
+  "seed": 42,
+  "timestamp": "2026-02-15T12:34:56.789123"
+}
+```
+
+## Local Demo Script
+
+Run the demo client against your local endpoint:
+
+```bash
+python demo.py "turn this into a watercolor painting" output.png
+```
+
+## Notes
+
+- First request can take longer because the worker and model need to warm up.
+- Input images are resized to `512x512` before inference for stable memory usage.
+- If `image_base64` is omitted, the endpoint uses `poddy.jpg` as the default input image.
+- Quality is intentionally baseline for fast, reliable, and lower-cost demo runs; this is a starter configuration, not a max-quality preset.
diff --git a/02_ml_inference/03_image_to_image/__init__.py b/02_ml_inference/03_image_to_image/__init__.py
new file mode 100644
index 0000000..5d8d1d1
--- /dev/null
+++ b/02_ml_inference/03_image_to_image/__init__.py
@@ -0,0 +1 @@
+"""Image-to-image inference example package."""
diff --git a/02_ml_inference/03_image_to_image/demo.py b/02_ml_inference/03_image_to_image/demo.py
new file mode 100644
index 0000000..cc1ef59
--- /dev/null
+++ b/02_ml_inference/03_image_to_image/demo.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+"""
+Send an input image to the local Flash img2img endpoint and save the output.
+
+Usage:
+    python demo.py "turn this into a watercolor painting" [output.png]
+    python demo.py input.png "turn this into a watercolor painting" [output.png]
+"""
+
+import base64
+import json
+import sys
+import urllib.error
+import urllib.request
+from pathlib import Path
+
+API_URL = "http://localhost:8888/gpu/transform"
+DEFAULT_IMAGE = Path(__file__).resolve().parent / "poddy.jpg"
+DEFAULT_PROMPT = "turn this into a cinematic watercolor painting"
+DEFAULT_OUTPUT = "transformed.png"
+
+
+def main() -> None:
+    """Transform an image through the local img2img endpoint and save the result.
+
+    Usage: ``[input_image] [prompt] [output_path]``. The first argument is the input
+    image only if it names an existing path; otherwise it is the prompt.
+    """
+    args = sys.argv[1:]
+    input_path = DEFAULT_IMAGE
+    if args and Path(args[0]).expanduser().exists():
+        input_path = Path(args[0]).expanduser().resolve()
+        args = args[1:]
+    prompt = args[0] if args else DEFAULT_PROMPT
+    output_path = Path(args[1] if len(args) > 1 else DEFAULT_OUTPUT).resolve()
+
+    # is_file() also rejects directories, which read_bytes() cannot load.
+    if not input_path.is_file():
+        print(f"Input image not found: {input_path}")
+        sys.exit(1)
+
+    payload = {
+        "image_base64": base64.b64encode(input_path.read_bytes()).decode("utf-8"),
+        "prompt": prompt,
+        "strength": 0.65,
+        "guidance_scale": 7.5,
+        "num_steps": 25,
+    }
+    request = urllib.request.Request(
+        API_URL,
+        data=json.dumps(payload).encode("utf-8"),
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+
+    try:
+        with urllib.request.urlopen(request, timeout=300) as response:
+            result = json.loads(response.read().decode("utf-8"))
+    except urllib.error.HTTPError as exc:  # the server answered: show its detail
+        body = exc.read().decode("utf-8", errors="replace")
+        print(f"Request failed: HTTP {exc.code}")
+        if body:
+            print(f"Server detail: {body}")
+        sys.exit(1)
+    except OSError as exc:  # URLError, refused connections and socket timeouts
+        print(f"Request failed: {exc}")
+        print("Make sure the server is running from this folder with: flash run")
+        sys.exit(1)
+
+    if result.get("status") != "success":
+        print(f"Worker error: {result}")
+        sys.exit(1)
+
+    output_path.write_bytes(base64.b64decode(result["image_base64"]))
+    print(f"Saved transformed image to {output_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/02_ml_inference/03_image_to_image/gpu_worker.py b/02_ml_inference/03_image_to_image/gpu_worker.py
new file mode 100644
index 0000000..60a6261
--- /dev/null
+++ b/02_ml_inference/03_image_to_image/gpu_worker.py
@@ -0,0 +1,145 @@
+import base64
+from pathlib import Path
+
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel, Field
+from runpod_flash import GpuGroup, LiveServerless, remote
+
+DEFAULT_IMAGE_PATH = Path(__file__).resolve().parent / "poddy.jpg"
+
+
+def load_default_image_base64() -> str:
+    return str(base64.b64encode(DEFAULT_IMAGE_PATH.read_bytes()), "utf-8")  # poddy.jpg as text
+
+
+gpu_config = LiveServerless(
+    name="02_03_image_to_image_gpu",
+    gpus=[GpuGroup.ADA_24],
+    workersMin=0,  # presumably allows scaling to zero workers — confirm in runpod_flash docs
+    workersMax=2,
+    idleTimeout=5,  # NOTE(review): unit not shown here — confirm in runpod_flash docs
+)
+
+
+@remote(
+    resource_config=gpu_config,
+    dependencies=["diffusers", "torch", "transformers", "accelerate", "safetensors", "pillow"],
+)
+class ImageToImageWorker:
+    """Img2img worker holding a Stable Diffusion v1.5 pipeline on the CUDA device."""
+
+    def __init__(self):
+        import torch
+        from diffusers import StableDiffusionImg2ImgPipeline
+
+        self._torch = torch
+        self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
+            "runwayml/stable-diffusion-v1-5",
+            torch_dtype=torch.float16,
+            safety_checker=None,
+            requires_safety_checker=False,
+        )
+        self.pipe = self.pipe.to("cuda")
+        self.pipe.enable_attention_slicing()
+
+    async def transform(self, input_data: dict) -> dict:
+        """Apply a prompt-guided transformation to a base64-encoded image.
+
+        ``input_data`` needs ``image_base64`` and ``prompt``; ``negative_prompt``,
+        ``strength``, ``guidance_scale``, ``num_steps`` and ``seed`` are optional.
+        Returns a success dict with the PNG in ``image_base64``, or
+        ``{"status": "error", "error": <message>}`` for bad input or a failed run.
+        """
+        import base64
+        import io
+        from datetime import datetime
+        from PIL import Image
+
+        image_base64 = input_data.get("image_base64", "")
+        prompt = input_data.get("prompt", "").strip()
+        negative_prompt = input_data.get("negative_prompt", "").strip()
+        strength = float(input_data.get("strength", 0.65))
+        guidance_scale = float(input_data.get("guidance_scale", 7.5))
+        num_steps = int(input_data.get("num_steps", 25))
+        seed = input_data.get("seed")
+
+        if not image_base64:
+            return {"status": "error", "error": "image_base64 is required"}
+        if not prompt:
+            return {"status": "error", "error": "prompt is required"}
+
+        try:
+            image_bytes = base64.b64decode(image_base64)
+            input_image = Image.open(io.BytesIO(image_bytes)).convert("RGB").resize((512, 512))
+        except Exception as exc:
+            return {"status": "error", "error": f"Invalid input image: {exc}"}
+
+        try:
+            generator = None
+            if seed is not None:
+                generator = self._torch.Generator(device="cuda").manual_seed(int(seed))
+            output_image = self.pipe(
+                prompt=prompt,
+                negative_prompt=negative_prompt or None,
+                image=input_image,
+                strength=strength,
+                guidance_scale=guidance_scale,
+                num_inference_steps=num_steps,
+                generator=generator,
+            ).images[0]
+        except Exception as exc:  # report inference failures like the other errors
+            return {"status": "error", "error": f"Image transformation failed: {exc}"}
+
+        output_buffer = io.BytesIO()
+        output_image.save(output_buffer, format="PNG")
+        return {
+            "status": "success",
+            "image_base64": base64.b64encode(output_buffer.getvalue()).decode("utf-8"),
+            "model": "runwayml/stable-diffusion-v1-5",
+            "prompt": prompt,
+            "negative_prompt": negative_prompt or None,
+            "strength": strength,
+            "guidance_scale": guidance_scale,
+            "num_steps": num_steps,
+            "seed": seed,
+            "timestamp": datetime.now().isoformat(),
+        }
+
+
+gpu_router = APIRouter()
+worker: ImageToImageWorker | None = None
+
+
+def get_worker() -> ImageToImageWorker:
+    """Return the shared ImageToImageWorker, creating it on the first call."""
+    global worker
+    worker = ImageToImageWorker() if worker is None else worker
+    return worker
+
+
+class ImageToImageRequest(BaseModel):  # request body of POST /transform
+    image_base64: str = Field(
+        default="",
+        description="Input image encoded as base64. If omitted, defaults to poddy.jpg.",
+    )
+    prompt: str = Field(description="Prompt that describes how to transform the image")
+    negative_prompt: str = Field(default="", description="What to avoid in the output image")
+    strength: float = Field(default=0.65, ge=0.1, le=1.0)  # passed to the pipeline as `strength`
+    guidance_scale: float = Field(default=7.5, ge=0.0, le=20.0)  # passed through unchanged
+    num_steps: int = Field(default=25, ge=1, le=50)  # becomes num_inference_steps
+    seed: int | None = Field(default=None, ge=0)  # None: no generator is created
+
+
+@gpu_router.post("/transform")
+async def transform(request: ImageToImageRequest):
+    """Run img2img for the request, using poddy.jpg when no image was sent."""
+    job_input = request.model_dump()
+    if not job_input.get("image_base64"):
+        try:
+            job_input["image_base64"] = load_default_image_base64()
+        except FileNotFoundError as exc:
+            raise HTTPException(status_code=500, detail=f"Default image not found: {exc}") from exc
+    outcome = await get_worker().transform(job_input)
+    if outcome.get("status") == "success":
+        return outcome
+    message = outcome.get("error", "Image transformation failed")
+    raise HTTPException(status_code=400, detail=message)
diff --git a/02_ml_inference/03_image_to_image/main.py b/02_ml_inference/03_image_to_image/main.py
new file mode 100644
index 0000000..7a001cd
--- /dev/null
+++ b/02_ml_inference/03_image_to_image/main.py
@@ -0,0 +1,38 @@
+import logging
+import os
+
+from fastapi import FastAPI
+from gpu_worker import gpu_router
+
+logger = logging.getLogger(__name__)
+
+app = FastAPI(
+    title="Image-to-Image API",
+    description="Transform images with Stable Diffusion on RunPod serverless GPUs",
+    version="1.0.0",
+)
+
+app.include_router(gpu_router, prefix="/gpu", tags=["Image-to-Image"])
+
+
+@app.get("/")
+def home():
+    """Return an index document pointing at the docs and the transform route."""
+    return dict(
+        message="Image-to-Image API", docs="/docs",
+        endpoints=dict(transform="/gpu/transform"),
+    )
+
+
+@app.get("/ping")
+def ping():
+    return dict(status="healthy")  # fixed payload; no checks are performed
+
+
+if __name__ == "__main__":
+    import uvicorn
+    # Configure logging first: the root logger defaults to WARNING and would drop the INFO line.
+    logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO").upper())
+    host, port = os.getenv("FLASH_HOST", "localhost"), int(os.getenv("FLASH_PORT", "8888"))
+    logger.info("Starting Flash server on %s:%s", host, port)
+    uvicorn.run(app, host=host, port=port)
diff --git a/02_ml_inference/03_image_to_image/mothership.py b/02_ml_inference/03_image_to_image/mothership.py
new file mode 100644
index 0000000..55eab2a
--- /dev/null
+++ b/02_ml_inference/03_image_to_image/mothership.py
@@ -0,0 +1,7 @@
+"""Mothership endpoint configuration."""
+
+from runpod_flash import CpuLiveLoadBalancer
+
+mothership = CpuLiveLoadBalancer(  # NOTE(review): not imported by main.py — confirm usage
+    name="02_03_image_to_image-mothership",
+)
diff --git a/02_ml_inference/03_image_to_image/poddy.jpg b/02_ml_inference/03_image_to_image/poddy.jpg
new file mode 100644
index 0000000..7493710
Binary files /dev/null and b/02_ml_inference/03_image_to_image/poddy.jpg differ
diff --git a/02_ml_inference/03_image_to_image/pyproject.toml b/02_ml_inference/03_image_to_image/pyproject.toml
new file mode 100644
index 0000000..b73c5a0
--- /dev/null
+++ b/02_ml_inference/03_image_to_image/pyproject.toml
@@ -0,0 +1,10 @@
+[project]
+name = "flash-image-to-image"
+version = "0.1.0"
+description = "Image-to-image transformations with Stable Diffusion on RunPod Flash"
+requires-python = ">=3.10"
+dependencies = [
+    "runpod-flash",
+    "fastapi>=0.104.0",
+    "pillow>=10.0.0",
+]
diff --git a/02_ml_inference/03_image_to_image/requirements.txt b/02_ml_inference/03_image_to_image/requirements.txt
new file mode 100644
index 0000000..a73ed1a
--- /dev/null
+++ b/02_ml_inference/03_image_to_image/requirements.txt
@@ -0,0 +1 @@
+runpod-flash
diff --git a/02_ml_inference/04_text_to_video/.env.example b/02_ml_inference/04_text_to_video/.env.example
new file mode 100644
index 0000000..8360712
--- /dev/null
+++ b/02_ml_inference/04_text_to_video/.env.example
@@ -0,0 +1,4 @@
+# FLASH_HOST=localhost
+# FLASH_PORT=8888
+# LOG_LEVEL=INFO
+# RUNPOD_API_KEY=your_api_key_here
diff --git a/02_ml_inference/04_text_to_video/.flashignore b/02_ml_inference/04_text_to_video/.flashignore
new file mode 100644
index 0000000..2dfb6fb
--- /dev/null
+++ b/02_ml_inference/04_text_to_video/.flashignore
@@ -0,0 +1,43 @@
+# Flash Build Ignore Patterns
+
+# Python cache
+__pycache__/
+*.pyc
+
+# Virtual environments
+venv/
+.venv/
+env/
+
+# IDE
+.vscode/
+.idea/
+
+# Environment files
+.env
+.env.local
+
+# Git
+.git/
+.gitignore
+
+# Build artifacts
+dist/
+build/
+*.egg-info/
+
+# Flash resources
+.runpod/
+
+# Tests
+tests/
+test_*.py
+*_test.py
+
+# Documentation
+docs/
+*.md
+!README.md
+
+# Demo output
+text_to_video.gif
diff --git a/02_ml_inference/04_text_to_video/.gitignore b/02_ml_inference/04_text_to_video/.gitignore
new file mode 100644
index 0000000..2f377a5
--- /dev/null
+++ b/02_ml_inference/04_text_to_video/.gitignore
@@ -0,0 +1,28 @@
+# Python
+__pycache__/
+*.pyc
+*.pyo
+*.egg-info/
+dist/
+build/
+
+# Virtual environments
+.venv/
+venv/
+env/
+
+# Environment
+.env
+.env.local
+
+# Flash
+.flash_resources.pkl
+.tetra_resources.pkl
+.runpod/
+
+# IDE
+.vscode/
+.idea/
+
+# Demo output
+text_to_video.gif
diff --git a/02_ml_inference/04_text_to_video/README.md b/02_ml_inference/04_text_to_video/README.md
new file mode 100644
index 0000000..296c563
--- /dev/null
+++ b/02_ml_inference/04_text_to_video/README.md
@@ -0,0 +1,72 @@
+# Text-to-Video with Diffusers
+
+Serverless text-to-video API built with Runpod Flash and Diffusers.
+
+## What this example does
+
+- Accepts a text prompt
+- Generates a short video clip with a GPU `@remote` worker
+- Returns the generated video as base64-encoded GIF
+
+## Quick Start
+
+```bash
+cd 02_ml_inference/04_text_to_video
+pip install -r requirements.txt
+cp .env.example .env
+# Add RUNPOD_API_KEY in .env
+flash run
+```
+
+Open docs at `http://localhost:8888/docs`.
+
+## Endpoint
+
+### POST `/gpu/generate`
+
+Request body:
+
+```json
+{
+  "prompt": "a cinematic drone shot of snowy mountains at sunrise",
+  "negative_prompt": "blurry, noisy, low quality",
+  "num_frames": 12,
+  "num_steps": 18,
+  "guidance_scale": 7.0,
+  "fps": 8,
+  "width": 512,
+  "height": 288,
+  "seed": 42
+}
+```
+
+Response:
+
+```json
+{
+  "status": "success",
+  "video_base64": "<base64-encoded-gif>",
+  "video_mime_type": "image/gif",
+  "preview_image_base64": "<base64-encoded-png>",
+  "preview_image_mime_type": "image/png",
+  "model": "damo-vilab/text-to-video-ms-1.7b",
+  "prompt": "...",
+  "num_frames": 16,
+  "fps": 8,
+  "timestamp": "2026-02-15T12:34:56.789123"
+}
+```
+
+## Local Demo Script
+
+```bash
+python demo.py "a cinematic drone shot of snowy mountains" output.gif
+```
+
+## Notes
+
+- First request can take longer because the worker and model need to warm up.
+- This example returns GIF output for portability and simple local testing.
+- GIF encoding is capped at 25 FPS; higher requested values are clamped and response `fps` reflects the encoded output.
+- Quality is intentionally baseline for fast, reliable, and lower-cost demo runs; this is a starter configuration, not a max-quality preset.
+- The default parameters are tuned for reliability on 24GB GPUs; increase frames/steps/resolution gradually if you want higher quality.
diff --git a/02_ml_inference/04_text_to_video/__init__.py b/02_ml_inference/04_text_to_video/__init__.py
new file mode 100644
index 0000000..ba611bf
--- /dev/null
+++ b/02_ml_inference/04_text_to_video/__init__.py
@@ -0,0 +1 @@
+"""Text-to-video inference example package."""
diff --git a/02_ml_inference/04_text_to_video/demo.py b/02_ml_inference/04_text_to_video/demo.py
new file mode 100644
index 0000000..c2f4caa
--- /dev/null
+++ b/02_ml_inference/04_text_to_video/demo.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+"""
+Generate a short GIF video from a text prompt.
+
+Usage:
+    python demo.py "a cinematic drone shot of snowy mountains" [output.gif]
+"""
+
+import base64
+import json
+import sys
+import urllib.error
+import urllib.request
+from pathlib import Path
+
+API_URL = "http://localhost:8888/gpu/generate"
+
+
+def main() -> None:
+    """Request a GIF for a prompt (argv[1]) and save it to argv[2] or text_to_video.gif."""
+    prompt = (
+        sys.argv[1] if len(sys.argv) > 1 else "a cinematic drone shot of snowy mountains at sunrise"
+    )
+    output_path = Path(sys.argv[2] if len(sys.argv) > 2 else "text_to_video.gif").resolve()
+
+    payload = {
+        "prompt": prompt,
+        "num_frames": 12,
+        "num_steps": 18,
+        "guidance_scale": 7.0,
+        "fps": 8,
+        "width": 512,
+        "height": 288,
+    }
+
+    request = urllib.request.Request(
+        API_URL,
+        data=json.dumps(payload).encode("utf-8"),
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+
+    try:
+        with urllib.request.urlopen(request, timeout=600) as response:
+            result = json.loads(response.read().decode("utf-8"))
+    except urllib.error.HTTPError as exc:
+        # The server answered, so print its detail rather than a "start the server" hint.
+        body = exc.read().decode("utf-8", errors="replace")
+        print(f"Request failed: HTTP {exc.code}")
+        if body:
+            print(f"Server detail: {body}")
+        sys.exit(1)
+    except OSError as exc:  # URLError plus socket timeouts raised while awaiting the reply
+        print(f"Request failed: {exc}")
+        print("Make sure the server is running from this folder with: flash run")
+        sys.exit(1)
+
+    if result.get("status") != "success":
+        print(f"Worker error: {result}")
+        sys.exit(1)
+
+    output_path.write_bytes(base64.b64decode(result["video_base64"]))
+    print(f"Saved generated video GIF to {output_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/02_ml_inference/04_text_to_video/gpu_worker.py b/02_ml_inference/04_text_to_video/gpu_worker.py
new file mode 100644
index 0000000..5daf485
--- /dev/null
+++ b/02_ml_inference/04_text_to_video/gpu_worker.py
@@ -0,0 +1,195 @@
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel, Field
+from runpod_flash import GpuGroup, LiveServerless, remote
+
+gpu_config = LiveServerless(  # resource settings handed to @remote for TextToVideoWorker
+    name="02_04_text_to_video_gpu",
+    gpus=[GpuGroup.ADA_24],
+    workersMin=0,
+    workersMax=2,
+    idleTimeout=5,  # NOTE(review): unit is not visible in this file — confirm
+)
+
+
+@remote(
+    resource_config=gpu_config,
+    dependencies=[
+        "diffusers",
+        "torch",
+        "transformers",
+        "accelerate",
+        "safetensors",
+        "pillow",
+    ],
+)
+class TextToVideoWorker:
+    def __init__(self):
+        import torch
+        from diffusers import DiffusionPipeline
+
+        self._torch = torch
+        self.model = "damo-vilab/text-to-video-ms-1.7b"
+        self._using_cpu_offload = False
+        self.pipe = DiffusionPipeline.from_pretrained(
+            self.model,
+            torch_dtype=torch.float16,
+        )
+        self.pipe.enable_attention_slicing()
+        if hasattr(self.pipe, "vae"):
+            if hasattr(self.pipe.vae, "enable_slicing"):
+                try:
+                    self.pipe.vae.enable_slicing()
+                except NotImplementedError:
+                    pass
+                except Exception:
+                    pass
+            if hasattr(self.pipe.vae, "enable_tiling"):
+                try:
+                    self.pipe.vae.enable_tiling()
+                except NotImplementedError:
+                    pass
+                except Exception:
+                    pass
+
+        if torch.cuda.is_available():
+            try:
+                # Prefer CPU offload for better reliability on 24GB GPUs.
+                self.pipe.enable_model_cpu_offload()
+                self._using_cpu_offload = True
+            except Exception:
+                # Fallback to full-GPU placement if offload is unavailable.
+                self.pipe = self.pipe.to("cuda")
+        else:
+            self.pipe = self.pipe.to("cpu")
+
+    async def generate(self, input_data: dict) -> dict:
+        import base64
+        import io
+        from datetime import datetime
+
+        prompt = input_data.get("prompt", "").strip()
+        negative_prompt = input_data.get("negative_prompt", "").strip()
+        num_frames = int(input_data.get("num_frames", 12))
+        num_steps = int(input_data.get("num_steps", 18))
+        guidance_scale = float(input_data.get("guidance_scale", 7.0))
+        fps = int(input_data.get("fps", 8))
+        width = int(input_data.get("width", 512))
+        height = int(input_data.get("height", 288))
+        seed = input_data.get("seed")
+
+        if not prompt:
+            return {"status": "error", "error": "prompt is required"}
+        if width % 8 != 0 or height % 8 != 0:
+            return {"status": "error", "error": "width and height must be divisible by 8"}
+
+        generator = None
+        if seed is not None:
+            generator_device = "cpu" if self._using_cpu_offload else "cuda"
+            if not self._torch.cuda.is_available():
+                generator_device = "cpu"
+            generator = self._torch.Generator(device=generator_device).manual_seed(int(seed))
+
+        try:
+            with self._torch.inference_mode():
+                result = self.pipe(
+                    prompt=prompt,
+                    negative_prompt=negative_prompt if negative_prompt else None,
+                    num_frames=num_frames,
+                    num_inference_steps=num_steps,
+                    guidance_scale=guidance_scale,
+                    width=width,
+                    height=height,
+                    generator=generator,
+                    output_type="pil",
+                )
+            frames = result.frames[0]
+        except Exception as exc:
+            return {"status": "error", "error": f"Video generation failed: {exc}"}
+        finally:
+            if self._torch.cuda.is_available():
+                self._torch.cuda.empty_cache()
+
+        if frames is None:
+            return {"status": "error", "error": "Model returned no frames"}
+        frames = list(frames)
+        if len(frames) == 0:
+            return {"status": "error", "error": "Model returned no frames"}
+        if not hasattr(frames[0], "save"):
+            from PIL import Image
+
+            converted_frames = []
+            for frame in frames:
+                arr = frame
+                if hasattr(arr, "dtype") and str(arr.dtype) != "uint8":
+                    arr = (arr * 255).clip(0, 255).astype("uint8")
+                converted_frames.append(Image.fromarray(arr))
+            frames = converted_frames
+
+        # GIF timing is quantized in milliseconds; clamp to 25 FPS max and report actual output FPS.
+        effective_fps = min(max(fps, 1), 25)
+        duration_ms = int(1000 / effective_fps)
+
+        gif_buffer = io.BytesIO()
+        frames[0].save(
+            gif_buffer,
+            format="GIF",
+            save_all=True,
+            append_images=frames[1:],
+            duration=duration_ms,
+            loop=0,
+        )
+        gif_buffer.seek(0)
+
+        preview_buffer = io.BytesIO()
+        frames[0].save(preview_buffer, format="PNG")
+        preview_buffer.seek(0)
+
+        return {
+            "status": "success",
+            "video_base64": base64.b64encode(gif_buffer.read()).decode("utf-8"),
+            "video_mime_type": "image/gif",
+            "preview_image_base64": base64.b64encode(preview_buffer.read()).decode("utf-8"),
+            "preview_image_mime_type": "image/png",
+            "model": self.model,
+            "prompt": prompt,
+            "negative_prompt": negative_prompt or None,
+            "num_frames": len(frames),
+            "fps": effective_fps,
+            "num_steps": num_steps,
+            "guidance_scale": guidance_scale,
+            "width": width,
+            "height": height,
+            "seed": seed,
+            "timestamp": datetime.now().isoformat(),
+        }
+
+
+gpu_router = APIRouter()  # mounted by main.py under the /gpu prefix
+worker: TextToVideoWorker | None = None  # created lazily by get_worker()
+
+
+def get_worker() -> TextToVideoWorker:
+    global worker
+    if worker is None:
+        worker = TextToVideoWorker()
+    return worker
+
+
+class TextToVideoRequest(BaseModel):
+    prompt: str = Field(description="Prompt that describes the video to generate")
+    negative_prompt: str = Field(default="", description="What to avoid in the generated video")
+    num_frames: int = Field(default=12, ge=8, le=24)
+    num_steps: int = Field(default=18, ge=5, le=40)
+    guidance_scale: float = Field(default=7.0, ge=1.0, le=20.0)
+    fps: int = Field(default=8, ge=1, le=30)
+    width: int = Field(default=512, ge=256, le=768)
+    height: int = Field(default=288, ge=256, le=512)
+    seed: int | None = Field(default=None, ge=0)
+
+
+@gpu_router.post("/generate")
+async def generate(request: TextToVideoRequest):
+    result = await get_worker().generate(request.model_dump())
+    if result.get("status") != "success":
+        raise HTTPException(status_code=400, detail=result.get("error", "Video generation failed"))
+    return result
diff --git a/02_ml_inference/04_text_to_video/main.py b/02_ml_inference/04_text_to_video/main.py
new file mode 100644
index 0000000..abe2d89
--- /dev/null
+++ b/02_ml_inference/04_text_to_video/main.py
@@ -0,0 +1,38 @@
+import logging
+import os
+
+from fastapi import FastAPI
+from gpu_worker import gpu_router
+
+logger = logging.getLogger(__name__)
+
+app = FastAPI(
+    title="Text-to-Video API",
+    description="Generate short videos from text prompts on RunPod serverless GPUs",
+    version="1.0.0",
+)
+
+app.include_router(gpu_router, prefix="/gpu", tags=["Text-to-Video"])  # serves POST /gpu/generate
+
+
+@app.get("/")
+def home():
+    return {
+        "message": "Text-to-Video API",
+        "docs": "/docs",
+        "endpoints": {"generate": "/gpu/generate"},
+    }
+
+
+@app.get("/ping")
+def ping():  # constant liveness reply; does not touch the GPU worker
+    return {"status": "healthy"}
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    host = os.getenv("FLASH_HOST", "localhost")
+    port = int(os.getenv("FLASH_PORT", 8888))
+    logger.info(f"Starting Flash server on {host}:{port}")
+    uvicorn.run(app, host=host, port=port)
diff --git a/02_ml_inference/04_text_to_video/mothership.py b/02_ml_inference/04_text_to_video/mothership.py
new file mode 100644
index 0000000..a4de8a8
--- /dev/null
+++ b/02_ml_inference/04_text_to_video/mothership.py
@@ -0,0 +1,7 @@
+"""Mothership endpoint configuration."""
+
+from runpod_flash import CpuLiveLoadBalancer
+
+mothership = CpuLiveLoadBalancer(  # NOTE(review): presumably picked up by Flash tooling — confirm
+    name="02_04_text_to_video-mothership",
+)
diff --git a/02_ml_inference/04_text_to_video/pyproject.toml b/02_ml_inference/04_text_to_video/pyproject.toml
new file mode 100644
index 0000000..011ac7c
--- /dev/null
+++ b/02_ml_inference/04_text_to_video/pyproject.toml
@@ -0,0 +1,10 @@
+[project]
+name = "flash-text-to-video"
+version = "0.1.0"
+description = "Text-to-video generation with Diffusers on RunPod Flash"
+requires-python = ">=3.10"
+dependencies = [
+    "runpod-flash",
+    "fastapi>=0.104.0",
+    "pillow>=10.0.0",
+]
diff --git a/02_ml_inference/04_text_to_video/requirements.txt b/02_ml_inference/04_text_to_video/requirements.txt
new file mode 100644
index 0000000..a73ed1a
--- /dev/null
+++ b/02_ml_inference/04_text_to_video/requirements.txt
@@ -0,0 +1 @@
+runpod-flash
diff --git a/02_ml_inference/05_image_to_video/.env.example b/02_ml_inference/05_image_to_video/.env.example
new file mode 100644
index 0000000..8360712
--- /dev/null
+++ b/02_ml_inference/05_image_to_video/.env.example
@@ -0,0 +1,4 @@
+# FLASH_HOST=localhost
+# FLASH_PORT=8888
+# LOG_LEVEL=INFO
+# RUNPOD_API_KEY=your_api_key_here
diff --git a/02_ml_inference/05_image_to_video/.flashignore b/02_ml_inference/05_image_to_video/.flashignore
new file mode 100644
index 0000000..ac60074
--- /dev/null
+++ b/02_ml_inference/05_image_to_video/.flashignore
@@ -0,0 +1,43 @@
+# Flash Build Ignore Patterns
+
+# Python cache
+__pycache__/
+*.pyc
+
+# Virtual environments
+venv/
+.venv/
+env/
+
+# IDE
+.vscode/
+.idea/
+
+# Environment files
+.env
+.env.local
+
+# Git
+.git/
+.gitignore
+
+# Build artifacts
+dist/
+build/
+*.egg-info/
+
+# Flash resources
+.runpod/
+
+# Tests
+tests/
+test_*.py
+*_test.py
+
+# Documentation
+docs/
+*.md
+!README.md
+
+# Demo output
+image_to_video.gif
diff --git a/02_ml_inference/05_image_to_video/.gitignore b/02_ml_inference/05_image_to_video/.gitignore
new file mode 100644
index 0000000..b551249
--- /dev/null
+++ b/02_ml_inference/05_image_to_video/.gitignore
@@ -0,0 +1,28 @@
+# Python
+__pycache__/
+*.pyc
+*.pyo
+*.egg-info/
+dist/
+build/
+
+# Virtual environments
+.venv/
+venv/
+env/
+
+# Environment
+.env
+.env.local
+
+# Flash
+.flash_resources.pkl
+.tetra_resources.pkl
+.runpod/
+
+# IDE
+.vscode/
+.idea/
+
+# Demo output
+image_to_video.gif
diff --git a/02_ml_inference/05_image_to_video/README.md b/02_ml_inference/05_image_to_video/README.md
new file mode 100644
index 0000000..acf55de
--- /dev/null
+++ b/02_ml_inference/05_image_to_video/README.md
@@ -0,0 +1,76 @@
+# Image-to-Video with Stable Video Diffusion
+
+Serverless image-to-video API built with Runpod Flash and Stable Video Diffusion.
+
+## What this example does
+
+- Accepts an input image as base64
+- Animates the image into a short clip with `StableVideoDiffusionPipeline`
+- Returns the generated video as base64-encoded GIF
+
+## Quick Start
+
+```bash
+cd 02_ml_inference/05_image_to_video
+pip install -r requirements.txt
+cp .env.example .env
+# Add RUNPOD_API_KEY in .env
+flash run
+```
+
+Open docs at `http://localhost:8888/docs`.
+
+## Endpoint
+
+### POST `/gpu/animate`
+
+Request body:
+
+```json
+{
+  "image_base64": "<base64-encoded-image-or-omit-for-default-poddy.jpg>",
+  "motion_bucket_id": 127,
+  "noise_aug_strength": 0.02,
+  "num_frames": 12,
+  "num_steps": 18,
+  "fps": 7,
+  "seed": 42
+}
+```
+
+Response:
+
+```json
+{
+  "status": "success",
+  "video_base64": "<base64-encoded-gif>",
+  "video_mime_type": "image/gif",
+  "preview_image_base64": "<base64-encoded-png>",
+  "preview_image_mime_type": "image/png",
+  "model": "stabilityai/stable-video-diffusion-img2vid-xt",
+  "input_width": 1920,
+  "input_height": 1080,
+  "render_width": 1024,
+  "render_height": 576,
+  "num_frames": 12,
+  "timestamp": "2026-02-15T12:34:56.789123"
+}
+```
+
+## Local Demo Script
+
+```bash
+python demo.py
+# or explicitly:
+python demo.py input.png output.gif
+```
+
+## Notes
+
+- First request can take longer because the worker and model need to warm up.
+- Input images are resized to `1024x576` before animation for predictable memory usage.
+- This example returns GIF output for portability and simple local testing.
+- GIF encoding is capped at 25 FPS; higher requested values are clamped and response `fps` reflects the encoded output.
+- If `image_base64` is omitted, the endpoint uses `poddy.jpg` as the default input image.
+- Quality is intentionally baseline for fast, reliable, and lower-cost demo runs; this is a starter configuration, not a max-quality preset.
+- The default parameters are tuned for reliability on 24GB GPUs; increase frames/steps gradually if you want higher quality.
diff --git a/02_ml_inference/05_image_to_video/__init__.py b/02_ml_inference/05_image_to_video/__init__.py
new file mode 100644
index 0000000..640a62b
--- /dev/null
+++ b/02_ml_inference/05_image_to_video/__init__.py
@@ -0,0 +1 @@
+"""Image-to-video inference example package."""
diff --git a/02_ml_inference/05_image_to_video/demo.py b/02_ml_inference/05_image_to_video/demo.py
new file mode 100644
index 0000000..dca1582
--- /dev/null
+++ b/02_ml_inference/05_image_to_video/demo.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+"""
+Animate an input image into a short GIF video.
+
+Usage:
+    python demo.py [input.png] [output.gif]
+"""
+
+import base64
+import json
+import sys
+import urllib.error
+import urllib.request
+from pathlib import Path
+
+API_URL = "http://localhost:8888/gpu/animate"  # /animate route under main.py's /gpu prefix
+DEFAULT_IMAGE = Path(__file__).resolve().parent / "poddy.jpg"  # sample image next to this script
+DEFAULT_OUTPUT = "image_to_video.gif"  # resolved against the current working directory
+
+
+def main() -> None:
+    input_path = Path(sys.argv[1]).expanduser().resolve() if len(sys.argv) > 1 else DEFAULT_IMAGE
+    output_path = Path(sys.argv[2] if len(sys.argv) > 2 else DEFAULT_OUTPUT).resolve()
+
+    if not input_path.exists():
+        print(f"Input image not found: {input_path}")
+        sys.exit(1)
+
+    image_base64 = base64.b64encode(input_path.read_bytes()).decode("utf-8")
+    payload = {
+        "image_base64": image_base64,
+        "motion_bucket_id": 127,
+        "noise_aug_strength": 0.02,
+        "num_frames": 12,
+        "num_steps": 18,
+        "fps": 7,
+    }
+
+    request = urllib.request.Request(
+        API_URL,
+        data=json.dumps(payload).encode("utf-8"),
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+
+    try:
+        with urllib.request.urlopen(request, timeout=600) as response:
+            result = json.loads(response.read().decode("utf-8"))
+    except urllib.error.HTTPError as exc:
+        body = exc.read().decode("utf-8", errors="replace")
+        print(f"Request failed: HTTP {exc.code}")
+        if body:
+            print(f"Server detail: {body}")
+        print("Make sure the server is running from this folder with: flash run")
+        sys.exit(1)
+    except urllib.error.URLError as exc:
+        print(f"Request failed: {exc}")
+        print("Make sure the server is running from this folder with: flash run")
+        sys.exit(1)
+
+    if result.get("status") != "success":
+        print(f"Worker error: {result}")
+        sys.exit(1)
+
+    output_bytes = base64.b64decode(result["video_base64"])
+    output_path.write_bytes(output_bytes)
+    print(f"Saved animated video GIF to {output_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/02_ml_inference/05_image_to_video/gpu_worker.py b/02_ml_inference/05_image_to_video/gpu_worker.py
new file mode 100644
index 0000000..06f9b45
--- /dev/null
+++ b/02_ml_inference/05_image_to_video/gpu_worker.py
@@ -0,0 +1,215 @@
+import base64
+from pathlib import Path
+
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel, Field
+from runpod_flash import GpuGroup, LiveServerless, remote
+
+DEFAULT_IMAGE_PATH = Path(__file__).resolve().parent / "poddy.jpg"  # fallback input image
+
+
+def load_default_image_base64() -> str:  # raises FileNotFoundError if poddy.jpg is missing
+    return base64.b64encode(DEFAULT_IMAGE_PATH.read_bytes()).decode("utf-8")
+
+
+gpu_config = LiveServerless(  # resource settings handed to @remote for ImageToVideoWorker
+    name="02_05_image_to_video_gpu",
+    gpus=[GpuGroup.ADA_24],
+    workersMin=0,
+    workersMax=2,
+    idleTimeout=5,  # NOTE(review): unit is not visible in this file — confirm
+)
+
+
+@remote(
+    resource_config=gpu_config,
+    dependencies=[
+        "diffusers",
+        "torch",
+        "transformers",
+        "accelerate",
+        "safetensors",
+        "pillow",
+    ],
+)
+class ImageToVideoWorker:
+    def __init__(self):
+        import torch
+        from diffusers import StableVideoDiffusionPipeline
+
+        self._torch = torch
+        self.model = "stabilityai/stable-video-diffusion-img2vid-xt"
+        self._using_cpu_offload = False
+        self.pipe = StableVideoDiffusionPipeline.from_pretrained(
+            self.model,
+            torch_dtype=torch.float16,
+            variant="fp16",
+        )
+        self.pipe.enable_attention_slicing()
+        if hasattr(self.pipe, "vae"):
+            if hasattr(self.pipe.vae, "enable_slicing"):
+                try:
+                    self.pipe.vae.enable_slicing()
+                except NotImplementedError:
+                    pass
+                except Exception:
+                    pass
+            if hasattr(self.pipe.vae, "enable_tiling"):
+                try:
+                    self.pipe.vae.enable_tiling()
+                except NotImplementedError:
+                    pass
+                except Exception:
+                    pass
+
+        if torch.cuda.is_available():
+            try:
+                self.pipe.enable_model_cpu_offload()
+                self._using_cpu_offload = True
+            except Exception:
+                self.pipe = self.pipe.to("cuda")
+        else:
+            self.pipe = self.pipe.to("cpu")
+
+    async def animate(self, input_data: dict) -> dict:
+        import base64
+        import io
+        from datetime import datetime
+
+        from PIL import Image
+
+        image_base64 = input_data.get("image_base64", "")
+        motion_bucket_id = int(input_data.get("motion_bucket_id", 127))
+        noise_aug_strength = float(input_data.get("noise_aug_strength", 0.02))
+        num_frames = int(input_data.get("num_frames", 12))
+        num_steps = int(input_data.get("num_steps", 18))
+        fps = int(input_data.get("fps", 7))
+        seed = input_data.get("seed")
+
+        if not image_base64:
+            return {"status": "error", "error": "image_base64 is required"}
+
+        try:
+            image_bytes = base64.b64decode(image_base64)
+            input_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+        except Exception as exc:
+            return {"status": "error", "error": f"Invalid input image: {exc}"}
+
+        resized_image = input_image.resize((1024, 576))
+
+        generator = None
+        if seed is not None:
+            generator_device = "cpu" if self._using_cpu_offload else "cuda"
+            if not self._torch.cuda.is_available():
+                generator_device = "cpu"
+            generator = self._torch.Generator(device=generator_device).manual_seed(int(seed))
+
+        try:
+            with self._torch.inference_mode():
+                result = self.pipe(
+                    image=resized_image,
+                    decode_chunk_size=4,
+                    motion_bucket_id=motion_bucket_id,
+                    noise_aug_strength=noise_aug_strength,
+                    num_frames=num_frames,
+                    num_inference_steps=num_steps,
+                    generator=generator,
+                    output_type="pil",
+                )
+            frames = result.frames[0]
+        except Exception as exc:
+            return {"status": "error", "error": f"Animation failed: {exc}"}
+        finally:
+            if self._torch.cuda.is_available():
+                self._torch.cuda.empty_cache()
+
+        if frames is None:
+            return {"status": "error", "error": "Model returned no frames"}
+        frames = list(frames)
+        if len(frames) == 0:
+            return {"status": "error", "error": "Model returned no frames"}
+        if not hasattr(frames[0], "save"):
+            converted_frames = []
+            for frame in frames:
+                arr = frame
+                if hasattr(arr, "dtype") and str(arr.dtype) != "uint8":
+                    arr = (arr * 255).clip(0, 255).astype("uint8")
+                converted_frames.append(Image.fromarray(arr))
+            frames = converted_frames
+
+        # GIF timing is quantized in milliseconds; clamp to 25 FPS max and report actual output FPS.
+        effective_fps = min(max(fps, 1), 25)
+        duration_ms = int(1000 / effective_fps)
+
+        gif_buffer = io.BytesIO()
+        frames[0].save(
+            gif_buffer,
+            format="GIF",
+            save_all=True,
+            append_images=frames[1:],
+            duration=duration_ms,
+            loop=0,
+        )
+        gif_buffer.seek(0)
+
+        preview_buffer = io.BytesIO()
+        frames[0].save(preview_buffer, format="PNG")
+        preview_buffer.seek(0)
+
+        return {
+            "status": "success",
+            "video_base64": base64.b64encode(gif_buffer.read()).decode("utf-8"),
+            "video_mime_type": "image/gif",
+            "preview_image_base64": base64.b64encode(preview_buffer.read()).decode("utf-8"),
+            "preview_image_mime_type": "image/png",
+            "model": self.model,
+            "input_width": input_image.width,
+            "input_height": input_image.height,
+            "render_width": 1024,
+            "render_height": 576,
+            "num_frames": len(frames),
+            "num_steps": num_steps,
+            "motion_bucket_id": motion_bucket_id,
+            "noise_aug_strength": noise_aug_strength,
+            "fps": effective_fps,
+            "seed": seed,
+            "timestamp": datetime.now().isoformat(),
+        }
+
+
+gpu_router = APIRouter()  # mounted by main.py under the /gpu prefix
+worker: ImageToVideoWorker | None = None  # created lazily by get_worker()
+
+
+def get_worker() -> ImageToVideoWorker:
+    global worker
+    if worker is None:
+        worker = ImageToVideoWorker()
+    return worker
+
+
+class ImageToVideoRequest(BaseModel):  # JSON body of POST /animate; bounds enforced by pydantic
+    image_base64: str = Field(
+        default="",
+        description="Input image encoded as base64. If omitted, defaults to poddy.jpg.",
+    )
+    motion_bucket_id: int = Field(default=127, ge=1, le=255)
+    noise_aug_strength: float = Field(default=0.02, ge=0.0, le=1.0)
+    num_frames: int = Field(default=12, ge=8, le=24)
+    num_steps: int = Field(default=18, ge=5, le=40)
+    fps: int = Field(default=7, ge=1, le=30)  # the worker clamps GIF output to 25
+    seed: int | None = Field(default=None, ge=0)
+
+
+@gpu_router.post("/animate")
+async def animate(request: ImageToVideoRequest):
+    payload = request.model_dump()
+    if not payload.get("image_base64"):
+        try:
+            payload["image_base64"] = load_default_image_base64()
+        except FileNotFoundError as exc:
+            raise HTTPException(status_code=500, detail=f"Default image not found: {exc}") from exc
+    result = await get_worker().animate(payload)
+    if result.get("status") != "success":
+        raise HTTPException(status_code=400, detail=result.get("error", "Image animation failed"))
+    return result
diff --git a/02_ml_inference/05_image_to_video/main.py b/02_ml_inference/05_image_to_video/main.py
new file mode 100644
index 0000000..ba791ee
--- /dev/null
+++ b/02_ml_inference/05_image_to_video/main.py
@@ -0,0 +1,38 @@
+import logging
+import os
+
+from fastapi import FastAPI
+from gpu_worker import gpu_router
+
+logger = logging.getLogger(__name__)
+
+app = FastAPI(
+    title="Image-to-Video API",
+    description="Animate still images on RunPod serverless GPUs",
+    version="1.0.0",
+)
+
+app.include_router(gpu_router, prefix="/gpu", tags=["Image-to-Video"])  # serves POST /gpu/animate
+
+
+@app.get("/")
+def home():
+    return {
+        "message": "Image-to-Video API",
+        "docs": "/docs",
+        "endpoints": {"animate": "/gpu/animate"},
+    }
+
+
+@app.get("/ping")
+def ping():  # constant liveness reply; does not touch the GPU worker
+    return {"status": "healthy"}
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    host = os.getenv("FLASH_HOST", "localhost")
+    port = int(os.getenv("FLASH_PORT", 8888))
+    logger.info(f"Starting Flash server on {host}:{port}")
+    uvicorn.run(app, host=host, port=port)
diff --git a/02_ml_inference/05_image_to_video/mothership.py b/02_ml_inference/05_image_to_video/mothership.py
new file mode 100644
index 0000000..7a726d3
--- /dev/null
+++ b/02_ml_inference/05_image_to_video/mothership.py
@@ -0,0 +1,7 @@
+"""Mothership endpoint configuration."""
+
+from runpod_flash import CpuLiveLoadBalancer
+
+mothership = CpuLiveLoadBalancer(  # NOTE(review): presumably picked up by Flash tooling — confirm
+    name="02_05_image_to_video-mothership",
+)
diff --git a/02_ml_inference/05_image_to_video/poddy.jpg b/02_ml_inference/05_image_to_video/poddy.jpg
new file mode 100644
index 0000000..7493710
Binary files /dev/null and b/02_ml_inference/05_image_to_video/poddy.jpg differ
diff --git a/02_ml_inference/05_image_to_video/pyproject.toml b/02_ml_inference/05_image_to_video/pyproject.toml
new file mode 100644
index 0000000..c87060f
--- /dev/null
+++ b/02_ml_inference/05_image_to_video/pyproject.toml
@@ -0,0 +1,10 @@
+[project]
+name = "flash-image-to-video"
+version = "0.1.0"
+description = "Image-to-video generation with Diffusers on RunPod Flash"
+requires-python = ">=3.10"
+dependencies = [
+    "runpod-flash",
+    "fastapi>=0.104.0",
+    "pillow>=10.0.0",
+]
diff --git a/02_ml_inference/05_image_to_video/requirements.txt b/02_ml_inference/05_image_to_video/requirements.txt
new file mode 100644
index 0000000..a73ed1a
--- /dev/null
+++ b/02_ml_inference/05_image_to_video/requirements.txt
@@ -0,0 +1 @@
+runpod-flash
diff --git a/02_ml_inference/README.md b/02_ml_inference/README.md
index cd41fed..598caa6 100644
--- a/02_ml_inference/README.md
+++ b/02_ml_inference/README.md
@@ -18,21 +18,55 @@ LLM inference API with streaming support.
 - Mistral, Mixtral
 - Qwen, Phi, Gemma
 
-### 02_image_generation _(coming soon)_
-Stable Diffusion image generation API.
+### 02_text_to_image
+Text-to-image generation API.
 
 **What you'll learn:**
-- Loading Stable Diffusion models
-- Optimizing inference with diffusers
-- Handling image uploads and downloads
-- Model caching strategies
+- Building text-to-image endpoints with `@remote` GPU workers
+- Running Diffusers pipelines on serverless GPUs
+- Returning generated images as base64 payloads
+- Tuning quality/speed tradeoffs with inference steps
 
 **Models covered:**
-- Stable Diffusion 1.5, 2.1, XL
-- SDXL Turbo
-- ControlNet integration
+- FLUX.1-schnell
 
-### 03_embeddings _(coming soon)_
+### 03_image_to_image
+Prompt-guided image transformation API with Stable Diffusion img2img.
+
+**What you'll learn:**
+- Building image-to-image endpoints with `@remote` GPU workers
+- Sending base64-encoded images through FastAPI
+- Controlling style transfer intensity with `strength` and `guidance_scale`
+- Returning transformed images from serverless workers
+
+**Models covered:**
+- Stable Diffusion v1.5 img2img pipeline
+
+### 04_text_to_video
+Prompt-guided text-to-video generation API.
+
+**What you'll learn:**
+- Building text-to-video endpoints with `@remote` GPU workers
+- Returning generated clips as portable GIF output
+- Tuning temporal quality with frames, inference steps, and guidance
+- Managing higher-memory multimodal inference workloads
+
+**Models covered:**
+- damo-vilab/text-to-video-ms-1.7b
+
+### 05_image_to_video
+Image animation API with Stable Video Diffusion.
+
+**What you'll learn:**
+- Turning still images into short animated clips on serverless GPUs
+- Sending and validating base64-encoded image inputs
+- Controlling animation dynamics with motion and noise settings
+- Returning generated clips with preview frames
+
+**Models covered:**
+- stabilityai/stable-video-diffusion-img2vid-xt
+
+### 06_embeddings _(coming soon)_
 Text embedding API for semantic search and RAG.
 
 **What you'll learn:**
@@ -46,7 +80,7 @@ Text embedding API for semantic search and RAG.
 - OpenAI-compatible embeddings
 - Multilingual models
 
-### 04_multimodal _(coming soon)_
+### 07_multimodal _(coming soon)_
 Vision-language models (CLIP, LLaVA, etc.).
 
 **What you'll learn:**
diff --git a/02_ml_inference/poddy.jpg b/02_ml_inference/poddy.jpg
new file mode 100644
index 0000000..7493710
Binary files /dev/null and b/02_ml_inference/poddy.jpg differ
diff --git a/README.md b/README.md
index 04b053c..9ef9564 100644
--- a/README.md
+++ b/README.md
@@ -136,9 +136,12 @@ Learn the fundamentals of Flash applications.
 Deploy machine learning models as APIs.
 
 - 01_text_generation - LLM inference (Llama, Mistral, etc.) _(coming soon)_
-- 02_image_generation - Stable Diffusion image generation _(coming soon)_
-- 03_embeddings - Text embeddings API _(coming soon)_
-- 04_multimodal - Vision-language models _(coming soon)_
+- **[02_text_to_image](./02_ml_inference/02_text_to_image/)** - Serverless text-to-image generation with FLUX.1-schnell
+- **[03_image_to_image](./02_ml_inference/03_image_to_image/)** - Serverless image-to-image transformations with Stable Diffusion
+- **[04_text_to_video](./02_ml_inference/04_text_to_video/)** - Serverless text-to-video generation with Diffusers
+- **[05_image_to_video](./02_ml_inference/05_image_to_video/)** - Serverless image-to-video animation with Stable Video Diffusion
+- 06_embeddings - Text embeddings API _(coming soon)_
+- 07_multimodal - Vision-language models _(coming soon)_
 
 ### 03 - Advanced Workers
 Production-ready worker patterns.
@@ -415,4 +418,3 @@ All examples are continuously tested against Python 3.10-3.14 to ensure compatib
 ## License
 
 MIT License - see [LICENSE](./LICENSE) for details.
-
diff --git a/uv.lock b/uv.lock
index 5e44705..efc2fd5 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2992,12 +2992,14 @@ name = "runpod-flash-examples"
 version = "1.0.0"
 source = { editable = "." }
 dependencies = [
+    { name = "fastapi" },
     { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
     { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
     { name = "pillow" },
     { name = "python-multipart" },
     { name = "runpod-flash" },
     { name = "structlog" },
+    { name = "uvicorn" },
 ]
 
 [package.dev-dependencies]
@@ -3014,11 +3016,13 @@ dev = [
 
 [package.metadata]
 requires-dist = [
+    { name = "fastapi", specifier = ">=0.104.0" },
     { name = "numpy", specifier = ">=2.0.2" },
     { name = "pillow", specifier = ">=10.0.0" },
     { name = "python-multipart", specifier = ">=0.0.6" },
     { name = "runpod-flash" },
     { name = "structlog", specifier = ">=23.0.0" },
+    { name = "uvicorn", specifier = ">=0.24.0" },
 ]
 
 [package.metadata.requires-dev]