diff --git a/02_ml_inference/02_text_to_image/.env.example b/02_ml_inference/02_text_to_image/.env.example new file mode 100644 index 0000000..91af5f2 --- /dev/null +++ b/02_ml_inference/02_text_to_image/.env.example @@ -0,0 +1,4 @@ +# RUNPOD_API_KEY=your_api_key_here +# FLASH_HOST=localhost +# FLASH_PORT=8888 +# LOG_LEVEL=INFO diff --git a/02_ml_inference/02_text_to_image/.flashignore b/02_ml_inference/02_text_to_image/.flashignore new file mode 100644 index 0000000..10ffb6d --- /dev/null +++ b/02_ml_inference/02_text_to_image/.flashignore @@ -0,0 +1,43 @@ +# Flash Build Ignore Patterns + +# Python cache +__pycache__/ +*.pyc + +# Virtual environments +venv/ +.venv/ +env/ + +# IDE +.vscode/ +.idea/ + +# Environment files +.env +.env.local + +# Git +.git/ +.gitignore + +# Build artifacts +dist/ +build/ +*.egg-info/ + +# Flash resources +.flash_resources.pkl + +# Tests +tests/ +test_*.py +*_test.py + +# Documentation +docs/ +*.md +!README.md + +# Demo output +generated.png diff --git a/02_ml_inference/02_text_to_image/.gitignore b/02_ml_inference/02_text_to_image/.gitignore new file mode 100644 index 0000000..4ea30c5 --- /dev/null +++ b/02_ml_inference/02_text_to_image/.gitignore @@ -0,0 +1,27 @@ +# Python +__pycache__/ +*.pyc +*.pyo +*.egg-info/ +dist/ +build/ + +# Virtual environments +.venv/ +venv/ +env/ + +# Environment +.env +.env.local + +# Flash +.flash_resources.pkl +.tetra_resources.pkl + +# IDE +.vscode/ +.idea/ + +# Demo output +generated.png diff --git a/02_ml_inference/02_text_to_image/__init__.py b/02_ml_inference/02_text_to_image/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/02_ml_inference/02_text_to_image/demo.py b/02_ml_inference/02_text_to_image/demo.py new file mode 100755 index 0000000..ede93a6 --- /dev/null +++ b/02_ml_inference/02_text_to_image/demo.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +""" +Flash Demo — Generate an image with Flux and display it in your terminal. + +Usage: + 1. 
def try_imgcat(image_bytes: bytes) -> bool:
    """Display an image with the first external viewer that succeeds.

    Tries ``imgcat``, ``chafa`` and ``viu`` in that order. Each tool that is
    found on ``PATH`` is run with ``-`` as its only argument and receives the
    image bytes on stdin. A tool that cannot be started, times out, or exits
    with a non-zero status is skipped and the next candidate is tried.

    Args:
        image_bytes: Encoded image data to pipe to the viewer.

    Returns:
        True if one of the viewers exited with status 0, otherwise False.
    """
    for cmd in ("imgcat", "chafa", "viu"):
        if shutil.which(cmd) is None:
            continue
        try:
            proc = subprocess.run([cmd, "-"], input=image_bytes, timeout=5)
        except (OSError, subprocess.SubprocessError):
            # Could not launch the tool or it hung (TimeoutExpired is a
            # SubprocessError); fall through to the next viewer.
            continue
        if proc.returncode == 0:
            return True
        # A non-zero exit means this viewer could not render the image;
        # keep looking instead of giving up on the remaining tools.
    return False
def main():
    """Send a prompt to the local Flash server, save the image and show it.

    The prompt is taken from the command-line arguments (joined with spaces)
    or falls back to ``DEFAULT_PROMPT``. The decoded PNG is written to
    ``OUTPUT_FILE`` and then rendered in the terminal. The process exits with
    status 1 if the server cannot be reached, answers with an HTTP error, or
    returns a payload whose ``status`` is not ``"success"``.
    """
    prompt = " ".join(sys.argv[1:]) if len(sys.argv) > 1 else DEFAULT_PROMPT

    print()
    print(" ⚡ Flash Demo — Flux Text-to-Image")
    print(" ─────────────────────────────────────")
    print(f' Prompt: "{prompt}"')
    print(f" Server: {API_URL}")
    print()

    # Build request
    hf_token = os.environ.get("HF_TOKEN", "")
    payload = json.dumps({"prompt": prompt, "hf_token": hf_token}).encode()
    req = urllib.request.Request(
        API_URL,
        data=payload,
        headers={"Content-Type": "application/json"},
    )

    # Send request with timing
    print(" Sending to RunPod GPU worker...", end="", flush=True)
    t0 = time.time()

    try:
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(req, timeout=300) as resp:
            result = json.loads(resp.read())
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it has to be caught first.
        # The server was reachable but rejected the request; show its answer
        # instead of claiming that the connection failed.
        detail = e.read().decode("utf-8", errors="replace")
        print(f"\n\n Error from worker: HTTP {e.code}")
        if detail:
            print(f" {detail}")
        sys.exit(1)
    except urllib.error.URLError as e:
        print(f"\n\n Error: Could not connect to {API_URL}")
        print(" Make sure the Flash server is running: flash run")
        print(f" ({e})")
        sys.exit(1)

    elapsed = time.time() - t0

    if result.get("status") != "success":
        print(f"\n\n Error from worker: {result}")
        sys.exit(1)

    # Decode image
    image_bytes = base64.b64decode(result["image_base64"])
    size_kb = len(image_bytes) / 1024

    print(f" done! ({elapsed:.1f}s)")
    print(f" Image: {result.get('width')}x{result.get('height')}px, {size_kb:.0f}KB")
    print()

    # Save to disk
    with open(OUTPUT_FILE, "wb") as f:
        f.write(image_bytes)
    print(f" Saved to {OUTPUT_FILE}")
    print()

    # Display in terminal
    display_image(image_bytes)
    print()
@remote(
    resource_config=gpu_config,
    dependencies=[
        "diffusers",
        "torch",
        "transformers",
        "accelerate",
        "sentencepiece",
        "protobuf",
    ],
)
class FluxWorker:
    """Warm FLUX worker that caches the pipeline between requests."""

    def __init__(self):
        import torch

        self._torch = torch
        self._model_name = "black-forest-labs/FLUX.1-schnell"
        # Loaded lazily on the first generate() call and reused afterwards.
        self._pipe = None

    def _ensure_pipeline(self, hf_token: str):
        """Load the FLUX pipeline once; later calls return immediately.

        Args:
            hf_token: Hugging Face token. When non-empty it is passed to
                ``huggingface_hub.login`` before the model is loaded.
        """
        if self._pipe is not None:
            return

        # Import only when the pipeline actually has to be built.
        from diffusers import FluxPipeline
        from huggingface_hub import login

        if hf_token:
            login(token=hf_token)

        self._pipe = FluxPipeline.from_pretrained(
            self._model_name,
            torch_dtype=self._torch.bfloat16,
        )
        self._pipe.enable_model_cpu_offload()

    async def generate(self, input_data: dict) -> dict:
        """Generate one image and return it as a base64-encoded PNG.

        Args:
            input_data: Request dict. Recognised keys are ``prompt``,
                ``width``, ``height``, ``num_steps`` and ``hf_token``; each
                falls back to a default when missing.

        Returns:
            On success a dict with ``status="success"``, ``image_base64``,
            ``prompt``, ``width`` and ``height``. On any failure a dict with
            ``status="error"`` and an ``error`` message.
        """
        import base64
        import io

        hf_token = input_data.get("hf_token", "")
        prompt = input_data.get("prompt", "a lightning flash above a datacenter")

        # Malformed numeric values are reported through the same error dict as
        # generation failures instead of escaping as an unhandled exception.
        try:
            width = int(input_data.get("width", 512))
            height = int(input_data.get("height", 512))
            num_steps = int(input_data.get("num_steps", 4))
        except (TypeError, ValueError) as exc:
            return {"status": "error", "error": f"Invalid generation parameter: {exc}"}

        try:
            self._ensure_pipeline(hf_token=hf_token)
            image = self._pipe(
                prompt,
                num_inference_steps=num_steps,
                width=width,
                height=height,
                guidance_scale=0.0,
            ).images[0]
        except Exception as exc:
            return {"status": "error", "error": f"Image generation failed: {exc}"}

        buf = io.BytesIO()
        image.save(buf, format="PNG")
        buf.seek(0)

        return {
            "status": "success",
            "image_base64": base64.b64encode(buf.read()).decode(),
            "prompt": prompt,
            "width": width,
            "height": height,
        }
if __name__ == "__main__":
    import uvicorn

    # Without a configured handler the INFO message below is silently dropped
    # (the root logger defaults to WARNING). Honour LOG_LEVEL when it names a
    # standard level and fall back to INFO otherwise.
    level = logging.getLevelName(os.getenv("LOG_LEVEL", "INFO").upper())
    logging.basicConfig(level=level if isinstance(level, int) else logging.INFO)

    host = os.getenv("FLASH_HOST", "localhost")
    port = int(os.getenv("FLASH_PORT", "8888"))
    logger.info("Starting Flash server on %s:%s", host, port)
    uvicorn.run(app, host=host, port=port)
RUNPOD_API_KEY=your_api_key_here diff --git a/02_ml_inference/03_image_to_image/.flashignore b/02_ml_inference/03_image_to_image/.flashignore new file mode 100644 index 0000000..6c8e627 --- /dev/null +++ b/02_ml_inference/03_image_to_image/.flashignore @@ -0,0 +1,43 @@ +# Flash Build Ignore Patterns + +# Python cache +__pycache__/ +*.pyc + +# Virtual environments +venv/ +.venv/ +env/ + +# IDE +.vscode/ +.idea/ + +# Environment files +.env +.env.local + +# Git +.git/ +.gitignore + +# Build artifacts +dist/ +build/ +*.egg-info/ + +# Flash resources +.runpod/ + +# Tests +tests/ +test_*.py +*_test.py + +# Documentation +docs/ +*.md +!README.md + +# Demo output +transformed.png diff --git a/02_ml_inference/03_image_to_image/.gitignore b/02_ml_inference/03_image_to_image/.gitignore new file mode 100644 index 0000000..cf5cbb3 --- /dev/null +++ b/02_ml_inference/03_image_to_image/.gitignore @@ -0,0 +1,28 @@ +# Python +__pycache__/ +*.pyc +*.pyo +*.egg-info/ +dist/ +build/ + +# Virtual environments +.venv/ +venv/ +env/ + +# Environment +.env +.env.local + +# Flash +.flash_resources.pkl +.tetra_resources.pkl +.runpod/ + +# IDE +.vscode/ +.idea/ + +# Demo output +transformed.png diff --git a/02_ml_inference/03_image_to_image/README.md b/02_ml_inference/03_image_to_image/README.md new file mode 100644 index 0000000..2b6a52e --- /dev/null +++ b/02_ml_inference/03_image_to_image/README.md @@ -0,0 +1,71 @@ +# Image-to-Image with Stable Diffusion + +Serverless image-to-image API built with Runpod Flash and Stable Diffusion v1.5. + +## What this example does + +- Accepts an input image as base64 +- Applies prompt-guided transformation with `StableDiffusionImg2ImgPipeline` +- Returns a transformed image as base64 PNG + +## Quick Start + +```bash +cd 02_ml_inference/03_image_to_image +pip install -r requirements.txt +cp .env.example .env +# Add RUNPOD_API_KEY in .env +flash run +``` + +Open docs at `http://localhost:8888/docs`. 
+ +## Endpoint + +### POST `/gpu/transform` + +Request body: + +```json +{ + "image_base64": "", + "prompt": "turn this portrait into a cinematic oil painting", + "negative_prompt": "blurry, low quality", + "strength": 0.65, + "guidance_scale": 7.5, + "num_steps": 25, + "seed": 42 +} +``` + +Response: + +```json +{ + "status": "success", + "image_base64": "", + "model": "runwayml/stable-diffusion-v1-5", + "prompt": "...", + "negative_prompt": "...", + "strength": 0.65, + "guidance_scale": 7.5, + "num_steps": 25, + "seed": 42, + "timestamp": "2026-02-15T12:34:56.789123" +} +``` + +## Local Demo Script + +Run the demo client against your local endpoint: + +```bash +python demo.py "turn this into a watercolor painting" output.png +``` + +## Notes + +- First request can take longer because the worker and model need to warm up. +- Input images are resized to `512x512` before inference for stable memory usage. +- If `image_base64` is omitted, the endpoint uses `poddy.jpg` as the default input image. +- Quality is intentionally baseline for fast, reliable, and lower-cost demo runs; this is a starter configuration, not a max-quality preset. diff --git a/02_ml_inference/03_image_to_image/__init__.py b/02_ml_inference/03_image_to_image/__init__.py new file mode 100644 index 0000000..5d8d1d1 --- /dev/null +++ b/02_ml_inference/03_image_to_image/__init__.py @@ -0,0 +1 @@ +"""Image-to-image inference example package.""" diff --git a/02_ml_inference/03_image_to_image/demo.py b/02_ml_inference/03_image_to_image/demo.py new file mode 100644 index 0000000..cc1ef59 --- /dev/null +++ b/02_ml_inference/03_image_to_image/demo.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 +""" +Send an input image to the local Flash img2img endpoint and save the output. 
+ +Usage: + python demo.py "turn this into a watercolor painting" [output.png] + python demo.py input.png "turn this into a watercolor painting" [output.png] +""" + +import base64 +import json +import sys +import urllib.error +import urllib.request +from pathlib import Path + +API_URL = "http://localhost:8888/gpu/transform" +DEFAULT_IMAGE = Path(__file__).resolve().parent / "poddy.jpg" +DEFAULT_PROMPT = "turn this into a cinematic watercolor painting" +DEFAULT_OUTPUT = "transformed.png" + + +def main() -> None: + args = sys.argv[1:] + + if not args: + input_path = DEFAULT_IMAGE + prompt = DEFAULT_PROMPT + output_path = Path(DEFAULT_OUTPUT).resolve() + else: + first_arg_path = Path(args[0]).expanduser() + if first_arg_path.exists(): + input_path = first_arg_path.resolve() + prompt = args[1] if len(args) > 1 else DEFAULT_PROMPT + output_path = Path(args[2] if len(args) > 2 else DEFAULT_OUTPUT).resolve() + else: + input_path = DEFAULT_IMAGE + prompt = args[0] + output_path = Path(args[1] if len(args) > 1 else DEFAULT_OUTPUT).resolve() + + if not input_path.exists(): + print(f"Input image not found: {input_path}") + sys.exit(1) + + image_base64 = base64.b64encode(input_path.read_bytes()).decode("utf-8") + payload = { + "image_base64": image_base64, + "prompt": prompt, + "strength": 0.65, + "guidance_scale": 7.5, + "num_steps": 25, + } + + request = urllib.request.Request( + API_URL, + data=json.dumps(payload).encode("utf-8"), + headers={"Content-Type": "application/json"}, + method="POST", + ) + + try: + with urllib.request.urlopen(request, timeout=300) as response: + result = json.loads(response.read().decode("utf-8")) + except urllib.error.URLError as exc: + print(f"Request failed: {exc}") + print("Make sure the server is running from this folder with: flash run") + sys.exit(1) + + if result.get("status") != "success": + print(f"Worker error: {result}") + sys.exit(1) + + output_bytes = base64.b64decode(result["image_base64"]) + output_path.write_bytes(output_bytes) + 
@remote(
    resource_config=gpu_config,
    dependencies=[
        "diffusers",
        "torch",
        "transformers",
        "accelerate",
        "safetensors",
        "pillow",
    ],
)
class ImageToImageWorker:
    """GPU worker that runs Stable Diffusion v1.5 image-to-image inference."""

    def __init__(self):
        import torch
        from diffusers import StableDiffusionImg2ImgPipeline

        self._torch = torch
        self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5",
            torch_dtype=torch.float16,
            safety_checker=None,
            requires_safety_checker=False,
        )
        self.pipe = self.pipe.to("cuda")
        self.pipe.enable_attention_slicing()

    async def transform(self, input_data: dict) -> dict:
        """Transform a base64 image according to a text prompt.

        Args:
            input_data: Request dict. ``image_base64`` and ``prompt`` are
                required; ``negative_prompt``, ``strength``,
                ``guidance_scale``, ``num_steps`` and ``seed`` are optional.

        Returns:
            On success a dict with ``status="success"``, the PNG result in
            ``image_base64`` and the parameters that were used. On any
            failure a dict with ``status="error"`` and an ``error`` message.
        """
        import base64
        import io
        from datetime import datetime

        from PIL import Image

        image_base64 = input_data.get("image_base64", "")
        prompt = input_data.get("prompt", "").strip()
        negative_prompt = input_data.get("negative_prompt", "").strip()
        seed = input_data.get("seed")

        # Report malformed numbers through the error dict rather than raising.
        try:
            strength = float(input_data.get("strength", 0.65))
            guidance_scale = float(input_data.get("guidance_scale", 7.5))
            num_steps = int(input_data.get("num_steps", 25))
        except (TypeError, ValueError) as exc:
            return {"status": "error", "error": f"Invalid numeric parameter: {exc}"}

        if not image_base64:
            return {"status": "error", "error": "image_base64 is required"}
        if not prompt:
            return {"status": "error", "error": "prompt is required"}

        try:
            image_bytes = base64.b64decode(image_base64)
            # Every input is normalised to RGB 512x512 before inference.
            input_image = Image.open(io.BytesIO(image_bytes)).convert("RGB").resize((512, 512))
        except Exception as exc:
            return {"status": "error", "error": f"Invalid input image: {exc}"}

        # Seed handling and inference share one guard so that any failure
        # (bad seed, pipeline rejecting the parameters, CUDA errors) comes
        # back as an error dict, matching the sibling example workers.
        try:
            generator = None
            if seed is not None:
                generator = self._torch.Generator(device="cuda").manual_seed(int(seed))

            output_image = self.pipe(
                prompt=prompt,
                negative_prompt=negative_prompt if negative_prompt else None,
                image=input_image,
                strength=strength,
                guidance_scale=guidance_scale,
                num_inference_steps=num_steps,
                generator=generator,
            ).images[0]
        except Exception as exc:
            return {"status": "error", "error": f"Image transformation failed: {exc}"}

        output_buffer = io.BytesIO()
        output_image.save(output_buffer, format="PNG")
        output_buffer.seek(0)

        return {
            "status": "success",
            "image_base64": base64.b64encode(output_buffer.read()).decode("utf-8"),
            "model": "runwayml/stable-diffusion-v1-5",
            "prompt": prompt,
            "negative_prompt": negative_prompt or None,
            "strength": strength,
            "guidance_scale": guidance_scale,
            "num_steps": num_steps,
            "seed": seed,
            "timestamp": datetime.now().isoformat(),
        }
if __name__ == "__main__":
    import uvicorn

    # Without a configured handler the INFO message below is silently dropped
    # (the root logger defaults to WARNING). Honour LOG_LEVEL when it names a
    # standard level and fall back to INFO otherwise.
    level = logging.getLevelName(os.getenv("LOG_LEVEL", "INFO").upper())
    logging.basicConfig(level=level if isinstance(level, int) else logging.INFO)

    host = os.getenv("FLASH_HOST", "localhost")
    port = int(os.getenv("FLASH_PORT", "8888"))
    logger.info("Starting Flash server on %s:%s", host, port)
    uvicorn.run(app, host=host, port=port)
+# Flash Build Ignore Patterns + +# Python cache +__pycache__/ +*.pyc + +# Virtual environments +venv/ +.venv/ +env/ + +# IDE +.vscode/ +.idea/ + +# Environment files +.env +.env.local + +# Git +.git/ +.gitignore + +# Build artifacts +dist/ +build/ +*.egg-info/ + +# Flash resources +.runpod/ + +# Tests +tests/ +test_*.py +*_test.py + +# Documentation +docs/ +*.md +!README.md + +# Demo output +text_to_video.gif diff --git a/02_ml_inference/04_text_to_video/.gitignore b/02_ml_inference/04_text_to_video/.gitignore new file mode 100644 index 0000000..2f377a5 --- /dev/null +++ b/02_ml_inference/04_text_to_video/.gitignore @@ -0,0 +1,28 @@ +# Python +__pycache__/ +*.pyc +*.pyo +*.egg-info/ +dist/ +build/ + +# Virtual environments +.venv/ +venv/ +env/ + +# Environment +.env +.env.local + +# Flash +.flash_resources.pkl +.tetra_resources.pkl +.runpod/ + +# IDE +.vscode/ +.idea/ + +# Demo output +text_to_video.gif diff --git a/02_ml_inference/04_text_to_video/README.md b/02_ml_inference/04_text_to_video/README.md new file mode 100644 index 0000000..296c563 --- /dev/null +++ b/02_ml_inference/04_text_to_video/README.md @@ -0,0 +1,72 @@ +# Text-to-Video with Diffusers + +Serverless text-to-video API built with Runpod Flash and Diffusers. + +## What this example does + +- Accepts a text prompt +- Generates a short video clip with a GPU `@remote` worker +- Returns the generated video as base64-encoded GIF + +## Quick Start + +```bash +cd 02_ml_inference/04_text_to_video +pip install -r requirements.txt +cp .env.example .env +# Add RUNPOD_API_KEY in .env +flash run +``` + +Open docs at `http://localhost:8888/docs`. 
+ +## Endpoint + +### POST `/gpu/generate` + +Request body: + +```json +{ + "prompt": "a cinematic drone shot of snowy mountains at sunrise", + "negative_prompt": "blurry, noisy, low quality", + "num_frames": 12, + "num_steps": 18, + "guidance_scale": 7.0, + "fps": 8, + "width": 512, + "height": 288, + "seed": 42 +} +``` + +Response: + +```json +{ + "status": "success", + "video_base64": "", + "video_mime_type": "image/gif", + "preview_image_base64": "", + "preview_image_mime_type": "image/png", + "model": "damo-vilab/text-to-video-ms-1.7b", + "prompt": "...", + "num_frames": 12, + "fps": 8, + "timestamp": "2026-02-15T12:34:56.789123" +} +``` + +## Local Demo Script + +```bash +python demo.py "a cinematic drone shot of snowy mountains" output.gif +``` + +## Notes + +- First request can take longer because the worker and model need to warm up. +- This example returns GIF output for portability and simple local testing. +- GIF encoding is capped at 25 FPS; higher requested values are clamped and response `fps` reflects the encoded output. +- Quality is intentionally baseline for fast, reliable, and lower-cost demo runs; this is a starter configuration, not a max-quality preset. +- The default parameters are tuned for reliability on 24GB GPUs; increase frames/steps/resolution gradually if you want higher quality. diff --git a/02_ml_inference/04_text_to_video/__init__.py b/02_ml_inference/04_text_to_video/__init__.py new file mode 100644 index 0000000..ba611bf --- /dev/null +++ b/02_ml_inference/04_text_to_video/__init__.py @@ -0,0 +1 @@ +"""Text-to-video inference example package.""" diff --git a/02_ml_inference/04_text_to_video/demo.py b/02_ml_inference/04_text_to_video/demo.py new file mode 100644 index 0000000..c2f4caa --- /dev/null +++ b/02_ml_inference/04_text_to_video/demo.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +""" +Generate a short GIF video from a text prompt.
def main() -> None:
    """Request a GIF from the local text-to-video endpoint and save it.

    The first command-line argument is the prompt and the second the output
    path; both are optional. Exits with status 1 when the request fails or
    the worker does not report success.
    """
    cli_args = sys.argv[1:]
    if cli_args:
        prompt = cli_args[0]
    else:
        prompt = "a cinematic drone shot of snowy mountains at sunrise"
    destination = cli_args[1] if len(cli_args) > 1 else "text_to_video.gif"
    output_path = Path(destination).resolve()

    body = json.dumps(
        {
            "prompt": prompt,
            "num_frames": 12,
            "num_steps": 18,
            "guidance_scale": 7.0,
            "fps": 8,
            "width": 512,
            "height": 288,
        }
    ).encode("utf-8")
    request = urllib.request.Request(
        API_URL,
        data=body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    # Shown after every transport-level failure.
    run_hint = "Make sure the server is running from this folder with: flash run"

    try:
        with urllib.request.urlopen(request, timeout=600) as response:
            raw = response.read()
            result = json.loads(raw.decode("utf-8"))
    except urllib.error.HTTPError as exc:
        # Caught before URLError because HTTPError is its subclass.
        detail = exc.read().decode("utf-8", errors="replace")
        print(f"Request failed: HTTP {exc.code}")
        if detail:
            print(f"Server detail: {detail}")
        print(run_hint)
        sys.exit(1)
    except urllib.error.URLError as exc:
        print(f"Request failed: {exc}")
        print(run_hint)
        sys.exit(1)

    succeeded = result.get("status") == "success"
    if not succeeded:
        print(f"Worker error: {result}")
        sys.exit(1)

    gif_bytes = base64.b64decode(result["video_base64"])
    output_path.write_bytes(gif_bytes)
    print(f"Saved generated video GIF to {output_path}")
@remote(
    resource_config=gpu_config,
    dependencies=[
        "diffusers",
        "torch",
        "transformers",
        "accelerate",
        "safetensors",
        "pillow",
    ],
)
class TextToVideoWorker:
    """GPU worker that turns a text prompt into a short animated GIF."""

    def __init__(self):
        import torch
        from diffusers import DiffusionPipeline

        self._torch = torch
        self.model = "damo-vilab/text-to-video-ms-1.7b"
        self._using_cpu_offload = False
        self.pipe = DiffusionPipeline.from_pretrained(
            self.model,
            torch_dtype=torch.float16,
        )
        self.pipe.enable_attention_slicing()

        # VAE slicing/tiling are optional memory savers: enable whichever the
        # loaded VAE offers and carry on if one of them is unavailable.
        vae = getattr(self.pipe, "vae", None)
        for method_name in ("enable_slicing", "enable_tiling"):
            enable = getattr(vae, method_name, None)
            if enable is None:
                continue
            try:
                enable()
            except Exception:
                # Best effort only; the pipeline still works without it.
                pass

        if torch.cuda.is_available():
            try:
                # Prefer CPU offload for better reliability on 24GB GPUs.
                self.pipe.enable_model_cpu_offload()
                self._using_cpu_offload = True
            except Exception:
                # Fallback to full-GPU placement if offload is unavailable.
                self.pipe = self.pipe.to("cuda")
        else:
            self.pipe = self.pipe.to("cpu")

    async def generate(self, input_data: dict) -> dict:
        """Generate a video clip and return it as a base64-encoded GIF.

        Args:
            input_data: Request dict. ``prompt`` is required;
                ``negative_prompt``, ``num_frames``, ``num_steps``,
                ``guidance_scale``, ``fps``, ``width``, ``height`` and
                ``seed`` are optional.

        Returns:
            On success a dict with ``status="success"``, the GIF in
            ``video_base64``, a PNG of the first frame in
            ``preview_image_base64`` and the parameters that were used
            (``fps`` is the clamped value used for encoding). On any failure
            a dict with ``status="error"`` and an ``error`` message.
        """
        import base64
        import io
        from datetime import datetime

        prompt = input_data.get("prompt", "").strip()
        negative_prompt = input_data.get("negative_prompt", "").strip()
        seed = input_data.get("seed")

        # Report malformed numbers through the error dict rather than raising.
        try:
            num_frames = int(input_data.get("num_frames", 12))
            num_steps = int(input_data.get("num_steps", 18))
            guidance_scale = float(input_data.get("guidance_scale", 7.0))
            fps = int(input_data.get("fps", 8))
            width = int(input_data.get("width", 512))
            height = int(input_data.get("height", 288))
            generator_seed = None if seed is None else int(seed)
        except (TypeError, ValueError) as exc:
            return {"status": "error", "error": f"Invalid generation parameter: {exc}"}

        if not prompt:
            return {"status": "error", "error": "prompt is required"}
        if width % 8 != 0 or height % 8 != 0:
            return {"status": "error", "error": "width and height must be divisible by 8"}

        generator = None
        if generator_seed is not None:
            # A CUDA generator is used only when the pipeline sits fully on
            # the GPU; with CPU offload or no GPU the generator is on the CPU.
            on_gpu = self._torch.cuda.is_available() and not self._using_cpu_offload
            generator_device = "cuda" if on_gpu else "cpu"
            generator = self._torch.Generator(device=generator_device).manual_seed(generator_seed)

        try:
            with self._torch.inference_mode():
                result = self.pipe(
                    prompt=prompt,
                    negative_prompt=negative_prompt if negative_prompt else None,
                    num_frames=num_frames,
                    num_inference_steps=num_steps,
                    guidance_scale=guidance_scale,
                    width=width,
                    height=height,
                    generator=generator,
                    output_type="pil",
                )
            frames = result.frames[0]
        except Exception as exc:
            return {"status": "error", "error": f"Video generation failed: {exc}"}
        finally:
            # Release cached GPU memory whether or not generation succeeded.
            if self._torch.cuda.is_available():
                self._torch.cuda.empty_cache()

        if frames is None:
            return {"status": "error", "error": "Model returned no frames"}
        frames = list(frames)
        if len(frames) == 0:
            return {"status": "error", "error": "Model returned no frames"}
        if not hasattr(frames[0], "save"):
            # Frames are not PIL images (no .save); convert them.
            # NOTE(review): assumes array frames that are not uint8 hold
            # floats in [0, 1] -- confirm against the pipeline output.
            from PIL import Image

            converted_frames = []
            for frame in frames:
                arr = frame
                if hasattr(arr, "dtype") and str(arr.dtype) != "uint8":
                    arr = (arr * 255).clip(0, 255).astype("uint8")
                converted_frames.append(Image.fromarray(arr))
            frames = converted_frames

        # Pillow takes the per-frame duration in whole milliseconds. Clamp the
        # rate to 1..25 FPS (at most 40 ms per frame at the top end) and report
        # the clamped value back to the caller.
        effective_fps = min(max(fps, 1), 25)
        duration_ms = int(1000 / effective_fps)

        gif_buffer = io.BytesIO()
        frames[0].save(
            gif_buffer,
            format="GIF",
            save_all=True,
            append_images=frames[1:],
            duration=duration_ms,
            loop=0,
        )
        gif_buffer.seek(0)

        preview_buffer = io.BytesIO()
        frames[0].save(preview_buffer, format="PNG")
        preview_buffer.seek(0)

        return {
            "status": "success",
            "video_base64": base64.b64encode(gif_buffer.read()).decode("utf-8"),
            "video_mime_type": "image/gif",
            "preview_image_base64": base64.b64encode(preview_buffer.read()).decode("utf-8"),
            "preview_image_mime_type": "image/png",
            "model": self.model,
            "prompt": prompt,
            "negative_prompt": negative_prompt or None,
            "num_frames": len(frames),
            "fps": effective_fps,
            "num_steps": num_steps,
            "guidance_scale": guidance_scale,
            "width": width,
            "height": height,
            "seed": seed,
            "timestamp": datetime.now().isoformat(),
        }
+async def generate(request: TextToVideoRequest): + result = await get_worker().generate(request.model_dump()) + if result.get("status") != "success": + raise HTTPException(status_code=400, detail=result.get("error", "Video generation failed")) + return result diff --git a/02_ml_inference/04_text_to_video/main.py b/02_ml_inference/04_text_to_video/main.py new file mode 100644 index 0000000..abe2d89 --- /dev/null +++ b/02_ml_inference/04_text_to_video/main.py @@ -0,0 +1,38 @@ +import logging +import os + +from fastapi import FastAPI +from gpu_worker import gpu_router + +logger = logging.getLogger(__name__) + +app = FastAPI( + title="Text-to-Video API", + description="Generate short videos from text prompts on RunPod serverless GPUs", + version="1.0.0", +) + +app.include_router(gpu_router, prefix="/gpu", tags=["Text-to-Video"]) + + +@app.get("/") +def home(): + return { + "message": "Text-to-Video API", + "docs": "/docs", + "endpoints": {"generate": "/gpu/generate"}, + } + + +@app.get("/ping") +def ping(): + return {"status": "healthy"} + + +if __name__ == "__main__": + import uvicorn + + host = os.getenv("FLASH_HOST", "localhost") + port = int(os.getenv("FLASH_PORT", 8888)) + logger.info(f"Starting Flash server on {host}:{port}") + uvicorn.run(app, host=host, port=port) diff --git a/02_ml_inference/04_text_to_video/mothership.py b/02_ml_inference/04_text_to_video/mothership.py new file mode 100644 index 0000000..a4de8a8 --- /dev/null +++ b/02_ml_inference/04_text_to_video/mothership.py @@ -0,0 +1,7 @@ +"""Mothership endpoint configuration.""" + +from runpod_flash import CpuLiveLoadBalancer + +mothership = CpuLiveLoadBalancer( + name="02_04_text_to_video-mothership", +) diff --git a/02_ml_inference/04_text_to_video/pyproject.toml b/02_ml_inference/04_text_to_video/pyproject.toml new file mode 100644 index 0000000..011ac7c --- /dev/null +++ b/02_ml_inference/04_text_to_video/pyproject.toml @@ -0,0 +1,10 @@ +[project] +name = "flash-text-to-video" +version = "0.1.0" 
+description = "Text-to-video generation with Diffusers on RunPod Flash" +requires-python = ">=3.10" +dependencies = [ + "runpod-flash", + "fastapi>=0.104.0", + "pillow>=10.0.0", +] diff --git a/02_ml_inference/04_text_to_video/requirements.txt b/02_ml_inference/04_text_to_video/requirements.txt new file mode 100644 index 0000000..a73ed1a --- /dev/null +++ b/02_ml_inference/04_text_to_video/requirements.txt @@ -0,0 +1 @@ +runpod-flash diff --git a/02_ml_inference/05_image_to_video/.env.example b/02_ml_inference/05_image_to_video/.env.example new file mode 100644 index 0000000..8360712 --- /dev/null +++ b/02_ml_inference/05_image_to_video/.env.example @@ -0,0 +1,4 @@ +# FLASH_HOST=localhost +# FLASH_PORT=8888 +# LOG_LEVEL=INFO +# RUNPOD_API_KEY=your_api_key_here diff --git a/02_ml_inference/05_image_to_video/.flashignore b/02_ml_inference/05_image_to_video/.flashignore new file mode 100644 index 0000000..ac60074 --- /dev/null +++ b/02_ml_inference/05_image_to_video/.flashignore @@ -0,0 +1,43 @@ +# Flash Build Ignore Patterns + +# Python cache +__pycache__/ +*.pyc + +# Virtual environments +venv/ +.venv/ +env/ + +# IDE +.vscode/ +.idea/ + +# Environment files +.env +.env.local + +# Git +.git/ +.gitignore + +# Build artifacts +dist/ +build/ +*.egg-info/ + +# Flash resources +.runpod/ + +# Tests +tests/ +test_*.py +*_test.py + +# Documentation +docs/ +*.md +!README.md + +# Demo output +image_to_video.gif diff --git a/02_ml_inference/05_image_to_video/.gitignore b/02_ml_inference/05_image_to_video/.gitignore new file mode 100644 index 0000000..b551249 --- /dev/null +++ b/02_ml_inference/05_image_to_video/.gitignore @@ -0,0 +1,28 @@ +# Python +__pycache__/ +*.pyc +*.pyo +*.egg-info/ +dist/ +build/ + +# Virtual environments +.venv/ +venv/ +env/ + +# Environment +.env +.env.local + +# Flash +.flash_resources.pkl +.tetra_resources.pkl +.runpod/ + +# IDE +.vscode/ +.idea/ + +# Demo output +image_to_video.gif diff --git a/02_ml_inference/05_image_to_video/README.md 
b/02_ml_inference/05_image_to_video/README.md new file mode 100644 index 0000000..acf55de --- /dev/null +++ b/02_ml_inference/05_image_to_video/README.md @@ -0,0 +1,76 @@ +# Image-to-Video with Stable Video Diffusion + +Serverless image-to-video API built with Runpod Flash and Stable Video Diffusion. + +## What this example does + +- Accepts an input image as base64 +- Animates the image into a short clip with `StableVideoDiffusionPipeline` +- Returns the generated video as base64-encoded GIF + +## Quick Start + +```bash +cd 02_ml_inference/05_image_to_video +pip install -r requirements.txt +cp .env.example .env +# Add RUNPOD_API_KEY in .env +flash run +``` + +Open docs at `http://localhost:8888/docs`. + +## Endpoint + +### POST `/gpu/animate` + +Request body: + +```json +{ + "image_base64": "", + "motion_bucket_id": 127, + "noise_aug_strength": 0.02, + "num_frames": 12, + "num_steps": 18, + "fps": 7, + "seed": 42 +} +``` + +Response: + +```json +{ + "status": "success", + "video_base64": "", + "video_mime_type": "image/gif", + "preview_image_base64": "", + "preview_image_mime_type": "image/png", + "model": "stabilityai/stable-video-diffusion-img2vid-xt", + "input_width": 1920, + "input_height": 1080, + "render_width": 1024, + "render_height": 576, + "num_frames": 16, + "timestamp": "2026-02-15T12:34:56.789123" +} +``` + +## Local Demo Script + +```bash +python demo.py +# or explicitly: +python demo.py input.png output.gif +``` + +## Notes + +- First request can take longer because the worker and model need to warm up. +- Input images are resized to `1024x576` before animation for predictable memory usage. +- This example returns GIF output for portability and simple local testing. +- GIF encoding is capped at 25 FPS; higher requested values are clamped and response `fps` reflects the encoded output. +- If `image_base64` is omitted, the endpoint uses `poddy.jpg` as the default input image. 
+- Quality is intentionally baseline for fast, reliable, and lower-cost demo runs; this is a starter configuration, not a max-quality preset. +- The default parameters are tuned for reliability on 24GB GPUs; increase frames/steps gradually if you want higher quality. diff --git a/02_ml_inference/05_image_to_video/__init__.py b/02_ml_inference/05_image_to_video/__init__.py new file mode 100644 index 0000000..640a62b --- /dev/null +++ b/02_ml_inference/05_image_to_video/__init__.py @@ -0,0 +1 @@ +"""Image-to-video inference example package.""" diff --git a/02_ml_inference/05_image_to_video/demo.py b/02_ml_inference/05_image_to_video/demo.py new file mode 100644 index 0000000..dca1582 --- /dev/null +++ b/02_ml_inference/05_image_to_video/demo.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +""" +Animate an input image into a short GIF video. + +Usage: + python demo.py [input.png] [output.gif] +""" + +import base64 +import json +import sys +import urllib.error +import urllib.request +from pathlib import Path + +API_URL = "http://localhost:8888/gpu/animate" +DEFAULT_IMAGE = Path(__file__).resolve().parent / "poddy.jpg" +DEFAULT_OUTPUT = "image_to_video.gif" + + +def main() -> None: + input_path = Path(sys.argv[1]).expanduser().resolve() if len(sys.argv) > 1 else DEFAULT_IMAGE + output_path = Path(sys.argv[2] if len(sys.argv) > 2 else DEFAULT_OUTPUT).resolve() + + if not input_path.exists(): + print(f"Input image not found: {input_path}") + sys.exit(1) + + image_base64 = base64.b64encode(input_path.read_bytes()).decode("utf-8") + payload = { + "image_base64": image_base64, + "motion_bucket_id": 127, + "noise_aug_strength": 0.02, + "num_frames": 12, + "num_steps": 18, + "fps": 7, + } + + request = urllib.request.Request( + API_URL, + data=json.dumps(payload).encode("utf-8"), + headers={"Content-Type": "application/json"}, + method="POST", + ) + + try: + with urllib.request.urlopen(request, timeout=600) as response: + result = json.loads(response.read().decode("utf-8")) + 
except urllib.error.HTTPError as exc: + body = exc.read().decode("utf-8", errors="replace") + print(f"Request failed: HTTP {exc.code}") + if body: + print(f"Server detail: {body}") + print("Make sure the server is running from this folder with: flash run") + sys.exit(1) + except urllib.error.URLError as exc: + print(f"Request failed: {exc}") + print("Make sure the server is running from this folder with: flash run") + sys.exit(1) + + if result.get("status") != "success": + print(f"Worker error: {result}") + sys.exit(1) + + output_bytes = base64.b64decode(result["video_base64"]) + output_path.write_bytes(output_bytes) + print(f"Saved animated video GIF to {output_path}") + + +if __name__ == "__main__": + main() diff --git a/02_ml_inference/05_image_to_video/gpu_worker.py b/02_ml_inference/05_image_to_video/gpu_worker.py new file mode 100644 index 0000000..06f9b45 --- /dev/null +++ b/02_ml_inference/05_image_to_video/gpu_worker.py @@ -0,0 +1,215 @@ +import base64 +from pathlib import Path + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel, Field +from runpod_flash import GpuGroup, LiveServerless, remote + +DEFAULT_IMAGE_PATH = Path(__file__).resolve().parent / "poddy.jpg" + + +def load_default_image_base64() -> str: + return base64.b64encode(DEFAULT_IMAGE_PATH.read_bytes()).decode("utf-8") + + +gpu_config = LiveServerless( + name="02_05_image_to_video_gpu", + gpus=[GpuGroup.ADA_24], + workersMin=0, + workersMax=2, + idleTimeout=5, +) + + +@remote( + resource_config=gpu_config, + dependencies=[ + "diffusers", + "torch", + "transformers", + "accelerate", + "safetensors", + "pillow", + ], +) +class ImageToVideoWorker: + def __init__(self): + import torch + from diffusers import StableVideoDiffusionPipeline + + self._torch = torch + self.model = "stabilityai/stable-video-diffusion-img2vid-xt" + self._using_cpu_offload = False + self.pipe = StableVideoDiffusionPipeline.from_pretrained( + self.model, + torch_dtype=torch.float16, + 
variant="fp16", + ) + self.pipe.enable_attention_slicing() + if hasattr(self.pipe, "vae"): + if hasattr(self.pipe.vae, "enable_slicing"): + try: + self.pipe.vae.enable_slicing() + except NotImplementedError: + pass + except Exception: + pass + if hasattr(self.pipe.vae, "enable_tiling"): + try: + self.pipe.vae.enable_tiling() + except NotImplementedError: + pass + except Exception: + pass + + if torch.cuda.is_available(): + try: + self.pipe.enable_model_cpu_offload() + self._using_cpu_offload = True + except Exception: + self.pipe = self.pipe.to("cuda") + else: + self.pipe = self.pipe.to("cpu") + + async def animate(self, input_data: dict) -> dict: + import base64 + import io + from datetime import datetime + + from PIL import Image + + image_base64 = input_data.get("image_base64", "") + motion_bucket_id = int(input_data.get("motion_bucket_id", 127)) + noise_aug_strength = float(input_data.get("noise_aug_strength", 0.02)) + num_frames = int(input_data.get("num_frames", 12)) + num_steps = int(input_data.get("num_steps", 18)) + fps = int(input_data.get("fps", 7)) + seed = input_data.get("seed") + + if not image_base64: + return {"status": "error", "error": "image_base64 is required"} + + try: + image_bytes = base64.b64decode(image_base64) + input_image = Image.open(io.BytesIO(image_bytes)).convert("RGB") + except Exception as exc: + return {"status": "error", "error": f"Invalid input image: {exc}"} + + resized_image = input_image.resize((1024, 576)) + + generator = None + if seed is not None: + generator_device = "cpu" if self._using_cpu_offload else "cuda" + if not self._torch.cuda.is_available(): + generator_device = "cpu" + generator = self._torch.Generator(device=generator_device).manual_seed(int(seed)) + + try: + with self._torch.inference_mode(): + result = self.pipe( + image=resized_image, + decode_chunk_size=4, + motion_bucket_id=motion_bucket_id, + noise_aug_strength=noise_aug_strength, + num_frames=num_frames, + num_inference_steps=num_steps, + 
generator=generator, + output_type="pil", + ) + frames = result.frames[0] + except Exception as exc: + return {"status": "error", "error": f"Animation failed: {exc}"} + finally: + if self._torch.cuda.is_available(): + self._torch.cuda.empty_cache() + + if frames is None: + return {"status": "error", "error": "Model returned no frames"} + frames = list(frames) + if len(frames) == 0: + return {"status": "error", "error": "Model returned no frames"} + if not hasattr(frames[0], "save"): + converted_frames = [] + for frame in frames: + arr = frame + if hasattr(arr, "dtype") and str(arr.dtype) != "uint8": + arr = (arr * 255).clip(0, 255).astype("uint8") + converted_frames.append(Image.fromarray(arr)) + frames = converted_frames + + # GIF timing is quantized in milliseconds; clamp to 25 FPS max and report actual output FPS. + effective_fps = min(max(fps, 1), 25) + duration_ms = int(1000 / effective_fps) + + gif_buffer = io.BytesIO() + frames[0].save( + gif_buffer, + format="GIF", + save_all=True, + append_images=frames[1:], + duration=duration_ms, + loop=0, + ) + gif_buffer.seek(0) + + preview_buffer = io.BytesIO() + frames[0].save(preview_buffer, format="PNG") + preview_buffer.seek(0) + + return { + "status": "success", + "video_base64": base64.b64encode(gif_buffer.read()).decode("utf-8"), + "video_mime_type": "image/gif", + "preview_image_base64": base64.b64encode(preview_buffer.read()).decode("utf-8"), + "preview_image_mime_type": "image/png", + "model": self.model, + "input_width": input_image.width, + "input_height": input_image.height, + "render_width": 1024, + "render_height": 576, + "num_frames": len(frames), + "num_steps": num_steps, + "motion_bucket_id": motion_bucket_id, + "noise_aug_strength": noise_aug_strength, + "fps": effective_fps, + "seed": seed, + "timestamp": datetime.now().isoformat(), + } + + +gpu_router = APIRouter() +worker: ImageToVideoWorker | None = None + + +def get_worker() -> ImageToVideoWorker: + global worker + if worker is None: + worker = 
ImageToVideoWorker() + return worker + + +class ImageToVideoRequest(BaseModel): + image_base64: str = Field( + default="", + description="Input image encoded as base64. If omitted, defaults to poddy.jpg.", + ) + motion_bucket_id: int = Field(default=127, ge=1, le=255) + noise_aug_strength: float = Field(default=0.02, ge=0.0, le=1.0) + num_frames: int = Field(default=12, ge=8, le=24) + num_steps: int = Field(default=18, ge=5, le=40) + fps: int = Field(default=7, ge=1, le=30) + seed: int | None = Field(default=None, ge=0) + + +@gpu_router.post("/animate") +async def animate(request: ImageToVideoRequest): + payload = request.model_dump() + if not payload.get("image_base64"): + try: + payload["image_base64"] = load_default_image_base64() + except FileNotFoundError as exc: + raise HTTPException(status_code=500, detail=f"Default image not found: {exc}") from exc + result = await get_worker().animate(payload) + if result.get("status") != "success": + raise HTTPException(status_code=400, detail=result.get("error", "Image animation failed")) + return result diff --git a/02_ml_inference/05_image_to_video/main.py b/02_ml_inference/05_image_to_video/main.py new file mode 100644 index 0000000..ba791ee --- /dev/null +++ b/02_ml_inference/05_image_to_video/main.py @@ -0,0 +1,38 @@ +import logging +import os + +from fastapi import FastAPI +from gpu_worker import gpu_router + +logger = logging.getLogger(__name__) + +app = FastAPI( + title="Image-to-Video API", + description="Animate still images on RunPod serverless GPUs", + version="1.0.0", +) + +app.include_router(gpu_router, prefix="/gpu", tags=["Image-to-Video"]) + + +@app.get("/") +def home(): + return { + "message": "Image-to-Video API", + "docs": "/docs", + "endpoints": {"animate": "/gpu/animate"}, + } + + +@app.get("/ping") +def ping(): + return {"status": "healthy"} + + +if __name__ == "__main__": + import uvicorn + + host = os.getenv("FLASH_HOST", "localhost") + port = int(os.getenv("FLASH_PORT", 8888)) + 
logger.info(f"Starting Flash server on {host}:{port}") + uvicorn.run(app, host=host, port=port) diff --git a/02_ml_inference/05_image_to_video/mothership.py b/02_ml_inference/05_image_to_video/mothership.py new file mode 100644 index 0000000..7a726d3 --- /dev/null +++ b/02_ml_inference/05_image_to_video/mothership.py @@ -0,0 +1,7 @@ +"""Mothership endpoint configuration.""" + +from runpod_flash import CpuLiveLoadBalancer + +mothership = CpuLiveLoadBalancer( + name="02_05_image_to_video-mothership", +) diff --git a/02_ml_inference/05_image_to_video/poddy.jpg b/02_ml_inference/05_image_to_video/poddy.jpg new file mode 100644 index 0000000..7493710 Binary files /dev/null and b/02_ml_inference/05_image_to_video/poddy.jpg differ diff --git a/02_ml_inference/05_image_to_video/pyproject.toml b/02_ml_inference/05_image_to_video/pyproject.toml new file mode 100644 index 0000000..c87060f --- /dev/null +++ b/02_ml_inference/05_image_to_video/pyproject.toml @@ -0,0 +1,10 @@ +[project] +name = "flash-image-to-video" +version = "0.1.0" +description = "Image-to-video generation with Diffusers on RunPod Flash" +requires-python = ">=3.10" +dependencies = [ + "runpod-flash", + "fastapi>=0.104.0", + "pillow>=10.0.0", +] diff --git a/02_ml_inference/05_image_to_video/requirements.txt b/02_ml_inference/05_image_to_video/requirements.txt new file mode 100644 index 0000000..a73ed1a --- /dev/null +++ b/02_ml_inference/05_image_to_video/requirements.txt @@ -0,0 +1 @@ +runpod-flash diff --git a/02_ml_inference/README.md b/02_ml_inference/README.md index cd41fed..598caa6 100644 --- a/02_ml_inference/README.md +++ b/02_ml_inference/README.md @@ -18,21 +18,55 @@ LLM inference API with streaming support. - Mistral, Mixtral - Qwen, Phi, Gemma -### 02_image_generation _(coming soon)_ -Stable Diffusion image generation API. +### 02_text_to_image +Text-to-image generation API. 
**What you'll learn:** -- Loading Stable Diffusion models -- Optimizing inference with diffusers -- Handling image uploads and downloads -- Model caching strategies +- Building text-to-image endpoints with `@remote` GPU workers +- Running Diffusers pipelines on serverless GPUs +- Returning generated images as base64 payloads +- Tuning quality/speed tradeoffs with inference steps **Models covered:** -- Stable Diffusion 1.5, 2.1, XL -- SDXL Turbo -- ControlNet integration +- FLUX.1-schnell -### 03_embeddings _(coming soon)_ +### 03_image_to_image +Prompt-guided image transformation API with Stable Diffusion img2img. + +**What you'll learn:** +- Building image-to-image endpoints with `@remote` GPU workers +- Sending base64-encoded images through FastAPI +- Controlling style transfer intensity with `strength` and `guidance_scale` +- Returning transformed images from serverless workers + +**Models covered:** +- Stable Diffusion v1.5 img2img pipeline + +### 04_text_to_video +Prompt-guided text-to-video generation API. + +**What you'll learn:** +- Building text-to-video endpoints with `@remote` GPU workers +- Returning generated clips as portable GIF output +- Tuning temporal quality with frames, inference steps, and guidance +- Managing higher-memory multimodal inference workloads + +**Models covered:** +- damo-vilab/text-to-video-ms-1.7b + +### 05_image_to_video +Image animation API with Stable Video Diffusion. + +**What you'll learn:** +- Turning still images into short animated clips on serverless GPUs +- Sending and validating base64-encoded image inputs +- Controlling animation dynamics with motion and noise settings +- Returning generated clips with preview frames + +**Models covered:** +- stabilityai/stable-video-diffusion-img2vid-xt + +### 06_embeddings _(coming soon)_ Text embedding API for semantic search and RAG. **What you'll learn:** @@ -46,7 +80,7 @@ Text embedding API for semantic search and RAG. 
- OpenAI-compatible embeddings - Multilingual models -### 04_multimodal _(coming soon)_ +### 07_multimodal _(coming soon)_ Vision-language models (CLIP, LLaVA, etc.). **What you'll learn:** diff --git a/02_ml_inference/poddy.jpg b/02_ml_inference/poddy.jpg new file mode 100644 index 0000000..7493710 Binary files /dev/null and b/02_ml_inference/poddy.jpg differ diff --git a/README.md b/README.md index 04b053c..9ef9564 100644 --- a/README.md +++ b/README.md @@ -136,9 +136,12 @@ Learn the fundamentals of Flash applications. Deploy machine learning models as APIs. - 01_text_generation - LLM inference (Llama, Mistral, etc.) _(coming soon)_ -- 02_image_generation - Stable Diffusion image generation _(coming soon)_ -- 03_embeddings - Text embeddings API _(coming soon)_ -- 04_multimodal - Vision-language models _(coming soon)_ +- **[02_text_to_image](./02_ml_inference/02_text_to_image/)** - Serverless text-to-image generation with FLUX.1-schnell +- **[03_image_to_image](./02_ml_inference/03_image_to_image/)** - Serverless image-to-image transformations with Stable Diffusion +- **[04_text_to_video](./02_ml_inference/04_text_to_video/)** - Serverless text-to-video generation with Diffusers +- **[05_image_to_video](./02_ml_inference/05_image_to_video/)** - Serverless image-to-video animation with Stable Video Diffusion +- 06_embeddings - Text embeddings API _(coming soon)_ +- 07_multimodal - Vision-language models _(coming soon)_ ### 03 - Advanced Workers Production-ready worker patterns. @@ -415,4 +418,3 @@ All examples are continuously tested against Python 3.10-3.14 to ensure compatib ## License MIT License - see [LICENSE](./LICENSE) for details. - diff --git a/uv.lock b/uv.lock index 5e44705..efc2fd5 100644 --- a/uv.lock +++ b/uv.lock @@ -2992,12 +2992,14 @@ name = "runpod-flash-examples" version = "1.0.0" source = { editable = "." 
} dependencies = [ + { name = "fastapi" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "pillow" }, { name = "python-multipart" }, { name = "runpod-flash" }, { name = "structlog" }, + { name = "uvicorn" }, ] [package.dev-dependencies] @@ -3014,11 +3016,13 @@ dev = [ [package.metadata] requires-dist = [ + { name = "fastapi", specifier = ">=0.104.0" }, { name = "numpy", specifier = ">=2.0.2" }, { name = "pillow", specifier = ">=10.0.0" }, { name = "python-multipart", specifier = ">=0.0.6" }, { name = "runpod-flash" }, { name = "structlog", specifier = ">=23.0.0" }, + { name = "uvicorn", specifier = ">=0.24.0" }, ] [package.metadata.requires-dev]