Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# LTX Desktop

LTX Desktop is an open-source desktop app for generating videos with LTX models — locally on supported Windows/Linux NVIDIA GPUs, with an API mode for unsupported hardware and macOS.
LTX Desktop is an open-source desktop app for generating videos with LTX models — locally on supported Windows/Linux NVIDIA GPUs and Apple Silicon Macs, with an API mode for unsupported hardware.

> **Status: Beta.** Expect breaking changes.
> Frontend architecture is under active refactor; large UI PRs may be declined for now (see [`CONTRIBUTING.md`](docs/CONTRIBUTING.md)).
Expand Down Expand Up @@ -34,7 +34,8 @@ LTX Desktop is an open-source desktop app for generating videos with LTX models
| Windows (no CUDA, <16GB VRAM, or unknown VRAM) | API-only | **LTX API key required** |
| Linux + CUDA GPU with **≥16GB VRAM** | Local generation | Downloads model weights locally |
| Linux (no CUDA, <16GB VRAM, or unknown VRAM) | API-only | **LTX API key required** |
| macOS (Apple Silicon builds) | API-only | **LTX API key required** |
| macOS + Apple Silicon with **≥15GB unified memory** | Local generation | Downloads model weights locally |
| macOS + Apple Silicon with <15GB unified memory | API-only | **LTX API key required** |

In API-only mode, available resolutions/durations may be limited to what the API supports.

Expand All @@ -55,9 +56,15 @@ In API-only mode, available resolutions/durations may be limited to what the API
- 16GB+ RAM (32GB recommended)
- Plenty of free disk space for model weights and outputs

### macOS (local generation)

- Apple Silicon (arm64) with **≥15GB unified memory**
- macOS 13+ (Ventura)
- **160GB+ free disk space** (for model weights, Python environment, and outputs)

### macOS (API-only)

- Apple Silicon (arm64)
- Apple Silicon (arm64) with <15GB unified memory
- macOS 13+ (Ventura)
- Stable internet connection

Expand Down Expand Up @@ -91,10 +98,10 @@ Text encoding: to generate videos you must configure text encoding:
The LTX API is used for:

- **Cloud text encoding and prompt enhancement** — **FREE**; text encoding is highly recommended to speed up inference and save memory
- API-based video generations (required on macOS and on unsupported Windows hardware) — paid
- API-based video generations (required on unsupported hardware and low-memory Apple Silicon Macs) — paid
- Retake — paid

An LTX API key is required in API-only mode, but optional on Windows/Linux local mode if you enable the Local Text Encoder.
An LTX API key is required in API-only mode, but optional on Windows/Linux/macOS local mode if you enable the Local Text Encoder.

Generate a FREE API key at the [LTX Console](https://console.ltx.video/). Text encoding is free; video generation API usage is paid. [Read more](https://ltx.io/model/model-blog/ltx-2-better-control-for-real-workflows).

Expand Down
33 changes: 25 additions & 8 deletions backend/handlers/health_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from handlers.pipelines_handler import PipelinesHandler
from logging_policy import log_background_exception
from services.interfaces import GpuInfo
from services.services_utils import get_device_type
from state.app_state_types import AppState, GpuSlot, StartupError, StartupLoading, StartupPending, StartupReady, VideoPipelineState, VideoPipelineWarmth

if TYPE_CHECKING:
Expand Down Expand Up @@ -108,14 +109,30 @@ def default_warmup(self) -> None:
self.set_startup_loading("Loading Fast pipeline", 30)
self._pipelines.load_gpu_pipeline("fast", should_warm=False)

self.set_startup_loading("Warming Fast pipeline", 60)
self._pipelines.warmup_pipeline("fast")
with self._lock:
match self.state.gpu_slot:
case GpuSlot(active_pipeline=VideoPipelineState() as state):
state.warmth = VideoPipelineWarmth.WARM
case _:
pass
if get_device_type(self.config.device) != "mps":
self.set_startup_loading("Warming Fast pipeline", 60)
with self._lock:
match self.state.gpu_slot:
case GpuSlot(active_pipeline=VideoPipelineState() as state):
state.warmth = VideoPipelineWarmth.WARMING
case _:
pass
try:
self._pipelines.warmup_pipeline("fast")
except Exception:
with self._lock:
match self.state.gpu_slot:
case GpuSlot(active_pipeline=VideoPipelineState() as state) if state.warmth == VideoPipelineWarmth.WARMING:
state.warmth = VideoPipelineWarmth.COLD
case _:
pass
raise
with self._lock:
match self.state.gpu_slot:
case GpuSlot(active_pipeline=VideoPipelineState() as state):
state.warmth = VideoPipelineWarmth.WARM
case _:
pass

zit_models_path = resolve_model_path(self.models_dir, self.config.model_download_specs,"zit")
zit_exists = zit_models_path.exists() and any(zit_models_path.iterdir())
Expand Down
22 changes: 21 additions & 1 deletion backend/handlers/pipelines_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

import logging
import time
from threading import RLock
from typing import TYPE_CHECKING

Expand Down Expand Up @@ -256,6 +257,16 @@ def load_gpu_pipeline(self, model_type: VideoPipelineModelType, should_warm: boo
case _:
pass

if state is not None and state.warmth == VideoPipelineWarmth.WARMING:
while state.warmth == VideoPipelineWarmth.WARMING:
time.sleep(1.0)
with self._lock:
match self.state.gpu_slot:
case GpuSlot(active_pipeline=VideoPipelineState() as refreshed):
state = refreshed
case _:
state = None

if state is None:
self._evict_gpu_pipeline_for_swap()
state = self._create_video_pipeline(model_type)
Expand Down Expand Up @@ -377,6 +388,15 @@ def load_retake_pipeline(self, *, distilled: bool = True) -> RetakePipelineState
return state

def warmup_pipeline(self, model_type: VideoPipelineModelType) -> None:
state = self.load_gpu_pipeline(model_type, should_warm=False)
with self._lock:
match self.state.gpu_slot:
case GpuSlot(active_pipeline=VideoPipelineState() as existing_state):
state: VideoPipelineState | None = existing_state
case _:
state = None

if state is None or not self._pipeline_matches_model_type(model_type):
state = self.load_gpu_pipeline(model_type, should_warm=False)

warmup_path = self.config.outputs_dir / f"_warmup_{model_type}.mp4"
state.pipeline.warmup(output_path=str(warmup_path))
16 changes: 10 additions & 6 deletions backend/handlers/video_generation_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,8 @@ def get_9_16_size(res: str) -> tuple[int, int]:
seed = self._resolve_seed()

try:
self._pipelines.load_gpu_pipeline("fast", should_warm=False)
self._generation.start_generation(generation_id)

output_path = self.generate_video(
generation_id=generation_id,
prompt=req.prompt,
image=image,
height=height,
Expand All @@ -164,6 +162,7 @@ def get_9_16_size(res: str) -> tuple[int, int]:

def generate_video(
self,
generation_id: str,
prompt: str,
image: Image.Image | None,
height: int,
Expand All @@ -186,12 +185,13 @@ def generate_video(

total_steps = 8

self._generation.update_progress("loading_model", 5, 0, total_steps)
t_load_start = time.perf_counter()
pipeline_state = self._pipelines.load_gpu_pipeline("fast", should_warm=False)
t_load_end = time.perf_counter()
logger.info("[%s] Pipeline load: %.2fs", gen_mode, t_load_end - t_load_start)

self._generation.start_generation(generation_id)
self._generation.update_progress("loading_model", 5, 0, total_steps)
self._generation.update_progress("encoding_text", 10, 0, total_steps)

enhanced_prompt = prompt + self.config.camera_motion_prompts.get(camera_motion, "")
Expand Down Expand Up @@ -224,6 +224,10 @@ def generate_video(
height = round(height / 64) * 64
width = round(width / 64) * 64

def _on_denoising_step(current_step: int, denoising_total: int) -> None:
pct = 15 + int(75 * current_step / denoising_total)
self._generation.update_progress("inference", pct, current_step, denoising_total)

t_inference_start = time.perf_counter()
pipeline_state.pipeline.generate(
prompt=enhanced_prompt,
Expand All @@ -234,6 +238,7 @@ def generate_video(
frame_rate=fps,
images=images,
output_path=str(output_path),
progress_callback=_on_denoising_step,
)
t_inference_end = time.perf_counter()
logger.info("[%s] Inference: %.2fs", gen_mode, t_inference_end - t_inference_start)
Expand Down Expand Up @@ -286,6 +291,7 @@ def _generate_a2v(
try:
a2v_state = self._pipelines.load_a2v_pipeline()
self._generation.start_generation(generation_id)
self._generation.update_progress("loading_model", 5, 0, 11)

enhanced_prompt = req.prompt + self.config.camera_motion_prompts.get(req.cameraMotion, "")
neg = req.negativePrompt if req.negativePrompt else self.config.default_negative_prompt
Expand All @@ -306,8 +312,6 @@ def _generate_a2v(
a2v_enhance = a2v_use_api and a2v_settings.prompt_enhancer_enabled_i2v
else:
a2v_enhance = a2v_use_api and a2v_settings.prompt_enhancer_enabled_t2v

self._generation.update_progress("loading_model", 5, 0, total_steps)
self._generation.update_progress("encoding_text", 10, 0, total_steps)
self._text.prepare_text_encoding(enhanced_prompt, enhance_prompt=a2v_enhance)
self._generation.update_progress("inference", 15, 0, total_steps)
Expand Down
8 changes: 8 additions & 0 deletions backend/ltx2_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@
del _safetensors_loader_fix
import services.patches.safetensors_metadata_fix as _safetensors_metadata_fix # pyright: ignore[reportUnusedImport] # Remove once safetensors supports read-only mmap
del _safetensors_metadata_fix
import services.patches.mps_layer_streaming_fix as _mps_layer_streaming_fix # pyright: ignore[reportUnusedImport] # Remove once ltx-core adds MPS awareness to _LayerStore
del _mps_layer_streaming_fix
import services.patches.mps_gpu_model_fix as _mps_gpu_model_fix # pyright: ignore[reportUnusedImport] # Remove once ltx-pipelines adds MPS awareness to gpu_model
del _mps_gpu_model_fix
import services.patches.mps_vocoder_fix as _mps_vocoder_fix # pyright: ignore[reportUnusedImport] # Remove once ltx-core adds MPS awareness to VocoderWithBWE
del _mps_vocoder_fix
import services.patches.mps_chunked_attention_fix as _mps_chunked_attention_fix # pyright: ignore[reportUnusedImport] # Remove once ltx-core ships a memory-efficient attention path for MPS
del _mps_chunked_attention_fix

from state.app_settings import AppSettings

Expand Down
4 changes: 3 additions & 1 deletion backend/runtime_config/runtime_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
def decide_force_api_generations(system: str, cuda_available: bool, vram_gb: int | None) -> bool:
"""Return whether API-only generation must be forced for this runtime."""
if system == "Darwin":
return True
if vram_gb is None:
return True
return vram_gb < 15

if system in ("Windows", "Linux"):
if not cuda_available:
Expand Down
9 changes: 7 additions & 2 deletions backend/services/a2v_pipeline/ltx_a2v_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from api_types import ImageConditioningInput
from services.ltx_pipeline_common import default_tiling_config, encode_video_output, video_chunks_number
from services.services_utils import AudioOrNone, TilingConfigType, device_supports_fp8
from services.services_utils import AudioOrNone, TilingConfigType, device_supports_fp8, get_device_type


class LTXa2vPipeline:
Expand Down Expand Up @@ -45,6 +45,11 @@ def __init__(
device=device,
quantization=QuantizationPolicy.fp8_cast() if device_supports_fp8(device) else None,
)
# MPS does not support CUDA streams or pin_memory(), so keep prefetch_count
# minimal — 1, near-synchronous layer streaming — rather than None (no
# streaming: loads the full transformer into GPU memory at once, which
# causes OOM on large generations). NOTE(review): this comment previously said
# "0"; the code sets 1 — confirm which value the MPS patch actually requires.
# The mps_layer_streaming_fix patch makes this streaming mode safe on MPS.
self._streaming_prefetch_count: int | None = 1 if get_device_type(device) == "mps" else 2

def _run_inference(
self,
Expand Down Expand Up @@ -74,7 +79,7 @@ def _run_inference(
audio_start_time=audio_start_time,
audio_max_duration=audio_max_duration,
tiling_config=tiling_config,
streaming_prefetch_count=2,
streaming_prefetch_count=self._streaming_prefetch_count,
)

@torch.inference_mode()
Expand Down
90 changes: 76 additions & 14 deletions backend/services/fast_video_pipeline/ltx_fast_video_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,58 @@

from __future__ import annotations

from collections.abc import Iterator
from collections.abc import Callable, Iterator
from contextlib import contextmanager
import os
from typing import Final, cast
from typing import Any, Final, cast

import torch

from api_types import ImageConditioningInput
from services.ltx_pipeline_common import default_tiling_config, encode_video_output, video_chunks_number
from services.services_utils import AudioOrNone, TilingConfigType, device_supports_fp8
from services.services_utils import AudioOrNone, TilingConfigType, device_supports_fp8, get_device_type

# Stage 1: 8 denoising steps, Stage 2: 3 denoising steps.
_STAGE1_STEPS = 8
_STAGE2_STEPS = 3
_TOTAL_DENOISING_STEPS = _STAGE1_STEPS + _STAGE2_STEPS

StepCallback = Callable[[int, int], None] # (current_step, total_steps)


@contextmanager
def _tqdm_progress_interceptor(callback: StepCallback) -> Iterator[None]:
    """Temporarily patch tqdm in ``ltx_pipelines.utils.samplers`` to report progress.

    The denoising loops in samplers.py use tqdm directly with no external
    callback hook. While this context manager is active, tqdm there is replaced
    by a thin wrapper that invokes ``callback(current_step, total_steps)`` after
    each completed iteration, with the step count accumulated across both
    denoising stages. The original tqdm is always restored on exit, even if
    inference raises.

    NOTE(review): the patch mutates module-global state; concurrent generations
    in the same interpreter would see each other's wrapper — assumes one
    generation at a time (confirm against the handler's locking).
    """
    import ltx_pipelines.utils.samplers as _samplers_module

    original_tqdm = _samplers_module.tqdm
    completed_steps = 0  # cumulative over Stage 1 + Stage 2 denoising loops

    class _ProgressTqdm:
        def __init__(self, iterable: Any = None, **kwargs: Any) -> None:
            self._items = list(iterable) if iterable is not None else []
            self._tqdm = original_tqdm(self._items, **kwargs)

        def __iter__(self) -> Iterator[Any]:
            nonlocal completed_steps
            for item in self._tqdm:
                yield item
                completed_steps += 1
                callback(completed_steps, _TOTAL_DENOISING_STEPS)

        def __len__(self) -> int:
            return len(self._items)

        def __getattr__(self, name: str) -> Any:
            # Forward any other tqdm API the samplers might touch
            # (update, close, set_description, ...) to the real instance,
            # so the wrapper never raises AttributeError where tqdm wouldn't.
            return getattr(self._tqdm, name)

    try:
        _samplers_module.tqdm = _ProgressTqdm  # type: ignore[attr-defined]
        yield
    finally:
        _samplers_module.tqdm = original_tqdm  # type: ignore[attr-defined]


class LTXFastVideoPipeline:
Expand Down Expand Up @@ -39,6 +82,11 @@ def __init__(self, checkpoint_path: str, gemma_root: str | None, upsampler_path:
self._upsampler_path = upsampler_path
self._device = device
self._quantization = QuantizationPolicy.fp8_cast() if device_supports_fp8(device) else None
# MPS does not support CUDA streams or pin_memory(), so keep prefetch_count
# minimal — 1, near-synchronous layer streaming — rather than None (no
# streaming: loads the full transformer into GPU memory at once, which
# causes OOM on large generations). NOTE(review): this comment previously said
# "0"; the code sets 1 — confirm which value the MPS patch actually requires.
# The mps_layer_streaming_fix patch makes this streaming mode safe on MPS.
self._streaming_prefetch_count: int | None = 1 if get_device_type(device) == "mps" else 2

self.pipeline = DistilledPipeline(
distilled_checkpoint_path=checkpoint_path,
Expand Down Expand Up @@ -71,7 +119,7 @@ def _run_inference(
frame_rate=frame_rate,
images=[_LtxImageInput(img.path, img.frame_idx, img.strength) for img in images],
tiling_config=tiling_config,
streaming_prefetch_count=2,
streaming_prefetch_count=self._streaming_prefetch_count,
)

@torch.inference_mode()
Expand All @@ -85,18 +133,32 @@ def generate(
frame_rate: float,
images: list[ImageConditioningInput],
output_path: str,
progress_callback: StepCallback | None = None,
) -> None:
tiling_config = default_tiling_config()
video, audio = self._run_inference(
prompt=prompt,
seed=seed,
height=height,
width=width,
num_frames=num_frames,
frame_rate=frame_rate,
images=images,
tiling_config=tiling_config,
)
if progress_callback is not None:
with _tqdm_progress_interceptor(progress_callback):
video, audio = self._run_inference(
prompt=prompt,
seed=seed,
height=height,
width=width,
num_frames=num_frames,
frame_rate=frame_rate,
images=images,
tiling_config=tiling_config,
)
else:
video, audio = self._run_inference(
prompt=prompt,
seed=seed,
height=height,
width=width,
num_frames=num_frames,
frame_rate=frame_rate,
images=images,
tiling_config=tiling_config,
)
chunks = video_chunks_number(num_frames, tiling_config)
encode_video_output(video=video, audio=audio, fps=int(frame_rate), output_path=output_path, video_chunks_number_value=chunks)

Expand Down
Loading