# Source: test/skill_example/scripts/benchmark_example.py
# Author: Jack-Khuu ("Demo", commit 88a1dd2)
#!/usr/bin/env python3
"""
End-to-end video generation benchmark for LTX-Video.
Compares performance between:
- Baseline (no optimizations)
- Custom CUDA kernels (optimized RMSNorm)
- torch.compile optimization
Usage:
python benchmark_example.py --use-optimized-kernels
python benchmark_example.py --no-optimized-kernels --compile
python benchmark_example.py --use-optimized-kernels --compile # Mutually exclusive!
"""
import argparse
import sys
import time
import torch
def parse_args():
    """Parse and return the benchmark's command-line options.

    Boolean flags select the implementation variant; integer options
    control the generation workload (batch size, frames, resolution,
    denoising steps, warmup runs).
    """
    parser = argparse.ArgumentParser(description="LTX-Video Benchmark")

    # Boolean switches: all plain store_true flags.
    flag_specs = (
        ("--use-optimized-kernels", "Use custom H100 CUDA kernels"),
        ("--no-optimized-kernels", "Use baseline implementation"),
        ("--compile", "Enable torch.compile on transformer"),
    )
    for flag, help_text in flag_specs:
        parser.add_argument(flag, action="store_true", help=help_text)

    # Integer options: (flag, default, help).
    int_specs = (
        ("--batch-size", 1, "Number of videos per prompt"),
        ("--num-frames", 161, "Number of frames to generate"),
        ("--height", 512, "Video height in pixels"),
        ("--width", 768, "Video width in pixels"),
        ("--steps", 50, "Denoising steps"),
        ("--warmup-iterations", 2, "Warmup runs before benchmark"),
    )
    for flag, default, help_text in int_specs:
        parser.add_argument(flag, type=int, default=default, help=help_text)

    return parser.parse_args()
def main():
    """Run the benchmark CLI: validate flags, print the configuration,
    and verify a CUDA device is present.

    Exits with status 1 when the flag combination is invalid or no GPU
    is available. The full benchmark itself requires diffusers and the
    custom CUDA kernels (see the printed install instructions).
    """
    args = parse_args()

    # The custom kernels are not registered as PyTorch custom ops, so they
    # cannot participate in torch.compile's graph capture — reject the
    # combination up front rather than failing later.
    if args.use_optimized_kernels and args.compile:
        print("WARNING: --use-optimized-kernels and --compile are mutually exclusive.")
        print("Custom kernels require PyTorch custom op registration to work with torch.compile.")
        sys.exit(1)

    print("=" * 60)
    print("LTX-Video Benchmark")
    print("=" * 60)
    print("Configuration:")  # was an f-string with no placeholders (ruff F541)
    print(f"  Optimized kernels: {args.use_optimized_kernels}")
    print(f"  torch.compile: {args.compile}")
    print(f"  Batch size: {args.batch_size}")
    print(f"  Frames: {args.num_frames}")
    print(f"  Resolution: {args.width}x{args.height}")
    print(f"  Steps: {args.steps}")
    print(f"  Warmup iterations: {args.warmup_iterations}")

    # The benchmark is GPU-only; bail out early on CPU-only hosts.
    if not torch.cuda.is_available():
        print("\nCUDA not available. This benchmark requires a GPU.")
        sys.exit(1)

    print(f"\nDevice: {torch.cuda.get_device_name()}")
    print(f"Capability: {torch.cuda.get_device_capability()}")

    # NOTE: Full implementation requires diffusers and ltx_kernels
    print("\nThis is a reference benchmark script.")
    print("To run the full benchmark, install diffusers and build the CUDA kernels:")
    print("  pip install diffusers transformers accelerate")
    print("  cd examples/ltx_video && uv pip install -e .")


if __name__ == "__main__":
    main()