Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| End-to-end video generation benchmark for LTX-Video. | |
| Compares performance between: | |
| - Baseline (no optimizations) | |
| - Custom CUDA kernels (optimized RMSNorm) | |
| - torch.compile optimization | |
| Usage: | |
| python benchmark_example.py --use-optimized-kernels | |
| python benchmark_example.py --no-optimized-kernels --compile | |
| python benchmark_example.py --use-optimized-kernels --compile # Rejected: these two flags are mutually exclusive (exits with status 1) | |
| """ | |
| import argparse | |
| import sys | |
| import time | |
| import torch | |
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options for the LTX-Video benchmark.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so
            existing zero-argument callers behave as before.

    Returns:
        An ``argparse.Namespace`` with the attributes
        ``use_optimized_kernels``, ``no_optimized_kernels``, ``compile``,
        ``batch_size``, ``num_frames``, ``height``, ``width``, ``steps``
        and ``warmup_iterations``.

    Raises:
        SystemExit: Raised by argparse (status 2) on invalid input,
            including when both kernel-selection flags are given.
    """
    parser = argparse.ArgumentParser(description="LTX-Video Benchmark")

    # The two kernel flags contradict each other; let argparse reject the
    # combination instead of silently accepting both.
    kernel_mode = parser.add_mutually_exclusive_group()
    kernel_mode.add_argument("--use-optimized-kernels", action="store_true",
                             help="Use custom H100 CUDA kernels")
    kernel_mode.add_argument("--no-optimized-kernels", action="store_true",
                             help="Use baseline implementation")

    parser.add_argument("--compile", action="store_true",
                        help="Enable torch.compile on transformer")
    parser.add_argument("--batch-size", type=int, default=1,
                        help="Number of videos per prompt")
    parser.add_argument("--num-frames", type=int, default=161,
                        help="Number of frames to generate")
    parser.add_argument("--height", type=int, default=512,
                        help="Video height in pixels")
    parser.add_argument("--width", type=int, default=768,
                        help="Video width in pixels")
    parser.add_argument("--steps", type=int, default=50,
                        help="Denoising steps")
    parser.add_argument("--warmup-iterations", type=int, default=2,
                        help="Warmup runs before benchmark")
    return parser.parse_args(argv)
def main():
    """Print the benchmark configuration and environment, then setup hints.

    Reads the options via ``parse_args()``. Exits with status 1 when
    ``--use-optimized-kernels`` is combined with ``--compile`` or when CUDA
    is not available. Otherwise it reports the selected settings, the CUDA
    device name and capability, and the steps for running the full benchmark.
    """
    opts = parse_args()

    # Guard clause: this flag combination is refused before anything else runs.
    if opts.compile and opts.use_optimized_kernels:
        for message in (
            "WARNING: --use-optimized-kernels and --compile are mutually exclusive.",
            "Custom kernels require PyTorch custom op registration to work with torch.compile.",
        ):
            print(message)
        sys.exit(1)

    rule = "=" * 60
    print(rule)
    print("LTX-Video Benchmark")
    print(rule)

    print("Configuration:")
    settings = (
        ("Optimized kernels", opts.use_optimized_kernels),
        ("torch.compile", opts.compile),
        ("Batch size", opts.batch_size),
        ("Frames", opts.num_frames),
        ("Resolution", f"{opts.width}x{opts.height}"),
        ("Steps", opts.steps),
        ("Warmup iterations", opts.warmup_iterations),
    )
    for label, value in settings:
        print(f" {label}: {value}")

    if not torch.cuda.is_available():
        print("\nCUDA not available. This benchmark requires a GPU.")
        sys.exit(1)

    print(f"\nDevice: {torch.cuda.get_device_name()}")
    print(f"Capability: {torch.cuda.get_device_capability()}")

    # NOTE: Full implementation requires diffusers and ltx_kernels
    for hint in (
        "\nThis is a reference benchmark script.",
        "To run the full benchmark, install diffusers and build the CUDA kernels:",
        " pip install diffusers transformers accelerate",
        " cd examples/ltx_video && uv pip install -e .",
    ):
        print(hint)


if __name__ == "__main__":
    main()