| |
| """ |
| Flux VAE decoder (16-ch latent → RGB image) on Neuron. |
| Checkpoint: black-forest-labs/FLUX.1-dev/vae |
| """ |
| import argparse |
| import logging |
| import time |
| from pathlib import Path |
|
|
| import torch |
| from diffusers import AutoencoderKL |
| import torch_neuronx |
| from PIL import Image |
|
|
# Configure the root logger at INFO level so the timing/shape messages logged by main() are emitted.
| logging.basicConfig(level=logging.INFO) |
# Module-level logger, named after this module.
| logger = logging.getLogger(__name__) |
|
|
|
|
def main():
    """Decode a random latent with the Flux VAE, eagerly and via ``torch.compile``.

    Steps:
      1. Parse the CLI arguments and load the VAE from the ``vae`` subfolder
         of ``--model`` in float32.
      2. Run one eager ``vae.decode`` call, then wrap ``vae.decode`` with
         ``torch.compile(backend="neuron", fullgraph=True)``.
      3. Time the first compiled call (warmup; expected to include
         compilation) and a second compiled call.
      4. Rescale the decoded tensor with ``x / 2 + 0.5``, clamp to [0, 1],
         convert to 8-bit RGB and save it to ``--output``.

    Takes no arguments (reads ``sys.argv`` through ``argparse``) and returns
    ``None``; results are reported through the module logger and the saved
    image file.
    """
    parser = argparse.ArgumentParser(
        description="Flux VAE decoder (latent → image) with torch.compile on Neuron"
    )
    parser.add_argument(
        "--model",
        type=str,
        default="/workspace/flux_weight/",
        help=(
            "Flux checkpoint (local directory or Hugging Face Hub repo id) "
            "containing a 'vae' subfolder"
        ),
    )
    parser.add_argument("--latent-ch", type=int, default=16, help="Latent channels (Flux=16)")
    parser.add_argument("--scale", type=int, default=32, help="Latent spatial size (256 px / 8)")
    parser.add_argument("--output", type=str, default="flux_vae_out.png", help="Output image path")
    args = parser.parse_args()

    # Make sure the destination directory exists *before* the expensive
    # load/compile steps, so a bad --output path cannot waste the whole run.
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)  # fixed seed -> the same random latent on every run

    vae = AutoencoderKL.from_pretrained(
        args.model, subfolder="vae", torch_dtype=torch.float32
    ).eval()

    # Random stand-in latent of shape (1, C, S, S); no real encoder output is used.
    latent = torch.randn(1, args.latent_ch, args.scale, args.scale, dtype=torch.float32)

    # One eager decode before compiling; the result is intentionally discarded.
    with torch.no_grad():
        vae.decode(latent).sample

    decode_fn = torch.compile(vae.decode, backend="neuron", fullgraph=True)

    # perf_counter() is monotonic and high-resolution, unlike time.time(),
    # which can jump if the wall clock is adjusted during a long compile.
    warmup_start = time.perf_counter()
    with torch.no_grad():
        decode_fn(latent)
    warmup_time = time.perf_counter() - warmup_start

    run_start = time.perf_counter()
    with torch.no_grad():
        image = decode_fn(latent).sample
    run_time = time.perf_counter() - run_start

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("VAE output shape: %s", image.shape)

    # Rescale with x / 2 + 0.5 and clamp into [0, 1] before quantizing.
    image = (image / 2 + 0.5).clamp(0, 1)
    # First batch item, CHW -> HWC, as a float32 NumPy array.
    hwc = image.cpu().float()[0].permute(1, 2, 0).numpy()
    # Round before the uint8 cast; a bare astype() truncates and would bias
    # every pixel downwards (e.g. 254.9 -> 254).
    pixels = (hwc * 255).round().astype("uint8")
    Image.fromarray(pixels).save(output_path)
    logger.info("Saved decoded image to %s", output_path.resolve())
|
|
|
|
# Script entry point: run main() only when this file is executed directly, not when it is imported.
| if __name__ == "__main__": |
| main() |
|
|
| """ |
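| The compilation process took more than 2 hours (note: the warmup call logged below, which includes compilation, reports 4010.52 s, i.e. about 67 minutes). |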
| /usr/local/lib/python3.10/site-packages/torch_mlir/dialects/stablehlo/__init__.py:24: UserWarning: Could not import StableHLO C++ extension: libStablehloUnifiedPythonCAPI.so.22.0git: cannot open shared object file: No such file or directory |
| warnings.warn(f"Could not import StableHLO C++ extension: {e}") |
| INFO:__main__:Warmup: 4010.52 s, Run: 22.5420 s |
| INFO:__main__:VAE output shape: torch.Size([1, 3, 256, 256]) |
| INFO:__main__:Saved decoded image to /workspace/torch_neuron_samples/torch-neuron-samples/scripts/torch_compile/flux/flux_vae_out.png |
| """ |