from datetime import datetime

import gradio as gr
import spaces
import torch
from diffusers import FluxPipeline

# Local helper modules shipped alongside the app: FlashAttention-3 attention
# processor and ahead-of-time (AOT) compiled-artifact loader.
from fa3 import FlashFluxAttnProcessor3_0
from aoti import aoti_load_

# --- Model Loading ---
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

pipeline = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", torch_dtype=dtype
).to(device)

# Swap in the FlashAttention-3 processor, then load the AOT-compiled
# transformer package from the Hub.
pipeline.transformer.set_attn_processor(FlashFluxAttnProcessor3_0())
aoti_load_(pipeline.transformer, "zerogpu-aoti/flux-dev-aot", "flux-dev-aot.pt2")


# --- Inference ---
@spaces.GPU
def generate_image(prompt: str, progress=gr.Progress(track_tqdm=True)):
    # Fixed seed for reproducible outputs; `progress` lets Gradio track the
    # pipeline's tqdm progress bar.
    generator = torch.Generator(device="cuda").manual_seed(42)
    t0 = datetime.now()
    output = pipeline(
        prompt=prompt,
        num_inference_steps=28,
        generator=generator,
    )
    # Return (image, caption) pairs for the Gallery; the caption reports the
    # wall-clock generation time.
    return [(output.images[0], f"{(datetime.now() - t0).total_seconds():.2f}s")]


# --- Gradio UI ---
gr.Interface(
    fn=generate_image,
    inputs=gr.Text(label="Prompt"),
    outputs=gr.Gallery(),
    examples=["A cat playing with a ball of yarn"],
    cache_examples=False,
).launch()