"""Gradio demo comparing VQ-Diffusion and Stable Diffusion v1-5 on one prompt.

Both pipelines are loaded once at import time. When CUDA is available they are
loaded from the ``fp16`` revision in ``torch.float16`` and moved to the GPU;
otherwise they are loaded with the library defaults.
"""

import os

import gradio as gr
import torch
from diffusers import DiffusionPipeline

_SD_REPO = "runwayml/stable-diffusion-v1-5"
_VQ_REPO = "microsoft/vq-diffusion-ithq"
_TOKEN_ENV_VAR = "HUGGING_FACE_HUB_TOKEN"


def _load_pipeline(repo_id, on_gpu, **hub_kwargs):
    """Load the pipeline ``repo_id``, optionally as fp16 weights on CUDA.

    Args:
        repo_id: Hub repository id passed to ``DiffusionPipeline.from_pretrained``.
        on_gpu: When true, request ``torch.float16`` weights from the ``fp16``
            revision and move the loaded pipeline to ``"cuda"``.
        **hub_kwargs: Extra keyword arguments forwarded unchanged to
            ``from_pretrained`` (used here for ``use_auth_token``).

    Returns:
        The loaded pipeline object.
    """
    if not on_gpu:
        return DiffusionPipeline.from_pretrained(repo_id, **hub_kwargs)
    return DiffusionPipeline.from_pretrained(
        repo_id,
        torch_dtype=torch.float16,
        revision="fp16",
        **hub_kwargs,
    ).to("cuda")


# Probe the device once and reuse the answer for logging and loading.
cuda_ready = torch.cuda.is_available()
print(f"Is CUDA available: {cuda_ready}")
if cuda_ready:
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

# Stable Diffusion is loaded first, then VQ-Diffusion; only the Stable
# Diffusion load is given the Hub token read from the environment.
pipe_sd = _load_pipeline(
    _SD_REPO,
    cuda_ready,
    use_auth_token=os.getenv(_TOKEN_ENV_VAR),
)
pipe_vq = _load_pipeline(_VQ_REPO, cuda_ready)

# Each example is a one-element row because the interface has a single input.
examples = [
    [prompt]
    for prompt in (
        "A sketch of a palm tree.",
        "A teddy bear playing in the pool.",
        "A simple wedding cake with lego bride and groom topper and cake pops.",
        "A realistic tree using a mixture of different colored pencils.",
        "Muscular Santa Claus.",
        "A man with a pineapple head.",
        "Pebble tower standing on the left on the sea beach.",
    )
]

title = "VQ Diffusion vs. Stable Diffusion 1-5"
description = (
    "This demo compares "
    "[VQ-Diffusion-ITHQ](https://huggingface.co/microsoft/vq-diffusion-ithq) and "
    "[Stable-Diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5) "
    "for text to image generation."
)


def inference(text):
    """Generate one image per model for the prompt ``text``.

    Stable Diffusion runs first, then VQ-Diffusion (with
    ``truncation_rate=0.86``).

    Returns:
        ``[vq_diffusion_image, stable_diffusion_image]`` -- the first image of
        each pipeline's result, ordered to match the interface outputs.
    """
    sd_image = pipe_sd(text).images[0]
    vq_image = pipe_vq(text, truncation_rate=0.86).images[0]
    return [vq_image, sd_image]


io = gr.Interface(
    fn=inference,
    inputs=gr.Textbox(lines=3),
    outputs=[
        gr.Image(type="pil", label="VQ-Diffusion"),
        gr.Image(type="pil", label="Stable Diffusion"),
    ],
    title=title,
    description=description,
    examples=examples,
)
io.launch()