"""
app.py

An interactive demo for text-guided panorama generation.
"""

import os
from os.path import join

from PIL import Image
import torch
import gradio as gr

from syncdiffusion.syncdiffusion_model import SyncDiffusion
from syncdiffusion.utils import seed_everything

# set device
device = torch.device("cuda")

# load SyncDiffusion model (built once at import time and shared by every request)
syncdiffusion = SyncDiffusion(device, sd_version="2.0")


def run_inference(
    prompt: str,
    width: int = 2048,
    sync_weight: float = 20.0,
    sync_thres: int = 5,
    seed: int = 0
):
    """Generate one panorama for ``prompt`` and return it wrapped in a list.

    This is the callback wired to the "Generate" button. The remaining
    sampling settings (height 512, 50 inference steps, guidance scale 7.5,
    sync decay rate 0.99, sync frequency 1, stride 16, empty negative prompt)
    are fixed for the demo.

    Args:
        prompt: Text prompt passed to the sampler.
        width: Width passed to the sampler; the height is fixed at 512.
        sync_weight: Value of the "Sync Weight" control.
        sync_thres: Value of the "Sync Threshold" control (per the UI label:
            if N, apply SyncDiffusion for the first N steps).
        seed: Random seed passed to ``seed_everything`` before sampling.
            ``None`` (an emptied number field) falls back to 0.

    Returns:
        A one-element list holding whatever ``sample_syncdiffusion`` returns,
        which is the shape the output gallery is fed with.
    """
    # UI widgets may hand numeric values over as floats (e.g. gr.Number), and
    # an emptied number field as None; normalise to the annotated types so the
    # seeding and sampling code always receives what the signature promises.
    seed = 0 if seed is None else int(seed)
    width = int(width)
    sync_thres = int(sync_thres)
    sync_weight = float(sync_weight)

    # set random seed
    seed_everything(seed)

    img = syncdiffusion.sample_syncdiffusion(
        prompts=prompt,
        negative_prompts="",
        height=512,
        width=width,
        num_inference_steps=50,
        guidance_scale=7.5,
        sync_weight=sync_weight,
        sync_decay_rate=0.99,
        sync_freq=1,
        sync_thres=sync_thres,
        stride=16,
    )
    return [img]


if __name__ == "__main__":
    title = "SyncDiffusion: Text-Guided Panorama Generation"

    description_text = '''
    This demo features text-guided panorama generation from our work SyncDiffusion: Coherent Montage via Synchronized Joint Diffusions, NeurIPS 2023. Please refer to our project page for details. (Note: Generation takes about 100 seconds for the default setting below, with the current A10G GPU.)
    '''

    # (prompt, seed, pre-rendered result under assets/) for the bundled samples;
    # shared by the clickable examples and the static sample gallery below.
    samples = [
        ('a cinematic view of a castle in the sunset', 1, "result_castle_seed_1.png"),
        ('natural landscape in anime style illustration', 2, "result_natural_seed_2.png"),
        ('a photo of a lake under the northern lights', 6, "result_northern_seed_6.png"),
    ]

    # create UI
    with gr.Blocks(title=title) as demo:
        # description of demo
        gr.Markdown(description_text)

        # inputs
        with gr.Row():
            with gr.Column():
                # The button caption is passed as `value`; `label` is not the
                # caption, so the button would not read "Generate".
                run_button = gr.Button(value="Generate")

                prompt = gr.Textbox(label="Text Prompt", value='a cinematic view of a castle in the sunset')
                width = gr.Slider(label="Width", minimum=512, maximum=3072, value=2048, step=128)
                sync_weight = gr.Slider(label="Sync Weight", minimum=0.0, maximum=30.0, value=20.0, step=5.0)
                sync_thres = gr.Slider(label="Sync Threshold (If N, apply SyncDiffusion for the first N steps)", minimum=0, maximum=15, value=5, step=1)
                seed = gr.Number(label="Seed", value=0)

            with gr.Column():
                result_gallery = gr.Gallery(label='Output', show_label=False, elem_id="gallery").style(grid=2, height='auto')

        # display examples (clicking one fills the input widgets)
        gr.Examples(
            examples=[
                [sample_prompt, 2048, 20.0, 5, sample_seed]
                for sample_prompt, sample_seed, _ in samples
            ],
            inputs=[prompt, width, sync_weight, sync_thres, seed],
        )

        # display example images, each followed by the prompt that produced it
        for sample_idx, (sample_prompt, _, sample_file) in enumerate(samples, start=1):
            with gr.Row():
                gr.Image(Image.open(join("assets", sample_file)), label=f"Sample output {sample_idx}")
            with gr.Row():
                gr.Textbox(label="Prompt", type="text", value=sample_prompt)

        ips = [prompt, width, sync_weight, sync_thres, seed]
        run_button.click(fn=run_inference, inputs=ips, outputs=[result_gallery])

    # cap the number of queued requests
    demo.queue(max_size=30)
    demo.launch()