!pip install torch torchvision torchaudio !git clone https://github.com/gnobitab/FuseDream.git !pip install ftfy regex tqdm numpy scipy h5py lpips==0.1.4 !pip install git+https://github.com/openai/CLIP.git !pip install gdown !pip install gradio !gdown 'https://drive.google.com/uc?id=17ymX6rhsgHDZw_g5XgAFW4xLSDocARCM' !gdown 'https://drive.google.com/uc?id=1sOZ9og9kJLsqMNhaDnPJgzVsBZQ1sjZ5' !ls !cp biggan-256.pth FuseDream/BigGAN_utils/weights/ !cp biggan-512.pth FuseDream/BigGAN_utils/weights/ %cd FuseDream import torch from tqdm import tqdm from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize import torchvision import BigGAN_utils.utils as utils import clip import torch.nn.functional as F from DiffAugment_pytorch import DiffAugment import numpy as np from fusedream_utils import FuseDreamBaseGenerator, get_G, save_image import gradio as gr def Text_To_Img(prompt, INIT_ITERS, OPT_ITERS, NUM_BASIS, MODEL, seed): import sys sys.argv = [''] ### workaround to deal with the argparse in Jupyter utils.seed_rng(seed) if MODEL == "biggan-256": G, config = get_G(256) elif MODEL == "biggan-512": G, config = get_G(512) else: raise Exception('Model not supported') generator = FuseDreamBaseGenerator(G, config, 10) z_cllt, y_cllt = generator.generate_basis(prompt, init_iters=INIT_ITERS, num_basis=NUM_BASIS) z_cllt_save = torch.cat(z_cllt).cpu().numpy() y_cllt_save = torch.cat(y_cllt).cpu().numpy() img, z, y = generator.optimize_clip_score(z_cllt, y_cllt, sentence, latent_noise=False, augment=True, opt_iters=OPT_ITERS, optimize_y=True) score = generator.measureAugCLIP(z, y, sentence, augment=True, num_samples=20) return img, score demo = gr.Interface( fn = Text_To_Img, inputs = [ gr.inputs.Textbox(lines=1, placeholder="Enter your text prompt here.", default="", label="Prompt", optional=False), gr.inputs.Slider(100, 10000, step=20, default=500, label="Init_Iters"), gr.inputs.Slider(100, 10000, step=20, default=500, label="Opt_Iters"), gr.inputs.Slider(0, 1000, step=5, default=5, label="Num_Basis"), gr.inputs.Radio(["biggan-256", "biggan-512"], default="biggan-512", label="Model"), gr.inputs.Slider(0, 1000, step=0, default=0, label="Seed") ], outputs = [ gr.outputs.Image(type="auto", label="Generated Image"), gr.outputs.Textbox(type="str", label="AugCLIP Score") ], #live=True, #examples = [ # ["A beautiful morning sunrise in the fields", 1000, 1000, 5, "biggan-512", 0], # ["A black pyramid", 500, 500, 5, "biggan-512", 1729], # ["Two dogs sitting on a bench in a park", 500, 500, 5, "biggan-256", 1234], # ], #theme = "dark", title = "Text to Image Generation", description = "This was achieved using a combination of BIGGAN as the image generator and AugCLIP as the image-prompt correlator. Below are some examples. Try them for yourself!", ) if __name__ == "__main__": app, local_url, share_url = demo.launch(debug = True)