import os
import sys

# Environment setup: install dependencies, fetch the FuseDream code, and
# download the pretrained BigGAN checkpoints from Google Drive.
os.system("pip install torch torchvision torchaudio")
os.system("git clone https://github.com/gnobitab/FuseDream.git")
os.system("pip install ftfy regex tqdm numpy scipy h5py lpips==0.1.4")
os.system("pip install git+https://github.com/openai/CLIP.git")
os.system("pip install gdown")
os.system("pip install gradio")
os.system("gdown 'https://drive.google.com/uc?id=17ymX6rhsgHDZw_g5XgAFW4xLSDocARCM'")
os.system("gdown 'https://drive.google.com/uc?id=1sOZ9og9kJLsqMNhaDnPJgzVsBZQ1sjZ5'")
os.system("cp biggan-256.pth FuseDream/BigGAN_utils/weights/")
os.system("cp biggan-512.pth FuseDream/BigGAN_utils/weights/")
# Work from inside the cloned repo and put it on the import path so that
# BigGAN_utils, DiffAugment_pytorch and fusedream_utils can be imported.
os.chdir("FuseDream")
sys.path.insert(0, os.getcwd())
import torch
from tqdm import tqdm
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
import torchvision
import BigGAN_utils.utils as utils
import clip
import torch.nn.functional as F
from DiffAugment_pytorch import DiffAugment
import numpy as np
from fusedream_utils import FuseDreamBaseGenerator, get_G, save_image
import gradio as gr
def Text_To_Img(prompt, INIT_ITERS, OPT_ITERS, NUM_BASIS, MODEL, seed):
    sys.argv = ['']  # workaround for the argparse calls in BigGAN_utils when not run from a CLI
    utils.seed_rng(seed)
    # Load the requested pretrained BigGAN generator.
    if MODEL == "biggan-256":
        G, config = get_G(256)
    elif MODEL == "biggan-512":
        G, config = get_G(512)
    else:
        raise Exception('Model not supported')
    generator = FuseDreamBaseGenerator(G, config, 10)
    # Stage 1: sample a basis of latent/class vectors that already score well on CLIP.
    z_cllt, y_cllt = generator.generate_basis(prompt, init_iters=INIT_ITERS, num_basis=NUM_BASIS)
    z_cllt_save = torch.cat(z_cllt).cpu().numpy()
    y_cllt_save = torch.cat(y_cllt).cpu().numpy()
    # Stage 2: optimize the latent combination to maximize the augmented CLIP score.
    img, z, y = generator.optimize_clip_score(z_cllt, y_cllt, prompt, latent_noise=False, augment=True, opt_iters=OPT_ITERS, optimize_y=True)
    score = generator.measureAugCLIP(z, y, prompt, augment=True, num_samples=20)
    return img, score
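
# Hypothetical standalone usage (illustrative only, not executed by the Gradio app):
# Text_To_Img returns the generated image and its AugCLIP score for a prompt.
# img, score = Text_To_Img("A black pyramid", INIT_ITERS=500, OPT_ITERS=500,
#                          NUM_BASIS=5, MODEL="biggan-512", seed=1729)
# save_image(img, "fusedream_output.png")  # save_image is imported from fusedream_utils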
demo = gr.Interface(
    fn=Text_To_Img,
    inputs=[
        gr.inputs.Textbox(lines=1, placeholder="Enter your text prompt here.", default="", label="Prompt", optional=False),
        gr.inputs.Slider(100, 10000, step=20, default=500, label="Init_Iters"),
        gr.inputs.Slider(100, 10000, step=20, default=500, label="Opt_Iters"),
        gr.inputs.Slider(0, 1000, step=5, default=5, label="Num_Basis"),
        gr.inputs.Radio(["biggan-256", "biggan-512"], default="biggan-512", label="Model"),
        gr.inputs.Slider(0, 1000, step=1, default=0, label="Seed"),
    ],
    outputs=[
        gr.outputs.Image(type="auto", label="Generated Image"),
        gr.outputs.Textbox(type="str", label="AugCLIP Score"),
    ],
    #live=True,
    #examples=[
    #    ["A beautiful morning sunrise in the fields", 1000, 1000, 5, "biggan-512", 0],
    #    ["A black pyramid", 500, 500, 5, "biggan-512", 1729],
    #    ["Two dogs sitting on a bench in a park", 500, 500, 5, "biggan-256", 1234],
    #],
    #theme="dark",
    title="Text to Image Generation",
    description="Images are generated with BigGAN and guided by an AugCLIP score that measures how well each image matches the prompt (FuseDream). Try it with your own prompt!",
)
if __name__ == "__main__":
    app, local_url, share_url = demo.launch(debug=True)