File size: 3,175 Bytes
cd67e15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46fcdda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# --- Environment setup (Colab/Jupyter shell magics) ---
# Install PyTorch and FuseDream's Python dependencies, then fetch the code.
!pip install torch torchvision torchaudio
!git clone https://github.com/gnobitab/FuseDream.git
!pip install ftfy regex tqdm numpy scipy h5py lpips==0.1.4
!pip install git+https://github.com/openai/CLIP.git
!pip install gdown
!pip install gradio
# Download pretrained BigGAN checkpoints (256px and 512px) from Google Drive.
!gdown 'https://drive.google.com/uc?id=17ymX6rhsgHDZw_g5XgAFW4xLSDocARCM'
!gdown 'https://drive.google.com/uc?id=1sOZ9og9kJLsqMNhaDnPJgzVsBZQ1sjZ5'


!ls
# Place the downloaded weights where FuseDream's loader expects them.
!cp biggan-256.pth FuseDream/BigGAN_utils/weights/
!cp biggan-512.pth FuseDream/BigGAN_utils/weights/
# Subsequent imports (BigGAN_utils, fusedream_utils, ...) resolve relative to
# the repo root, so change into it.
%cd FuseDream

import torch
from tqdm import tqdm
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
import torchvision
import BigGAN_utils.utils as utils
import clip
import torch.nn.functional as F
from DiffAugment_pytorch import DiffAugment
import numpy as np
from fusedream_utils import FuseDreamBaseGenerator, get_G, save_image
import gradio as gr


def Text_To_Img(prompt, INIT_ITERS, OPT_ITERS, NUM_BASIS, MODEL, seed):
  """Generate an image for a text prompt with FuseDream (BigGAN + AugCLIP).

  Args:
    prompt: Text description of the desired image.
    INIT_ITERS: Number of iterations for the initial latent-basis search.
    OPT_ITERS: Number of iterations for the CLIP-score optimization.
    NUM_BASIS: Number of latent basis vectors retained from the search.
    MODEL: Either "biggan-256" or "biggan-512", selecting the generator.
    seed: RNG seed for reproducibility.

  Returns:
    A tuple ``(img, score)``: the generated image and its AugCLIP score.

  Raises:
    Exception: If ``MODEL`` is not one of the supported generator names.
  """
  import sys
  sys.argv = ['']  # workaround: BigGAN_utils' argparse chokes on Jupyter's argv

  utils.seed_rng(seed)
  if MODEL == "biggan-256":
    G, config = get_G(256)
  elif MODEL == "biggan-512":
    G, config = get_G(512)
  else:
    raise Exception('Model not supported')

  generator = FuseDreamBaseGenerator(G, config, 10)
  z_cllt, y_cllt = generator.generate_basis(prompt, init_iters=INIT_ITERS, num_basis=NUM_BASIS)
  # Numpy snapshots of the basis, kept for parity with the original notebook
  # (not used further in this function).
  z_cllt_save = torch.cat(z_cllt).cpu().numpy()
  y_cllt_save = torch.cat(y_cllt).cpu().numpy()
  # BUG FIX: the original passed an undefined name `sentence` here and below,
  # which raised NameError on every call; the text to optimize against is the
  # `prompt` argument.
  img, z, y = generator.optimize_clip_score(z_cllt, y_cllt, prompt, latent_noise=False, augment=True, opt_iters=OPT_ITERS, optimize_y=True)

  # Score the optimized latents against the prompt using augmented CLIP.
  score = generator.measureAugCLIP(z, y, prompt, augment=True, num_samples=20)

  return img, score


# Build the Gradio UI: one textbox + sliders/radio mapped positionally onto
# Text_To_Img's (prompt, INIT_ITERS, OPT_ITERS, NUM_BASIS, MODEL, seed).
# NOTE: uses the legacy gr.inputs/gr.outputs API (Gradio < 3.x).
demo = gr.Interface(
    fn = Text_To_Img,
    inputs = [
              gr.inputs.Textbox(lines=1, placeholder="Enter your text prompt here.", default="", label="Prompt", optional=False),
              gr.inputs.Slider(100, 10000, step=20, default=500, label="Init_Iters"),
              gr.inputs.Slider(100, 10000, step=20, default=500, label="Opt_Iters"), 
              gr.inputs.Slider(0, 1000, step=5, default=5, label="Num_Basis"), 
              gr.inputs.Radio(["biggan-256", "biggan-512"], default="biggan-512", label="Model"), 
              # NOTE(review): step=0 looks unintended for a seed slider —
              # step=1 is the conventional choice; confirm against the Gradio
              # version in use before changing.
              gr.inputs.Slider(0, 1000, step=0, default=0, label="Seed")
              ],

    outputs = [
               gr.outputs.Image(type="auto", label="Generated Image"), 
               gr.outputs.Textbox(type="str", label="AugCLIP Score")
               ],

    #live=True,
    #examples = [
    #            ["A beautiful morning sunrise in the fields", 1000, 1000, 5, "biggan-512", 0],
    #            ["A black pyramid", 500, 500, 5, "biggan-512", 1729],
    #            ["Two dogs sitting on a bench in a park", 500, 500, 5, "biggan-256", 1234],
    #            ],
    #theme = "dark",
    title = "Text to Image Generation",
    description = "This was achieved using a combination of BIGGAN as the image generator and AugCLIP as the image-prompt correlator. Below are some examples. Try them for yourself!",
)


if __name__ == "__main__":
  # Launch the Gradio server; debug=True blocks this cell and streams errors
  # to the notebook output. The app handle and URLs are kept at module level
  # for use by later notebook cells.
  app, local_url, share_url = demo.launch(debug = True)