"""Streamlit demo: generate images from a text prompt with minDALL-E and show them re-ranked by CLIP."""
import os
import sys

# Put the parent of this script's directory on the import path; this has to
# happen before the `dalle` imports below.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import numpy as np
import streamlit as st
from PIL import Image
import clip

from dalle.models import Dalle
from dalle.utils.utils import clip_score, download

url = "https://arena.kakaocdn.net/brainrepo/models/minDALL-E/57b008f02ceaa02b779c8b7463143315/1.3B.tar.gz"
root = os.path.expanduser("~/.cache/minDALL-E")
filename = os.path.basename(url)
pathname = filename[:-len('.tar.gz')]
# NOTE(review): expected_md5 and download_target are not referenced anywhere
# else in the visible script; kept so the module namespace is unchanged.
expected_md5 = url.split("/")[-2]
download_target = os.path.join(root, filename)
result_path = os.path.join(root, pathname)
# Only call download() when the expected path is not on disk yet.
if not os.path.exists(result_path):
    result_path = download(url, root)

device = "cpu"


def _resource_cache():
    """Return a Streamlit decorator that keeps a function's result alive across reruns.

    Streamlit re-executes this script on every widget interaction, so anything
    expensive built at top level is rebuilt each time unless it is cached.
    The newest available caching API is preferred; the legacy ``st.cache`` is
    only touched when neither of the newer ones exists.
    """
    for api_name in ("cache_resource", "experimental_singleton"):
        decorator = getattr(st, api_name, None)
        if decorator is not None:
            return decorator
    # allow_output_mutation=True: do not hash the returned model objects.
    return st.cache(allow_output_mutation=True)


@_resource_cache()
def _load_models():
    """Load the minDALL-E and CLIP models onto ``device``.

    Returns:
        A ``(dalle_model, clip_model, clip_preprocess)`` tuple.
    """
    dalle_model = Dalle.from_pretrained("minDALL-E/1.3B")  # This will automatically download the pretrained model.
    dalle_model.to(device=device)
    clip_model, clip_preprocess = clip.load("ViT-B/32", device=device)
    clip_model.to(device=device)
    return dalle_model, clip_model, clip_preprocess


model, model_clip, preprocess_clip = _load_models()


def sample(prompt):
    """Generate candidate images for ``prompt`` and return them ordered by ``clip_score``.

    Args:
        prompt: Text description passed to both the sampler and the CLIP scorer.

    Returns:
        A list of ``PIL.Image.Image`` objects, one per sampled candidate, in
        the order given by the index array that ``clip_score`` returns.
    """
    # Sampling
    images = (
        model.sampling(
            prompt=prompt,
            top_k=256,
            top_p=None,
            softmax_temperature=1.0,
            num_candidates=3,
            device=device,
        )
        .cpu()
        .numpy()
    )
    # Move axis 1 to the end (presumably channels-first -> channels-last for
    # PIL; confirm against Dalle.sampling).
    images = np.transpose(images, (0, 2, 3, 1))

    # CLIP Re-ranking
    rank = clip_score(
        prompt=prompt, images=images, model_clip=model_clip, preprocess_clip=preprocess_clip, device=device
    )

    # Reorder by rank and convert each array to an 8-bit PIL image.
    images = images[rank]
    return [Image.fromarray((image * 255).astype(np.uint8)) for image in images]


st.header("minDALL-E")
st.subheader("Generate images from text")

prompt = st.text_input("What do you want to see?")

DEBUG = False
if prompt:
    # Placeholder that first shows the waiting notice and is later overwritten
    # with the prompt once the images are on screen.
    container = st.empty()
    container.markdown(
        "\nPredictions may take up to 40s under high load. Please stand by. ",
        unsafe_allow_html=True,
    )

    print(f"Getting selections: {prompt}")
    selected = sample(prompt)

    margin = 0.1  # for better position of zoom in arrow
    n_columns = 3
    # Image columns of width 1 separated by narrow spacer columns, so image
    # columns sit at the even indices.
    cols = st.columns([1] + [margin, 1] * (n_columns - 1))
    for i, img in enumerate(selected):
        cols[(i % n_columns) * 2].image(img)
    container.markdown(f"**{prompt}**")

    st.button("Again!", key="again_button")