helliun committed on
Commit 067f0ce
1 Parent(s): 1d82191

Upload app.py

Files changed (1)
  1. app.py +200 -0
app.py ADDED
@@ -0,0 +1,200 @@
+ from pydoc import describe
+ import gradio as gr
+ import torch
+ from omegaconf import OmegaConf
+ import sys
+ sys.path.append(".")
+ sys.path.append('./taming-transformers')
+ sys.path.append('./latent-diffusion')
+ from taming.models import vqgan
+ from ldm.util import instantiate_from_config
+ from huggingface_hub import hf_hub_download
+ 
+ model_path_e = hf_hub_download(repo_id="multimodalart/compvis-latent-diffusion-text2img-large", filename="txt2img-f8-large.ckpt")
+ 
+ #@title Import stuff
+ import argparse, os, sys, glob
+ import numpy as np
+ from PIL import Image
+ from einops import rearrange
+ from torchvision.utils import make_grid
+ import transformers
+ import gc
+ from ldm.util import instantiate_from_config
+ from ldm.models.diffusion.ddim import DDIMSampler
+ from ldm.models.diffusion.plms import PLMSSampler
+ from open_clip import tokenizer
+ import open_clip
+ 
+ def load_model_from_config(config, ckpt, verbose=False):
+     print(f"Loading model from {ckpt}")
+     pl_sd = torch.load(ckpt, map_location="cuda")
+     sd = pl_sd["state_dict"]
+     model = instantiate_from_config(config.model)
+     m, u = model.load_state_dict(sd, strict=False)
+     if len(m) > 0 and verbose:
+         print("missing keys:")
+         print(m)
+     if len(u) > 0 and verbose:
+         print("unexpected keys:")
+         print(u)
+ 
+     model = model.half().cuda()
+     model.eval()
+     return model
+ 
+ def load_safety_model(clip_model):
+     """load the safety model"""
+     import autokeras as ak  # pylint: disable=import-outside-toplevel
+     from tensorflow.keras.models import load_model  # pylint: disable=import-outside-toplevel
+     from os.path import expanduser  # pylint: disable=import-outside-toplevel
+ 
+     home = expanduser("~")
+ 
+     cache_folder = home + "/.cache/clip_retrieval/" + clip_model.replace("/", "_")
+     if clip_model == "ViT-L/14":
+         model_dir = cache_folder + "/clip_autokeras_binary_nsfw"
+         dim = 768
+     elif clip_model == "ViT-B/32":
+         model_dir = cache_folder + "/clip_autokeras_nsfw_b32"
+         dim = 512
+     else:
+         raise ValueError("Unknown clip model")
+     if not os.path.exists(model_dir):
+         os.makedirs(cache_folder, exist_ok=True)
+ 
+         from urllib.request import urlretrieve  # pylint: disable=import-outside-toplevel
+ 
+         path_to_zip_file = cache_folder + "/clip_autokeras_binary_nsfw.zip"
+         if clip_model == "ViT-L/14":
+             url_model = "https://raw.githubusercontent.com/LAION-AI/CLIP-based-NSFW-Detector/main/clip_autokeras_binary_nsfw.zip"
+         elif clip_model == "ViT-B/32":
+             url_model = (
+                 "https://raw.githubusercontent.com/LAION-AI/CLIP-based-NSFW-Detector/main/clip_autokeras_nsfw_b32.zip"
+             )
+         else:
+             raise ValueError("Unknown model {}".format(clip_model))
+         urlretrieve(url_model, path_to_zip_file)
+         import zipfile  # pylint: disable=import-outside-toplevel
+ 
+         with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
+             zip_ref.extractall(cache_folder)
+ 
+     loaded_model = load_model(model_dir, custom_objects=ak.CUSTOM_OBJECTS)
+     loaded_model.predict(np.random.rand(10 ** 3, dim).astype("float32"), batch_size=10 ** 3)
+ 
+     return loaded_model
+ 
+ def is_unsafe(safety_model, embeddings, threshold=0.5):
+     """find unsafe embeddings"""
+     nsfw_values = safety_model.predict(embeddings, batch_size=embeddings.shape[0])
+     x = np.array([e[0] for e in nsfw_values])
+     # flag the batch if any embedding scores above the NSFW threshold
+     return bool((x > threshold).any())
+ 
+ config = OmegaConf.load("latent-diffusion/configs/latent-diffusion/txt2img-1p4B-eval.yaml")
+ model = load_model_from_config(config, model_path_e)
+ device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+ model = model.to(device)
+ 
+ # NSFW CLIP filter
+ safety_model = load_safety_model("ViT-B/32")
+ clip_model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='openai')
+ 
+ def run(prompt, steps, width, height, images, scale):
+     opt = argparse.Namespace(
+         prompt=prompt,
+         outdir='latent-diffusion/outputs',
+         ddim_steps=int(steps),
+         ddim_eta=0,
+         n_iter=1,
+         W=int(width),
+         H=int(height),
+         n_samples=int(images),
+         scale=scale,
+         plms=True
+     )
+ 
+     if opt.plms:
+         opt.ddim_eta = 0
+         sampler = PLMSSampler(model)
+     else:
+         sampler = DDIMSampler(model)
+ 
+     os.makedirs(opt.outdir, exist_ok=True)
+     outpath = opt.outdir
+ 
+     prompt = opt.prompt
+ 
+     sample_path = os.path.join(outpath, "samples")
+     os.makedirs(sample_path, exist_ok=True)
+     base_count = len(os.listdir(sample_path))
+ 
+     all_samples = list()
+     all_samples_images = list()
+     with torch.no_grad():
+         with torch.cuda.amp.autocast():
+             with model.ema_scope():
+                 # unconditional conditioning for classifier-free guidance
+                 uc = None
+                 if opt.scale > 0:
+                     uc = model.get_learned_conditioning(opt.n_samples * [""])
+                 for n in range(opt.n_iter):
+                     c = model.get_learned_conditioning(opt.n_samples * [prompt])
+                     shape = [4, opt.H//8, opt.W//8]
+                     samples_ddim, _ = sampler.sample(S=opt.ddim_steps,
+                                                      conditioning=c,
+                                                      batch_size=opt.n_samples,
+                                                      shape=shape,
+                                                      verbose=False,
+                                                      unconditional_guidance_scale=opt.scale,
+                                                      unconditional_conditioning=uc,
+                                                      eta=opt.ddim_eta)
+ 
+                     x_samples_ddim = model.decode_first_stage(samples_ddim)
+                     x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
+ 
+                     # CLIP-embed each decoded image and screen it with the NSFW classifier
+                     for x_sample in x_samples_ddim:
+                         x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
+                         image_vector = Image.fromarray(x_sample.astype(np.uint8))
+                         image_preprocess = preprocess(image_vector).unsqueeze(0)
+                         with torch.no_grad():
+                             image_features = clip_model.encode_image(image_preprocess)
+                         image_features /= image_features.norm(dim=-1, keepdim=True)
+                         query = image_features.cpu().detach().numpy().astype("float32")
+                         unsafe = is_unsafe(safety_model, query, 0.5)
+                         if not unsafe:
+                             all_samples_images.append(image_vector)
+                         else:
+                             return (None, None, "Sorry, potential NSFW content was detected in your outputs by our NSFW detection model. Try again with different prompts. If you feel your prompt should not have produced NSFW outputs, this may be due to a bias in the model. Read more about biases in the Biases Acknowledgment section below.")
+                         #Image.fromarray(x_sample.astype(np.uint8)).save(os.path.join(sample_path, f"{base_count:04}.png"))
+                         base_count += 1
+                     all_samples.append(x_samples_ddim)
+ 
+     # additionally, save as grid
+     grid = torch.stack(all_samples, 0)
+     grid = rearrange(grid, 'n b c h w -> (n b) c h w')
+     grid = make_grid(grid, nrow=2)
+     # to image
+     grid = 255. * rearrange(grid, 'c h w -> h w c').cpu().numpy()
+ 
+     Image.fromarray(grid.astype(np.uint8)).save(os.path.join(outpath, f'{prompt.replace(" ", "-")}.png'))
+     return (Image.fromarray(grid.astype(np.uint8)), all_samples_images, None)
+ 
+ image = gr.outputs.Image(type="pil", label="Your result")
+ css = ".output-image{height: 528px !important} .output-carousel .output-image{height:272px !important} a{text-decoration: underline}"
+ iface = gr.Interface(fn=run, inputs=[
+     gr.inputs.Textbox(label="Prompt - try adding increments to your prompt such as 'oil on canvas', 'a painting', 'a book cover'", default="chalk pastel drawing of a dog wearing a funny hat"),
+     gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate", default=45, maximum=50, minimum=1, step=1),
+     gr.inputs.Radio(label="Width", choices=[32,64,128,256], default=256),
+     gr.inputs.Radio(label="Height", choices=[32,64,128,256], default=256),
+     gr.inputs.Slider(label="Images - How many images you wish to generate", default=2, step=1, minimum=1, maximum=4),
+     gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be", default=5.0, minimum=1.0, maximum=15.0),
+     #gr.inputs.Slider(label="ETA - between 0 and 1. Lower values can provide better quality, higher values can be more diverse", default=0.0, minimum=0.0, maximum=1.0, step=0.1),
+     ],
+     outputs=[image, gr.outputs.Carousel(label="Individual images", components=["image"]), gr.outputs.Textbox(label="Error")],
+     css=css,
+     title="Generate images from text with Latent Diffusion LAION-400M",
+     description="<div>By typing a prompt and pressing submit you can generate images based on this prompt. <a href='https://github.com/CompVis/latent-diffusion' target='_blank'>Latent Diffusion</a> is a text-to-image model created by <a href='https://github.com/CompVis' target='_blank'>CompVis</a>, trained on the <a href='https://laion.ai/laion-400-open-dataset/'>LAION-400M dataset</a>.<br>This UI to the model was assembled by <a style='color: rgb(245, 158, 11);font-weight:bold' href='https://twitter.com/multimodalart' target='_blank'>@multimodalart</a></div>",
+     article="<h4 style='font-size: 110%;margin-top:.5em'>Biases acknowledgment</h4><div>Despite how impressive being able to turn text into image is, be aware that this model may output content that reinforces or exacerbates societal biases. According to the <a href='https://arxiv.org/abs/2112.10752' target='_blank'>Latent Diffusion paper</a>: <i>\"Deep learning modules tend to reproduce or exacerbate biases that are already present in the data\"</i>. The model was trained on an unfiltered version of the LAION-400M dataset, which scraped non-curated image-text pairs from the internet (the exception being the removal of illegal content) and is meant to be used for research purposes, such as this one. <a href='https://laion.ai/laion-400-open-dataset/' target='_blank'>You can read more on LAION's website</a></div><h4 style='font-size: 110%;margin-top:1em'>Who owns the images produced by this demo?</h4><div>Definitely not me! Probably you do. I say probably because the copyright discussion about AI-generated art is ongoing. So <a href='https://www.theverge.com/2022/2/21/22944335/us-copyright-office-reject-ai-generated-art-recent-entrance-to-paradise' target='_blank'>it may be the case that everything produced here falls automatically into the public domain</a>. But in any case it is either yours or in the public domain.</div>")
+ iface.launch(enable_queue=True)