zwl committed
Commit 2a8cba1
1 Parent(s): 63f3205
Files changed (3)
  1. app.py +272 -0
  2. nsfw.png +0 -0
  3. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,272 @@
+ from diffusers import AutoencoderKL, UNet2DConditionModel, StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, UniPCMultistepScheduler
+ import gradio as gr
+ import torch
+ from PIL import Image
+ import os
+
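+ # UniPC multistep scheduler. The beta range and epsilon prediction match the
+ # Stable Diffusion v1 scheduler config; solver_order=2 gives a second-order
+ # predictor-corrector (solver_type "bh2"), and disable_corrector=[0] skips
+ # the corrector on the first step.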
+ scheduler = UniPCMultistepScheduler(
+     beta_start=0.00085,
+     beta_end=0.012,
+     beta_schedule="scaled_linear",  # Stable Diffusion v1 was trained with the scaled_linear schedule
+     solver_order=2,
+     prediction_type="epsilon",
+     thresholding=False,
+     solver_type="bh2",
+     lower_order_final=True,
+     disable_corrector=[0],
+ )
+
+ class Model:
+     def __init__(self, name, path, prefix):
+         self.name = name
+         self.path = path
+         self.prefix = prefix
+         self.pipe_t2i = None
+         self.pipe_i2i = None
+
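+ # Each model's `prefix` is prepended to every prompt (the UI placeholder
+ # notes that the style is applied automatically).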
+ models = [
+     # the official v1.4 Stable Diffusion needs no style prefix
+     Model("Stable-Diffusion-v1.4", "CompVis/stable-diffusion-v1-4", ""),
+     Model("Waifu", "hakurei/waifu-diffusion", "anime style"),
+ ]
+
+ last_mode = "txt2img"
+ current_model = models[0]
+ current_model_path = current_model.path
+
+ auth_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
+
+ print(f"Is CUDA available: {torch.cuda.is_available()}")
+
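+ # Load every model up front, sharing one VAE across pipelines. On GPU the
+ # weights are loaded in float16 to halve memory; models that fail to load
+ # are dropped from the list.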
+ if torch.cuda.is_available():
+     vae = AutoencoderKL.from_pretrained(current_model.path, subfolder="vae", torch_dtype=torch.float16, use_auth_token=auth_token)
+     for model in models[:]:  # iterate over a copy so failed models can be removed safely
+         try:
+             unet = UNet2DConditionModel.from_pretrained(model.path, subfolder="unet", torch_dtype=torch.float16, use_auth_token=auth_token)
+             model.pipe_t2i = StableDiffusionPipeline.from_pretrained(model.path, unet=unet, vae=vae, torch_dtype=torch.float16, scheduler=scheduler, use_auth_token=auth_token)
+             model.pipe_i2i = StableDiffusionImg2ImgPipeline.from_pretrained(model.path, unet=unet, vae=vae, torch_dtype=torch.float16, scheduler=scheduler, use_auth_token=auth_token)
+         except Exception:
+             models.remove(model)
+     pipe = models[0].pipe_t2i
+     pipe = pipe.to("cuda")
+
+ else:
+     vae = AutoencoderKL.from_pretrained(current_model.path, subfolder="vae", use_auth_token=auth_token)
+     for model in models[:]:
+         try:
+             unet = UNet2DConditionModel.from_pretrained(model.path, subfolder="unet", use_auth_token=auth_token)
+             model.pipe_t2i = StableDiffusionPipeline.from_pretrained(model.path, unet=unet, vae=vae, scheduler=scheduler, use_auth_token=auth_token)
+             model.pipe_i2i = StableDiffusionImg2ImgPipeline.from_pretrained(model.path, unet=unet, vae=vae, scheduler=scheduler, use_auth_token=auth_token)
+         except Exception:
+             models.remove(model)
+     pipe = models[0].pipe_t2i
+     pipe = pipe.to("cpu")
+
+ device = "GPU 🔥" if torch.cuda.is_available() else "CPU 🥶"
+
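+ # UI entry point: routes to img2img when an init image is supplied, else to
+ # txt2img. seed == 0 leaves the generator unset, so a random seed is used.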
+ def inference(model_name, prompt, guidance, steps, width=512, height=512, seed=0, img=None, strength=0.5, neg_prompt=""):
+
+     global current_model
+     for model in models:
+         if model.name == model_name:
+             current_model = model
+     model_path = current_model.path
+
+     generator = torch.Generator('cuda' if torch.cuda.is_available() else 'cpu').manual_seed(seed) if seed != 0 else None
+
+     if img is not None:
+         return img_to_img(model_path, prompt, neg_prompt, img, strength, guidance, steps, width, height, generator)
+     else:
+         return txt_to_img(model_path, prompt, neg_prompt, guidance, steps, width, height, generator)
+
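+ # Model/mode switching below keeps at most one pipeline on the GPU: the
+ # current pipeline is moved back to the CPU before the new one is moved in.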
+ def txt_to_img(model_path, prompt, neg_prompt, guidance, steps, width, height, generator=None):
+
+     global last_mode
+     global pipe
+     global current_model_path
+     if model_path != current_model_path or last_mode != "txt2img":
+         current_model_path = model_path
+
+         pipe.to("cpu")
+         pipe = current_model.pipe_t2i
+
+         if torch.cuda.is_available():
+             pipe = pipe.to("cuda")
+         last_mode = "txt2img"
+
+     prompt = current_model.prefix + prompt
+     result = pipe(
+         prompt,
+         negative_prompt=neg_prompt,
+         # num_images_per_prompt=n_images,
+         num_inference_steps=int(steps),
+         guidance_scale=guidance,
+         width=width,
+         height=height,
+         generator=generator)
+
+     return replace_nsfw_images(result)
+
+ def img_to_img(model_path, prompt, neg_prompt, img, strength, guidance, steps, width, height, generator=None):
+
+     global last_mode
+     global pipe
+     global current_model_path
+     if model_path != current_model_path or last_mode != "img2img":
+         current_model_path = model_path
+
+         pipe.to("cpu")
+         pipe = current_model.pipe_i2i
+
+         if torch.cuda.is_available():
+             pipe = pipe.to("cuda")
+         last_mode = "img2img"
+
+     prompt = current_model.prefix + prompt
+     ratio = min(height / img.height, width / img.width)
+     img = img.resize((int(img.width * ratio), int(img.height * ratio)), Image.LANCZOS)
+     result = pipe(
+         prompt,
+         negative_prompt=neg_prompt,
+         # num_images_per_prompt=n_images,
+         init_image=img,
+         num_inference_steps=int(steps),
+         strength=strength,
+         guidance_scale=guidance,
+         # width=width,
+         # height=height,
+         generator=generator)
+
+     return replace_nsfw_images(result)
+
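+ # The pipeline's safety checker flags NSFW outputs; flagged images are
+ # replaced with the placeholder image nsfw.png (added in this commit).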
+ def replace_nsfw_images(results):
+     for i in range(len(results.images)):
+         if results.nsfw_content_detected[i]:
+             results.images[i] = Image.open("nsfw.png")
+     return results.images[0]
+
+ css = """
+ .finetuned-diffusion-div {
+     text-align: center;
+     max-width: 700px;
+     margin: 0 auto;
+     font-family: 'IBM Plex Sans', sans-serif;
+ }
+ .finetuned-diffusion-div div {
+     display: inline-flex;
+     align-items: center;
+     gap: 0.8rem;
+     font-size: 1.75rem;
+ }
+ .finetuned-diffusion-div div h1 {
+     font-weight: 900;
+     margin-top: 15px;
+     margin-bottom: 15px;
+     text-align: center;
+     line-height: 150%;
+ }
+ .finetuned-diffusion-div p {
+     margin-bottom: 10px;
+     font-size: 94%;
+ }
+ .finetuned-diffusion-div p a {
+     text-decoration: underline;
+ }
+ .tabs {
+     margin-top: 0px;
+     margin-bottom: 0px;
+ }
+ #gallery {
+     min-height: 20rem;
+ }
+ .container {
+     max-width: 1000px;
+     margin: auto;
+     padding-top: 1.5rem;
+ }
+ """
+ with gr.Blocks(css=css) as demo:
+     gr.HTML(
+         f"""
+           <div class="finetuned-diffusion-div">
+             <div>
+               <h1>Stable-Diffusion with UniPC</h1>
+             </div>
+             <br>
+             <p>
+               ❤️ Acknowledgement: the hardware resources for this demo are provided by Hugging Face 🤗. Many thanks for the help!
+             </p>
+             <br>
+             <p>
+               This is a demo of UniPC sampling with two Stable Diffusion variants: <a href="https://huggingface.co/CompVis/stable-diffusion-v1-4">Stable-Diffusion-v1.4</a> and <a href="https://huggingface.co/hakurei/waifu-diffusion">Waifu</a>.
+             </p>
+             <br>
+             <p>
+               <a href="https://github.com/wl-zhao/UniPC">UniPC</a> is a training-free framework for fast sampling of diffusion models. It consists of a corrector (UniC) and a predictor (UniP) that share a unified analytical form and support arbitrary orders.
+             </p>
+             <p>
+               This demo is implemented with <a href="https://github.com/huggingface/diffusers">Diffusers</a> 🧨, which supports the multistep UniPC scheduler. For more details on UniPC in Diffusers, see <a href="https://github.com/huggingface/diffusers/pull/2373">this pull request</a>.
+             </p>
+             <br>
+             <br>
+             <p>
+               Running on <b>{device}</b>
+             </p>
+           </div>
+         """
+     )
+
+     with gr.Row():
+
+         with gr.Column(scale=55):
+             with gr.Group():
+                 model_name = gr.Dropdown(label="Model", choices=[m.name for m in models], value=current_model.name)
+                 with gr.Row():
+                     prompt = gr.Textbox(label="Prompt", show_label=False, max_lines=2, placeholder="Enter prompt. Style applied automatically").style(container=False)
+                     generate = gr.Button(value="Generate").style(rounded=(False, True, True, False))
+
+                 image_out = gr.Image(height=512)
+                 # gallery = gr.Gallery(
+                 #     label="Generated images", show_label=False, elem_id="gallery"
+                 # ).style(grid=[1], height="auto")
+
+         with gr.Column(scale=45):
+             with gr.Tab("Options"):
+                 with gr.Group():
+                     neg_prompt = gr.Textbox(label="Negative prompt", placeholder="What to exclude from the image")
+
+                     # n_images = gr.Slider(label="Images", value=1, minimum=1, maximum=4, step=1)
+
+                     with gr.Row():
+                         guidance = gr.Slider(label="Guidance scale", value=7.5, maximum=15)
+                         steps = gr.Slider(label="Steps", value=25, minimum=2, maximum=100, step=1)
+
+                     with gr.Row():
+                         width = gr.Slider(label="Width", value=512, minimum=64, maximum=1024, step=8)
+                         height = gr.Slider(label="Height", value=512, minimum=64, maximum=1024, step=8)
+
+                     seed = gr.Slider(0, 2147483647, label='Seed (0 = random)', value=0, step=1)
+
+             with gr.Tab("Image to image"):
+                 with gr.Group():
+                     image = gr.Image(label="Image", height=256, tool="editor", type="pil")
+                     strength = gr.Slider(label="Transformation strength", minimum=0, maximum=1, step=0.01, value=0.5)
+
+     # model_name.change(lambda x: gr.update(visible = x == models[0].name), inputs=model_name, outputs=custom_model_group)
+
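+     # Both pressing Enter in the prompt box and clicking Generate run inference.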
+     inputs = [model_name, prompt, guidance, steps, width, height, seed, image, strength, neg_prompt]
+     prompt.submit(inference, inputs=inputs, outputs=image_out)
+
+     generate.click(inference, inputs=inputs, outputs=image_out)
+
+     gr.Markdown('''
+     Stable Diffusion models by [CompVis](https://huggingface.co/CompVis) and [stabilityai](https://huggingface.co/stabilityai); Waifu Diffusion models by [@hakurei](https://huggingface.co/hakurei). Most of the code of this demo is adapted from [@anzorq's finetuned_diffusion](https://huggingface.co/spaces/anzorq/finetuned_diffusion/tree/main) ❤️<br>
+     Space by [Wenliang Zhao](https://github.com/wl-zhao).
+
+     ![visitors](https://visitor-badge.glitch.me/badge?page_id=wl-zhao.unipc_sdm)
+     ''')
+
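+ # Single-worker queue: requests are processed one at a time.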
+ demo.queue(concurrency_count=1)
+ demo.launch(debug=False, share=False)
nsfw.png ADDED
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ torch
+ diffusers
+ transformers
+ scipy
+ ftfy
+ accelerate