itsVilen committed on
Commit
0195b80
1 Parent(s): 2b79f48

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +351 -0
app.py ADDED
@@ -0,0 +1,351 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import random
3
+
4
+ import gradio as gr
5
+ import numpy as np
6
+ import PIL.Image
7
+ import torch
8
+ import torchvision.transforms.functional as TF
9
+
10
+ from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
11
+ from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
12
+ from controlnet_aux import PidiNetDetector, HEDdetector
13
+ from diffusers.utils import load_image
14
+ from huggingface_hub import HfApi
15
+ from pathlib import Path
16
+ from PIL import Image, ImageOps
17
+ import torch
18
+ import numpy as np
19
+ import cv2
20
+ import os
21
+ import random
22
+ import spaces
23
+ from gradio_imageslider import ImageSlider
24
+
25
# JavaScript snippet handed to gr.Blocks(js=...) further down in this file.
# As written, refresh() only builds a URL object from window.location and
# discards it, so the snippet has no visible effect.
js_func = """
function refresh() {
    const url = new URL(window.location);
}
"""
30
def nms(x, t, s):
    """Thin an edge map with directional non-maximum suppression and binarize it.

    The input is cast to float32 and Gaussian-blurred with sigma ``s`` (the
    ``(0, 0)`` kernel size lets OpenCV derive the window from the sigma). A
    pixel survives when its blurred value equals the dilation of the blurred
    map under at least one of four 3x3 line kernels (horizontal, vertical and
    the two diagonals), i.e. it is the maximum along that line.

    Args:
        x: Edge-strength array accepted by ``cv2.GaussianBlur`` after the cast.
        t: Threshold; surviving pixels whose blurred value exceeds it become 255.
        s: Gaussian sigma.

    Returns:
        A ``uint8`` array shaped like the blurred input containing only 0 and 255.
    """
    blurred = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)

    line_kernels = [
        np.array(rows, dtype=np.uint8)
        for rows in (
            [[0, 0, 0], [1, 1, 1], [0, 0, 0]],  # horizontal
            [[0, 1, 0], [0, 1, 0], [0, 1, 0]],  # vertical
            [[1, 0, 0], [0, 1, 0], [0, 0, 1]],  # main diagonal
            [[0, 0, 1], [0, 1, 0], [1, 0, 0]],  # anti-diagonal
        )
    ]

    survivors = np.zeros_like(blurred)
    for kernel in line_kernels:
        is_line_max = cv2.dilate(blurred, kernel=kernel) == blurred
        np.putmask(survivors, is_line_max, blurred)

    binary = np.zeros_like(survivors, dtype=np.uint8)
    binary[survivors > t] = 255
    return binary
46
+
47
def HWC3(x):
    """Convert a uint8 image array into a 3-channel (height, width, channel) array.

    * 2-D input is treated as a single-channel image.
    * 1-channel input is replicated into three identical channels.
    * 3-channel input is returned unchanged (the same array object).
    * 4-channel input is alpha-composited over a white background, using the
      fourth channel as alpha.

    Args:
        x: ``uint8`` NumPy array with 2 dimensions, or 3 dimensions whose last
            axis has length 1, 3 or 4.

    Returns:
        A ``uint8`` array with three channels on the last axis.

    Raises:
        TypeError: If ``x`` is not of dtype ``uint8``.
        ValueError: If ``x`` has an unsupported number of dimensions or channels.
    """
    # Explicit exceptions instead of ``assert``: assertions are stripped under
    # ``python -O``, which would let bad input fall through and return None.
    if x.dtype != np.uint8:
        raise TypeError(f"HWC3 expects a uint8 array, got dtype {x.dtype}")
    if x.ndim == 2:
        x = x[:, :, None]
    if x.ndim != 3:
        raise ValueError(f"HWC3 expects a 2-D or 3-D array, got {x.ndim} dimensions")

    channels = x.shape[2]
    if channels == 3:
        return x
    if channels == 1:
        return np.concatenate([x, x, x], axis=2)
    if channels == 4:
        color = x[:, :, 0:3].astype(np.float32)
        alpha = x[:, :, 3:4].astype(np.float32) / 255.0
        # Blend over white: fully transparent pixels become 255.
        blended = color * alpha + 255.0 * (1.0 - alpha)
        return blended.clip(0, 255).astype(np.uint8)
    raise ValueError(f"HWC3 expects 1, 3 or 4 channels, got {channels}")
64
+
65
# Markdown rendered at the top of the page; currently empty.
DESCRIPTION = ""

if not torch.cuda.is_available():
    # No CPU-only notice is configured: the appended text is empty, so this
    # branch leaves DESCRIPTION unchanged.
    DESCRIPTION += ""
69
+
70
# Style presets. Each entry has a display name, a prompt template in which
# "{prompt}" is replaced by the user's prompt, and a preset negative prompt.
style_list = [
    {
        "name": "(No style)",
        "prompt": "{prompt}",
        "negative_prompt": "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
    },
    {
        "name": "Cinematic",
        "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
        "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured",
    },
    {
        "name": "3D Model",
        "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
        "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting",
    },
    {
        "name": "Anime",
        "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed",
        "negative_prompt": "photo, deformed, black and white, realism, disfigured, low contrast",
    },
    {
        "name": "Digital Art",
        "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed",
        "negative_prompt": "photo, photorealistic, realism, ugly",
    },
    {
        "name": "Photographic",
        "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed",
        "negative_prompt": "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly",
    },
    {
        "name": "Pixel art",
        "prompt": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics",
        "negative_prompt": "sloppy, messy, blurry, noisy, highly detailed, ultra textured, photo, realistic",
    },
    {
        "name": "Fantasy art",
        "prompt": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy",
        "negative_prompt": "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white",
    },
    {
        "name": "Neonpunk",
        "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional",
        "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",
    },
    {
        "name": "Manga",
        "prompt": "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style",
        "negative_prompt": "ugly, deformed, noisy, blurry, low contrast, realism, photorealistic, Western comic style",
    },
]

# name -> (prompt template, preset negative prompt)
styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
STYLE_NAMES = list(styles.keys())
DEFAULT_STYLE_NAME = "(No style)"


def apply_style(style_name: str, positive: str, negative: str = "") -> tuple[str, str]:
    """Expand a style preset into the final positive and negative prompts.

    Args:
        style_name: Key into ``styles``; unknown names fall back to
            ``DEFAULT_STYLE_NAME``.
        positive: User prompt substituted for ``{prompt}`` in the template.
        negative: Extra negative prompt supplied by the user.

    Returns:
        ``(prompt, negative_prompt)``. The negative prompt is the preset text
        followed by the user's text, separated by ``", "``. Empty parts are
        skipped, and a user text identical to the preset is not repeated.
    """
    template, preset_negative = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
    user_negative = (negative or "").strip()

    # Plain concatenation would fuse the last preset term with the first user
    # term (e.g. "low contrastblurry"), so join the parts explicitly.
    parts = [preset_negative]
    if user_negative != preset_negative:
        parts.append(user_negative)
    combined_negative = ", ".join(part for part in parts if part)

    return template.replace("{prompt}", positive), combined_negative
131
+
132
+
133
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# A single Euler-ancestral scheduler, configured from the SDXL base repo, is
# passed to both pipelines below.
eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler"
)


controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-union-sdxl-1.0",
    torch_dtype=torch.float16,
)
# The Canny pipeline uses the same checkpoint with the same dtype, so reuse the
# loaded module instead of loading (and holding in memory) a second copy. The
# VAE and scheduler are shared between the pipelines in the same way.
controlnet_canny = controlnet

# When testing with another base model, the VAE needs to be changed as well.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

# Default pipeline: SDXL base + ControlNet (used for sketch / HED input).
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)
pipe.to(device)

# Canny pipeline: RealVisXL Turbo base with the same ControlNet weights.
pipe_canny = StableDiffusionXLControlNetPipeline.from_pretrained(
    "SG161222/RealVisXL_V3.0_Turbo",
    controlnet=controlnet_canny,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)

pipe_canny.to(device)

# Upper bound for the seed slider and for randomly drawn seeds.
MAX_SEED = np.iinfo(np.int32).max
# HED edge detector used when the user asks to turn an uploaded image into a sketch.
processor = HEDdetector.from_pretrained('lllyasviel/Annotators')
171
# NOTE: this rebinds the name ``nms``; an identical definition appears earlier
# in this file.
def nms(x, t, s):
    """Binarize an edge map after blur and directional non-maximum suppression.

    ``x`` is converted to float32 and smoothed with a Gaussian of sigma ``s``.
    A pixel is retained when its smoothed value equals the value obtained by
    dilating the smoothed map with any of four 3x3 line-shaped footprints
    (horizontal, vertical, both diagonals). Retained values greater than ``t``
    are set to 255 in the output; everything else is 0.

    Args:
        x: Edge-strength array.
        t: Threshold applied to the retained (smoothed) values.
        s: Gaussian sigma handed to ``cv2.GaussianBlur``.

    Returns:
        A ``uint8`` array of 0/255 values with the shape of the smoothed input.
    """
    smooth = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)

    horizontal = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8)
    vertical = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8)
    diagonal = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8)
    anti_diagonal = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8)

    peaks = np.zeros_like(smooth)
    for footprint in (horizontal, vertical, diagonal, anti_diagonal):
        # Copy through pixels that are the maximum along this footprint's line.
        np.putmask(peaks, cv2.dilate(smooth, kernel=footprint) == smooth, smooth)

    return np.where(peaks > t, 255, 0).astype(np.uint8)
187
+
188
def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    """Return a fresh random seed in ``[0, MAX_SEED]`` when requested, else ``seed``."""
    return random.randint(0, MAX_SEED) if randomize_seed else seed
192
+
193
@spaces.GPU
def run(
    image: dict,
    prompt: str,
    negative_prompt: str,
    style_name: str = DEFAULT_STYLE_NAME,
    num_steps: int = 25,
    guidance_scale: float = 5,
    controlnet_conditioning_scale: float = 1.0,
    seed: int = 0,
    use_hed: bool = False,
    use_canny: bool = False,
    progress=gr.Progress(track_tqdm=True),
) -> tuple:
    """Generate an image from the editor content with an SDXL ControlNet pipeline.

    Args:
        image: Image-editor value; its ``"composite"`` entry (a PIL image) is
            used as the source.
        prompt: User prompt, inserted into the selected style template.
        negative_prompt: User negative prompt, combined with the style preset.
        style_name: Name of the style preset passed to ``apply_style``.
        num_steps: Number of inference steps.
        guidance_scale: Guidance scale forwarded to the pipeline.
        controlnet_conditioning_scale: ControlNet conditioning scale.
        seed: Seed for the torch generator.
        use_hed: Run the HED detector on the input before conditioning.
            Ignored when ``use_canny`` is set.
        use_canny: Condition on Canny edges and use ``pipe_canny``.
        progress: Gradio progress tracker; not read here, but declaring it
            lets Gradio mirror tqdm progress.

    Returns:
        ``(control_image, generated_image)`` for the image slider. The control
        image is a NumPy array when Canny or HED preprocessing ran, otherwise
        the resized PIL input.

    Raises:
        gr.Error: If the editor holds no image.
    """
    # Get the composite image from the EditorValue dict; fail with a readable
    # message instead of a TypeError/AttributeError when nothing was provided.
    composite_image = image.get("composite") if image else None
    if composite_image is None:
        raise gr.Error("Please sketch or upload an image first.")
    width, height = composite_image.size

    # Scale so the longer side becomes 1024 while keeping the aspect ratio.
    max_size = 1024
    ratio = min(max_size / width, max_size / height)
    new_width = int(width * ratio)
    new_height = int(height * ratio)

    resized_image = composite_image.resize((new_width, new_height), Image.LANCZOS)

    if use_canny:
        controlnet_img = np.array(resized_image)
        controlnet_img = cv2.Canny(controlnet_img, 100, 200)
        controlnet_img = HWC3(controlnet_img)
        control_image = Image.fromarray(controlnet_img)
    elif not use_hed:
        # Use the sketch as-is.
        controlnet_img = resized_image
        control_image = resized_image
    else:
        controlnet_img = processor(resized_image, scribble=False)
        controlnet_img = np.array(controlnet_img)
        controlnet_img = nms(controlnet_img, 127, 3)
        controlnet_img = cv2.GaussianBlur(controlnet_img, (0, 0), 3)
        # Binarize with a randomly drawn low threshold (1%-10% of full scale),
        # so the resulting line thickness varies between runs.
        random_val = int(round(random.uniform(0.01, 0.10), 2) * 255)
        controlnet_img[controlnet_img > random_val] = 255
        controlnet_img[controlnet_img < 255] = 0
        control_image = Image.fromarray(controlnet_img)

    prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)

    # manual_seed requires an int; coerce in case the slider delivers a float.
    generator = torch.Generator(device=device).manual_seed(int(seed))

    # Both pipelines take identical arguments; only the base model differs.
    active_pipe = pipe_canny if use_canny else pipe
    out = active_pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=control_image,
        num_inference_steps=num_steps,
        generator=generator,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        guidance_scale=guidance_scale,
        width=new_width,
        height=new_height,
    ).images[0]

    return (controlnet_img, out)
268
+
269
# Page layout: inputs and advanced options on the left, before/after slider on the right.
with gr.Blocks(css="style.css", js=js_func) as demo:
    gr.Markdown(DESCRIPTION, elem_id="description")
    gr.DuplicateButton(
        value="Duplicate Space for private use",
        elem_id="duplicate-button",
        visible=os.getenv("SHOW_DUPLICATE_BUTTON") == "1",
    )

    with gr.Row():
        with gr.Column():
            with gr.Group():
                image = gr.ImageEditor(type="pil", label="Sketch your image or upload one", width=512, height=512)
                prompt = gr.Textbox(label="Prompt")
                style = gr.Dropdown(label="Style", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME)
                use_hed = gr.Checkbox(label="use HED detector", value=False, info="check this box if you upload an image and want to turn it to a sketch")
                use_canny = gr.Checkbox(label="use Canny", value=False, info="check this to use ControlNet canny instead of scribble")
                run_button = gr.Button("Run")
            with gr.Accordion("Advanced options", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative prompt",
                    value="longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
                )
                num_steps = gr.Slider(
                    label="Number of steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=25,
                )
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.1,
                    maximum=10.0,
                    step=0.1,
                    value=5,
                )
                controlnet_conditioning_scale = gr.Slider(
                    label="controlnet conditioning scale",
                    minimum=0.5,
                    maximum=5.0,
                    step=0.1,
                    value=0.9,
                )
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0,
                )
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

        with gr.Column():
            with gr.Group():
                image_slider = ImageSlider(position=0.5)

    # Order must match the positional parameters of run().
    inputs = [
        image,
        prompt,
        negative_prompt,
        style,
        num_steps,
        guidance_scale,
        controlnet_conditioning_scale,
        seed,
        use_hed,
        use_canny,
    ]
    outputs = [image_slider]
    # Click chain: 1) optionally draw a new seed, 2) clear the previous result,
    # 3) run generation.
    run_button.click(
        fn=randomize_seed_fn,
        inputs=[seed, randomize_seed],
        outputs=seed,
        queue=False,
        api_name=False,
    ).then(
        # With inputs=None the callback receives no arguments, so it must not
        # declare a parameter (a one-argument lambda raises TypeError here).
        lambda: None,
        inputs=None,
        outputs=image_slider,
    ).then(
        fn=run, inputs=inputs, outputs=outputs
    )


# NOTE(review): ssl_verify=False disables certificate verification — confirm this is intended.
demo.queue().launch(show_error=True, ssl_verify=False)