xi0v committed on
Commit
48fd452
·
verified ·
1 Parent(s): dc34a81

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png filter=lfs diff=lfs merge=lfs -text
37
+ *.whl filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ __pycache__/
2
+ venv/
3
+ public/
4
+ *.pem
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
- title: SDXL Enhancer
3
- emoji: 🌖
4
- colorFrom: red
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 4.31.5
8
  app_file: app.py
9
  pinned: false
 
 
 
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: SDXL Image Enhancer
3
+ emoji: 🔍🕵️
4
+ colorFrom: pink
5
+ colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 4.29.0
8
  app_file: app.py
9
  pinned: false
10
+ suggested_hardware: t4-medium
11
+ disable_embedding: true
12
+ short_description: Creative Upscaler High-Res Image Generation HiDiffusion SDXL
13
  ---
 
 
app.py ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import gradio as gr
3
+ from gradio_imageslider import ImageSlider
4
+ import torch
5
+
6
+ torch.jit.script = lambda f: f
7
+ from hidiffusion import apply_hidiffusion
8
+ from diffusers import (
9
+ ControlNetModel,
10
+ StableDiffusionXLControlNetImg2ImgPipeline,
11
+ DDIMScheduler,
12
+ )
13
+ from controlnet_aux import AnylineDetector
14
+ from compel import Compel, ReturnedEmbeddingsType
15
+ from PIL import Image
16
+ import os
17
+ import time
18
+ import numpy as np
19
+
20
+ IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
21
+ IS_SPACE = os.environ.get("SPACE_ID", None) is not None
22
+
23
+ device = "cuda" if torch.cuda.is_available() else "cpu"
24
+ dtype = torch.float16
25
+
26
+ LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"
27
+
28
+ print(f"device: {device}")
29
+ print(f"dtype: {dtype}")
30
+ print(f"low memory: {LOW_MEMORY}")
31
+
32
+
33
+ model = "stabilityai/stable-diffusion-xl-base-1.0"
34
+ # model = "stabilityai/sdxl-turbo"
35
+ # vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=dtype)
36
+ scheduler = DDIMScheduler.from_pretrained(model, subfolder="scheduler")
37
+ # controlnet = ControlNetModel.from_pretrained(
38
+ # "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16
39
+ # )
40
+ controlnet = ControlNetModel.from_pretrained(
41
+ "TheMistoAI/MistoLine",
42
+ torch_dtype=torch.float16,
43
+ revision="refs/pr/3",
44
+ variant="fp16",
45
+ )
46
+ pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
47
+ model,
48
+ controlnet=controlnet,
49
+ torch_dtype=dtype,
50
+ variant="fp16",
51
+ use_safetensors=True,
52
+ scheduler=scheduler,
53
+ )
54
+
55
+ compel = Compel(
56
+ tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
57
+ text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
58
+ returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
59
+ requires_pooled=[False, True],
60
+ )
61
+ pipe = pipe.to(device)
62
+
63
+ if not IS_SPACES_ZERO:
64
+ apply_hidiffusion(pipe)
65
+ # pipe.enable_xformers_memory_efficient_attention()
66
+ pipe.enable_model_cpu_offload()
67
+ pipe.enable_vae_tiling()
68
+
69
+ anyline = AnylineDetector.from_pretrained(
70
+ "TheMistoAI/MistoLine", filename="MTEED.pth", subfolder="Anyline"
71
+ ).to(device)
72
+
73
+
74
def pad_image(image):
    """Pad a PIL image with black borders so that it becomes square.

    The original picture is centred on a new square canvas whose side is the
    longer of the two input dimensions. A square input is returned unchanged
    (the very same object, not a copy).

    Args:
        image: PIL image to pad.

    Returns:
        A square PIL image in the same mode as ``image``.
    """
    w, h = image.size
    if w == h:
        return image

    side = max(w, h)
    # A 3-tuple fill is only accepted by Pillow for multi-band colour modes;
    # one- and two-band modes (L, 1, I, F, LA, ...) need a scalar, otherwise
    # Image.new raises TypeError. 0 is black in those modes.
    fill = (0, 0, 0) if len(image.getbands()) >= 3 else 0
    new_image = Image.new(image.mode, (side, side), fill)

    # Only the shorter axis gets a non-zero offset, which centres the image.
    new_image.paste(image, ((side - w) // 2, (side - h) // 2))
    return new_image
90
+
91
+
92
@spaces.GPU
def predict(
    input_image,
    prompt,
    negative_prompt,
    seed,
    guidance_scale=8.5,
    scale=2,
    controlnet_conditioning_scale=0.5,
    strength=1.0,
    controlnet_start=0.0,
    controlnet_end=1.0,
    guassian_sigma=2.0,
    intensity_threshold=3,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the ControlNet img2img pipeline on an uploaded image.

    The input is padded to a square, resized to 1024x1024 and converted to
    RGB. An Anyline line-art map is extracted from it and used as the
    ControlNet conditioning image, while the padded image itself is the
    img2img source. The output resolution is ``1024 * scale`` on both sides.

    Args:
        input_image: PIL image to enhance; ``None`` raises ``gr.Error``.
        prompt: Positive prompt, encoded through ``compel``.
        negative_prompt: Negative prompt, encoded through ``compel``.
        seed: Seed handed to ``torch.manual_seed``.
        guidance_scale: Guidance scale forwarded to the pipeline.
        scale: Multiplier applied to the 1024 px base resolution.
        controlnet_conditioning_scale: ControlNet conditioning scale.
        strength: img2img strength forwarded to the pipeline.
        controlnet_start: Fraction of the steps at which ControlNet guidance
            starts; must be smaller than ``controlnet_end``.
        controlnet_end: Fraction of the steps at which ControlNet guidance
            stops.
        guassian_sigma: Sigma forwarded to the Anyline detector (clamped to
            at least 0.01). The spelling matches the detector's keyword.
        intensity_threshold: Threshold forwarded to the Anyline detector.
        progress: Gradio progress tracker (tqdm tracking enabled).

    Returns:
        ``((padded_image, result_image), padded_image, anyline_image)``.

    Raises:
        gr.Error: If no image was supplied or the ControlNet start/end range
            is empty or inverted.
    """
    # Validate before doing any work.
    if input_image is None:
        raise gr.Error("Please upload an image.")
    control_start = float(controlnet_start)
    control_end = float(controlnet_end)
    if control_start >= control_end:
        # The pipeline itself rejects start >= end; surface it as a UI error.
        raise gr.Error("ControlNet Start must be smaller than ControlNet End.")

    if IS_SPACES_ZERO:
        apply_hidiffusion(pipe)

    padded_image = pad_image(input_image).resize((1024, 1024)).convert("RGB")
    # Row 0 holds the positive prompt embedding, row 1 the negative one.
    conditioning, pooled = compel([prompt, negative_prompt])
    generator = torch.manual_seed(seed)
    last_time = time.time()
    anyline_image = anyline(
        padded_image,
        detect_resolution=1280,
        guassian_sigma=max(0.01, guassian_sigma),
        intensity_threshold=intensity_threshold,
    )

    # Sliders may deliver floats; the pipeline needs integer pixel sizes.
    target_size = int(1024 * scale)
    images = pipe(
        image=padded_image,
        control_image=anyline_image,
        strength=strength,
        prompt_embeds=conditioning[0:1],
        pooled_prompt_embeds=pooled[0:1],
        negative_prompt_embeds=conditioning[1:2],
        negative_pooled_prompt_embeds=pooled[1:2],
        width=target_size,
        height=target_size,
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
        # The pipeline's keywords are control_guidance_start/end. The former
        # controlnet_start/controlnet_end names are not pipeline parameters,
        # so the sliders had no effect.
        control_guidance_start=control_start,
        control_guidance_end=control_end,
        generator=generator,
        num_inference_steps=30,
        guidance_scale=guidance_scale,
        eta=1.0,
    )
    print(f"Time taken: {time.time() - last_time}")
    return (padded_image, images.images[0]), padded_image, anyline_image
143
+
144
+
145
+ css = """
146
+ #intro{
147
+ # max-width: 32rem;
148
+ # text-align: center;
149
+ # margin: 0 auto;
150
+ }
151
+ """
152
+
153
+ with gr.Blocks(css=css) as demo:
154
+ gr.Markdown(
155
+ """
156
+ # Enhance This
157
+ ### HiDiffusion SDXL
158
+
159
+ [HiDiffusion](https://github.com/megvii-research/HiDiffusion) enables higher-resolution image generation.
160
+ You can upload an initial image and prompt to generate an enhanced version.
161
+ SDXL Controlnet [TheMistoAI/MistoLine](https://huggingface.co/TheMistoAI/MistoLine)
162
+ [Duplicate Space](https://huggingface.co/spaces/radames/Enhance-This-HiDiffusion-SDXL?duplicate=true) to avoid the queue.
163
+
164
+ <small>
165
+ <b>Notes</b> The author advises against the term "super resolution" because it's more like image-to-image generation than enhancement, but it's still a lot of fun!
166
+
167
+ </small>
168
+ """,
169
+ elem_id="intro",
170
+ )
171
+ with gr.Row():
172
+ with gr.Column(scale=1):
173
+ image_input = gr.Image(type="pil", label="Input Image")
174
+ prompt = gr.Textbox(
175
+ label="Prompt",
176
+ info="The prompt is very important to get the desired results. Please try to describe the image as best as you can. Accepts Compel Syntax",
177
+ )
178
+ negative_prompt = gr.Textbox(
179
+ label="Negative Prompt",
180
+ value="blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
181
+ )
182
+ seed = gr.Slider(
183
+ minimum=0,
184
+ maximum=2**64 - 1,
185
+ value=1415926535897932,
186
+ step=1,
187
+ label="Seed",
188
+ randomize=True,
189
+ )
190
+ with gr.Accordion(label="Advanced", open=False):
191
+ guidance_scale = gr.Slider(
192
+ minimum=0,
193
+ maximum=50,
194
+ value=8.5,
195
+ step=0.001,
196
+ label="Guidance Scale",
197
+ )
198
+ scale = gr.Slider(
199
+ minimum=1,
200
+ maximum=5,
201
+ value=2,
202
+ step=1,
203
+ label="Magnification Scale",
204
+ interactive=not IS_SPACE,
205
+ )
206
+ controlnet_conditioning_scale = gr.Slider(
207
+ minimum=0,
208
+ maximum=1,
209
+ step=0.001,
210
+ value=0.5,
211
+ label="ControlNet Conditioning Scale",
212
+ )
213
+ strength = gr.Slider(
214
+ minimum=0,
215
+ maximum=1,
216
+ step=0.001,
217
+ value=1,
218
+ label="Strength",
219
+ )
220
+ controlnet_start = gr.Slider(
221
+ minimum=0,
222
+ maximum=1,
223
+ step=0.001,
224
+ value=0.0,
225
+ label="ControlNet Start",
226
+ )
227
+ controlnet_end = gr.Slider(
228
+ minimum=0.0,
229
+ maximum=1.0,
230
+ step=0.001,
231
+ value=1.0,
232
+ label="ControlNet End",
233
+ )
234
+ guassian_sigma = gr.Slider(
235
+ minimum=0.01,
236
+ maximum=10.0,
237
+ step=0.1,
238
+ value=2.0,
239
+ label="(Anyline) Guassian Sigma",
240
+ )
241
+ intensity_threshold = gr.Slider(
242
+ minimum=0,
243
+ maximum=255,
244
+ step=1,
245
+ value=3,
246
+ label="(Anyline) Intensity Threshold",
247
+ )
248
+
249
+ btn = gr.Button()
250
+ with gr.Column(scale=2):
251
+ with gr.Group():
252
+ image_slider = ImageSlider(position=0.5)
253
+ with gr.Row():
254
+ padded_image = gr.Image(type="pil", label="Padded Image")
255
+ anyline_image = gr.Image(type="pil", label="Anyline Image")
256
+ inputs = [
257
+ image_input,
258
+ prompt,
259
+ negative_prompt,
260
+ seed,
261
+ guidance_scale,
262
+ scale,
263
+ controlnet_conditioning_scale,
264
+ strength,
265
+ controlnet_start,
266
+ controlnet_end,
267
+ guassian_sigma,
268
+ intensity_threshold,
269
+ ]
270
+ outputs = [image_slider, padded_image, anyline_image]
271
+ btn.click(lambda x: None, inputs=None, outputs=image_slider).then(
272
+ fn=predict, inputs=inputs, outputs=outputs
273
+ )
274
+ gr.Examples(
275
+ fn=predict,
276
+ inputs=inputs,
277
+ outputs=outputs,
278
+ examples=[
279
+ [
280
+ "./examples/lara.jpeg",
281
+ "photography of lara croft 8k high definition award winning",
282
+ "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
283
+ 5436236241,
284
+ 8.5,
285
+ 2,
286
+ 0.8,
287
+ 1.0,
288
+ 0.0,
289
+ 0.9,
290
+ 2,
291
+ 3,
292
+ ],
293
+ [
294
+ "./examples/cybetruck.jpeg",
295
+ "photo of tesla cybertruck futuristic car 8k high definition on a sand dune in mars, future",
296
+ "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
297
+ 383472451451,
298
+ 8.5,
299
+ 2,
300
+ 0.8,
301
+ 0.8,
302
+ 0.0,
303
+ 0.9,
304
+ 2,
305
+ 3,
306
+ ],
307
+ [
308
+ "./examples/jesus.png",
309
+ "a photorealistic painting of Jesus Christ, 4k high definition",
310
+ "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
311
+ 13317204146129588000,
312
+ 8.5,
313
+ 2,
314
+ 0.8,
315
+ 0.8,
316
+ 0.0,
317
+ 0.9,
318
+ 2,
319
+ 3,
320
+ ],
321
+ [
322
+ "./examples/anna-sullivan-DioLM8ViiO8-unsplash.jpg",
323
+ "A crowded stadium with enthusiastic fans watching a daytime sporting event, the stands filled with colorful attire and the sun casting a warm glow",
324
+ "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
325
+ 5623124123512,
326
+ 8.5,
327
+ 2,
328
+ 0.8,
329
+ 0.8,
330
+ 0.0,
331
+ 0.9,
332
+ 2,
333
+ 3,
334
+ ],
335
+ [
336
+ "./examples/img_aef651cb-2919-499d-aa49-6d4e2e21a56e_1024.jpg",
337
+ "a large red flower on a black background 4k high definition",
338
+ "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
339
+ 23123412341234,
340
+ 8.5,
341
+ 2,
342
+ 0.8,
343
+ 0.8,
344
+ 0.0,
345
+ 0.9,
346
+ 2,
347
+ 3,
348
+ ],
349
+ [
350
+ "./examples/huggingface.jpg",
351
+ "photo realistic huggingface human emoji costume, round, yellow, (human skin)+++ (human texture)+++",
352
+ "blurry, ugly, duplicate, poorly drawn, deformed, mosaic, emoji cartoon, drawing, pixelated",
353
+ 12312353423,
354
+ 15.206,
355
+ 2,
356
+ 0.364,
357
+ 0.8,
358
+ 0.0,
359
+ 0.9,
360
+ 2,
361
+ 3,
362
+ ],
363
+ ],
364
+ cache_examples="lazy",
365
+ )
366
+
367
+
368
+ demo.queue(api_open=False)
369
+ demo.launch(show_api=False)
canny_gpu.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from torchvision.transforms import ToTensor, ToPILImage
4
+ from PIL import Image
5
+
6
+
7
class SobelOperator(nn.Module):
    """Edge detector built from two fixed 3x3 Sobel convolutions.

    The gradient magnitude ``sqrt(gx^2 + gy^2)`` of the grayscale input is
    normalised by its maximum and then double-thresholded: values at or above
    ``high_threshold`` become 1, values at or below ``low_threshold`` become
    0, and everything in between keeps its normalised magnitude.
    """

    # Horizontal- and vertical-gradient kernels.
    SOBEL_KERNEL_X = torch.tensor(
        [[-1.0, 0.0, 1.0], [-2.0, 0.0, 2.0], [-1.0, 0.0, 1.0]]
    )
    SOBEL_KERNEL_Y = torch.tensor(
        [[-1.0, -2.0, -1.0], [0.0, 0.0, 0.0], [1.0, 2.0, 1.0]]
    )

    _OUTPUT_TYPES = ("pil", "tensor", "pil,tensor")

    def __init__(self, device="cuda"):
        """Create the two convolutions on ``device`` and load the kernels."""
        super().__init__()
        self.device = device
        self.edge_conv_x = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to(
            self.device
        )
        self.edge_conv_y = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to(
            self.device
        )
        # clone(): on CPU ``.to`` is a no-op, so without a copy the parameter
        # would alias the class-level kernel shared by every instance.
        self.edge_conv_x.weight = nn.Parameter(
            self.SOBEL_KERNEL_X.view((1, 1, 3, 3)).clone().to(self.device)
        )
        self.edge_conv_y.weight = nn.Parameter(
            self.SOBEL_KERNEL_Y.view((1, 1, 3, 3)).clone().to(self.device)
        )

    @torch.no_grad()
    def forward(
        self,
        image: Image.Image,
        low_threshold: float,
        high_threshold: float,
        output_type="pil",
    ) -> Image.Image | torch.Tensor | tuple[Image.Image, torch.Tensor]:
        """Compute the thresholded edge map of ``image``.

        Args:
            image: PIL image; it is converted to grayscale first.
            low_threshold: Normalised magnitudes at or below this become 0.
            high_threshold: Normalised magnitudes at or above this become 1.
            output_type: ``"pil"``, ``"tensor"`` or ``"pil,tensor"``.

        Returns:
            A PIL image, the edge tensor (batch and channel dimensions of
            size 1 kept), or a ``(PIL image, tensor)`` pair, depending on
            ``output_type``.

        Raises:
            ValueError: If ``output_type`` is not one of the supported values.
        """
        if output_type not in self._OUTPUT_TYPES:
            raise ValueError(
                f"output_type must be one of {self._OUTPUT_TYPES}, got {output_type!r}"
            )

        # Convert PIL image to PyTorch tensor
        image_gray = image.convert("L")
        image_tensor = ToTensor()(image_gray).unsqueeze(0).to(self.device)

        # Compute gradients
        edge_x = self.edge_conv_x(image_tensor)
        edge_y = self.edge_conv_y(image_tensor)
        edge = torch.sqrt(torch.square(edge_x) + torch.square(edge_y))

        # Normalize to 0-1 in place. The divisor is clamped so a flat image
        # (max gradient 0) yields an all-zero map instead of NaNs from 0/0.
        edge.div_(edge.max().clamp_min(torch.finfo(edge.dtype).tiny))

        # Apply thresholding
        edge[edge >= high_threshold] = 1.0
        edge[edge <= low_threshold] = 0.0

        # Convert the result back to the requested representation
        if output_type == "tensor":
            return edge
        pil_edge = ToPILImage()(edge.squeeze(0).cpu())
        if output_type == "pil":
            return pil_edge
        return pil_edge, edge
60
+
61
+
62
class ScharrOperator(nn.Module):
    """Edge detector built from two fixed 3x3 Scharr convolutions.

    The gradient magnitude is approximated as ``|gx| + |gy|``, normalised by
    its maximum and then double-thresholded: values at or above
    ``high_threshold`` become 1, values at or below ``low_threshold`` become
    0, and everything in between keeps its normalised magnitude. The result
    can optionally be inverted.
    """

    # Horizontal- and vertical-gradient kernels.
    SCHARR_KERNEL_X = torch.tensor(
        [[-3.0, 0.0, 3.0], [-10.0, 0.0, 10.0], [-3.0, 0.0, 3.0]]
    )
    SCHARR_KERNEL_Y = torch.tensor(
        [[-3.0, -10.0, -3.0], [0.0, 0.0, 0.0], [3.0, 10.0, 3.0]]
    )

    _OUTPUT_TYPES = ("pil", "tensor", "pil,tensor")

    def __init__(self, device="cuda"):
        """Create the two convolutions on ``device`` and load the kernels."""
        super().__init__()
        self.device = device
        self.edge_conv_x = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to(
            self.device
        )
        self.edge_conv_y = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to(
            self.device
        )
        # clone(): on CPU ``.to`` is a no-op, so without a copy the parameter
        # would alias the class-level kernel shared by every instance.
        self.edge_conv_x.weight = nn.Parameter(
            self.SCHARR_KERNEL_X.view((1, 1, 3, 3)).clone().to(self.device)
        )
        self.edge_conv_y.weight = nn.Parameter(
            self.SCHARR_KERNEL_Y.view((1, 1, 3, 3)).clone().to(self.device)
        )

    @torch.no_grad()
    def forward(
        self,
        image: Image.Image,
        low_threshold: float,
        high_threshold: float,
        output_type="pil",
        invert: bool = False,
    ) -> Image.Image | torch.Tensor | tuple[Image.Image, torch.Tensor]:
        """Compute the thresholded edge map of ``image``.

        Args:
            image: PIL image; it is converted to grayscale first.
            low_threshold: Normalised magnitudes at or below this become 0.
            high_threshold: Normalised magnitudes at or above this become 1.
            output_type: ``"pil"``, ``"tensor"`` or ``"pil,tensor"``.
            invert: When true, return ``1 - edge`` instead of ``edge``.

        Returns:
            A PIL image, the edge tensor (batch and channel dimensions of
            size 1 kept), or a ``(PIL image, tensor)`` pair, depending on
            ``output_type``.

        Raises:
            ValueError: If ``output_type`` is not one of the supported values.
        """
        if output_type not in self._OUTPUT_TYPES:
            raise ValueError(
                f"output_type must be one of {self._OUTPUT_TYPES}, got {output_type!r}"
            )

        # Convert PIL image to PyTorch tensor
        image_gray = image.convert("L")
        image_tensor = ToTensor()(image_gray).unsqueeze(0).to(self.device)

        # Compute gradients
        edge_x = self.edge_conv_x(image_tensor)
        edge_y = self.edge_conv_y(image_tensor)
        edge = torch.abs(edge_x) + torch.abs(edge_y)

        # Normalize to 0-1 in place. The divisor is clamped so a flat image
        # (max gradient 0) yields an all-zero map instead of NaNs from 0/0.
        edge.div_(edge.max().clamp_min(torch.finfo(edge.dtype).tiny))

        # Apply thresholding
        edge[edge >= high_threshold] = 1.0
        edge[edge <= low_threshold] = 0.0
        if invert:
            edge = 1 - edge

        # Convert the result back to the requested representation
        if output_type == "tensor":
            return edge
        pil_edge = ToPILImage()(edge.squeeze(0).cpu())
        if output_type == "pil":
            return pil_edge
        return pil_edge, edge
examples/anna-sullivan-DioLM8ViiO8-unsplash.jpg ADDED
examples/cybetruck.jpeg ADDED
examples/huggingface.jpg ADDED
examples/img_aef651cb-2919-499d-aa49-6d4e2e21a56e_1024.jpg ADDED
examples/jesus.png ADDED
examples/lara.jpeg ADDED
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==4.29.0
2
+ accelerate
3
+ transformers
4
+ torchvision
5
+ xformers
6
+ accelerate
7
+ invisible-watermark
8
+ huggingface-hub
9
+ hf-transfer
10
+ gradio_imageslider==0.0.20
11
+ compel
12
+ opencv-python
13
+ numpy
14
+ diffusers==0.27.0
15
+ transformers
16
+ accelerate
17
+ safetensors
18
+ hidiffusion==0.1.8
19
+ spaces
20
+ torch==2.2
21
+ controlnet-aux @ git+https://github.com/huggingface/controlnet_aux