rbanfield committed on
Commit 05c6bed
1 Parent(s): c28312e

Upload folder using huggingface_hub

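The commit message refers to the standard huggingface_hub folder-upload flow. As a rough, hypothetical sketch of how a commit like this is typically produced (the folder path and repo id below are placeholders, not taken from this commit):

from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path=".",                 # local folder containing app.py
    repo_id="user/space-name",       # placeholder repo id
    repo_type="space",               # assuming a Space; could also be a model repo
    commit_message="Upload folder using huggingface_hub",
)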
Files changed (2)
  1. .app.py.swp +0 -0
  2. app.py +234 -57
.app.py.swp ADDED
Binary file (16.4 kB).
 
app.py CHANGED
@@ -13,23 +13,43 @@ import math
 import io
 from PIL import Image
 
-from diffusers import AutoencoderKL, StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler,StableDiffusionControlNetImg2ImgPipeline,StableDiffusionXLControlNetPipeline,DiffusionPipeline
+from diffusers import (
+    AutoencoderKL,
+    StableDiffusionControlNetPipeline,
+    ControlNetModel,
+    UniPCMultistepScheduler,
+    StableDiffusionControlNetImg2ImgPipeline,
+    StableDiffusionXLControlNetPipeline,
+    DiffusionPipeline,
+)
 from diffusers.utils import load_image
 from transformers import pipeline
 
 import gradio as gr
 
-vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)
+vae = AutoencoderKL.from_pretrained(
+    "stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16
+)
 
 
-canny_controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16)
+canny_controlnet = ControlNetModel.from_pretrained(
+    "lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16
+)
 canny_pipe = StableDiffusionControlNetPipeline.from_pretrained(
-    "SG161222/Realistic_Vision_V3.0_VAE", controlnet=canny_controlnet, torch_dtype=torch.float16, use_safetensors=True
+    "SG161222/Realistic_Vision_V3.0_VAE",
+    controlnet=canny_controlnet,
+    torch_dtype=torch.float16,
+    use_safetensors=True,
 )
 
-canny_controlnet_tile = ControlNetModel.from_pretrained("lllyasviel/control_v11f1e_sd15_tile", torch_dtype=torch.float16)
+canny_controlnet_tile = ControlNetModel.from_pretrained(
+    "lllyasviel/control_v11f1e_sd15_tile", torch_dtype=torch.float16
+)
 canny_pipe_img2img = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
-    "SG161222/Realistic_Vision_V3.0_VAE", controlnet=canny_controlnet_tile, torch_dtype=torch.float16, use_safetensors=True
+    "SG161222/Realistic_Vision_V3.0_VAE",
+    controlnet=canny_controlnet_tile,
+    torch_dtype=torch.float16,
+    use_safetensors=True,
 )
 canny_pipe_img2img.enable_model_cpu_offload()
 canny_pipe_img2img.enable_xformers_memory_efficient_attention()
@@ -40,10 +60,11 @@ canny_pipe.enable_model_cpu_offload()
 canny_pipe.enable_xformers_memory_efficient_attention()
 
 controlnet_xl = ControlNetModel.from_pretrained(
-    "diffusers/controlnet-canny-sdxl-1.0",
-    torch_dtype=torch.float16
+    "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16
+)
+vae_xl = AutoencoderKL.from_pretrained(
+    "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
 )
-vae_xl = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
 pipe_xl = StableDiffusionXLControlNetPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
     controlnet=controlnet_xl,
@@ -67,62 +88,100 @@ refiner = DiffusionPipeline.from_pretrained(
 refiner.enable_xformers_memory_efficient_attention()
 refiner.enable_model_cpu_offload()
 
+
 def resize_image_output(im, width, height):
-    im = np.array(im)
-    newSize = (width,height)
+    im = np.array(im)
+    newSize = (width, height)
     img = cv2.resize(im, newSize, interpolation=cv2.INTER_CUBIC)
     img = Image.fromarray(img)
     return img
 
-def resize_image(im, max_size = 590000):
-    [x,y,z] = im.shape
-    new_size = [0,0]
+
+def resize_image(im, max_size=590000):
+    [x, y, z] = im.shape
+    new_size = [0, 0]
 
-
     min_size = 262144
-    if x*y > max_size:
-        scale_ratio = math.sqrt((x*y)/max_size)
+    if x * y > max_size:
+        scale_ratio = math.sqrt((x * y) / max_size)
         new_size[0] = int(x / scale_ratio)
         new_size[1] = int(y / scale_ratio)
-    elif x*y <= min_size:
-        scale_ratio = math.sqrt((x*y)/min_size)
+    elif x * y <= min_size:
+        scale_ratio = math.sqrt((x * y) / min_size)
         new_size[0] = int(x / scale_ratio)
        	new_size[1] = int(y / scale_ratio)
     else:
        	new_size[0] = int(x)
        	new_size[1] = int(y)
-
+
     height = (new_size[0] // 8) * 8
     width = (new_size[1] // 8) * 8
-
-    newSize = (width,height)
+
+    newSize = (width, height)
     img = cv2.resize(im, newSize, interpolation=cv2.INTER_CUBIC)
     return img
 
-def process_canny_tile(input_image,control_image, x ,y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength_conditioning, scale, seed, eta, low_threshold, high_threshold):
+
+def process_canny_tile(
+    input_image,
+    control_image,
+    x,
+    y,
+    prompt,
+    a_prompt,
+    n_prompt,
+    num_samples,
+    image_resolution,
+    ddim_steps,
+    guess_mode,
+    strength_conditioning,
+    scale,
+    seed,
+    eta,
+    low_threshold,
+    high_threshold,
+):
 
     image = input_image
 
     return canny_pipe_img2img(
-        prompt = '',
+        prompt="",
         image=image,
-        control_image = image,
+        control_image=image,
         num_inference_steps=20,
         guidance_scale=4,
-        strength = 0.3,
-        guess_mode = True,
+        strength=0.3,
+        guess_mode=True,
         negative_prompt=n_prompt,
         num_images_per_prompt=1,
        	eta=eta,
-        generator=torch.Generator(device="cpu").manual_seed(seed)
+        generator=torch.Generator(device="cpu").manual_seed(seed),
     )
 
-def process_canny(input_image,x ,y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, low_threshold, high_threshold):
+
+def process_canny(
+    input_image,
+    x,
+    y,
+    prompt,
+    a_prompt,
+    n_prompt,
+    num_samples,
+    image_resolution,
+    ddim_steps,
+    guess_mode,
+    strength,
+    scale,
+    seed,
+    eta,
+    low_threshold,
+    high_threshold,
+):
 
     image = input_image
 
     return canny_pipe(
-        prompt=','.join([prompt,a_prompt]),
+        prompt=",".join([prompt, a_prompt]),
        	image=image,
        	height=x,
        	width=y,
@@ -132,15 +191,33 @@ def process_canny(input_image,x ,y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, low_threshold, high_threshold):
        	num_images_per_prompt=num_samples,
        	eta=eta,
        	controlnet_conditioning_scale=strength,
-        generator=torch.Generator(device="cpu").manual_seed(seed)
+        generator=torch.Generator(device="cpu").manual_seed(seed),
     )
 
-def process_canny_sdxl(input_image,x ,y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, low_threshold, high_threshold):
+
+def process_canny_sdxl(
+    input_image,
+    x,
+    y,
+    prompt,
+    a_prompt,
+    n_prompt,
+    num_samples,
+    image_resolution,
+    ddim_steps,
+    guess_mode,
+    strength,
+    scale,
+    seed,
+    eta,
+    low_threshold,
+    high_threshold,
+):
 
     image = input_image
-
+
     image = pipe_xl(
-        prompt=','.join([prompt,a_prompt]),
        	image=image,
+        prompt=",".join([prompt, a_prompt]),
        	height=x,
        	width=y,
@@ -151,31 +228,87 @@ def process_canny_sdxl(input_image,x ,y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, low_threshold, high_threshold):
        	eta=eta,
        	controlnet_conditioning_scale=strength,
        	generator=torch.Generator(device="cpu").manual_seed(seed),
-        output_type="latent"
+        output_type="latent",
     ).images
-
+
     return refiner(
-        prompt=prompt,
-        num_inference_steps=ddim_steps,
-        num_images_per_prompt=num_samples,
-        denoising_start=0.8,
-        image=image,
+        prompt=prompt,
+        num_inference_steps=ddim_steps,
+        num_images_per_prompt=num_samples,
+        denoising_start=0.8,
+        image=image,
     )
 
 
-def process(image, prompt, a_prompt, n_prompt, ddim_steps, strength, scale, seed, eta, low_threshold, high_threshold):
+def process(
+    image,
+    prompt,
+    a_prompt,
+    n_prompt,
+    ddim_steps,
+    strength,
+    scale,
+    seed,
+    eta,
+    low_threshold,
+    high_threshold,
+):
     image = load_image(image)
     image = np.array(image)
-    [x_orig,y_orig,z_orig] = image.shape
+    [x_orig, y_orig, z_orig] = image.shape
     image = resize_image(image)
-    [x,y,z] = image.shape
+    [x, y, z] = image.shape
 
     image = cv2.Canny(image, low_threshold, high_threshold)
     image = image[:, :, None]
     image = np.concatenate([image, image, image], axis=2)
     image = Image.fromarray(image)
 
-    return process_canny(image,x,y, prompt, a_prompt, n_prompt, 1, None, ddim_steps, False, float(strength), scale, seed, eta, low_threshold, high_threshold)[0]
+    result = process_canny(
+        image,
+        x,
+        y,
+        prompt,
+        a_prompt,
+        n_prompt,
+        1,
+        None,
+        ddim_steps,
+        False,
+        float(strength),
+        scale,
+        seed,
+        eta,
+        low_threshold,
+        high_threshold,
+    )
+
+    im = result.images[0]
+    im = resize_image_output(im, y_orig, x_orig)
+    highres = False
+    if highres:
+        result_upscaled = process_canny_tile(
+            im,
+            im,
+            x_orig,
+            y_orig,
+            prompt,
+            a_prompt,
+            n_prompt,
+            num_samples,
+            None,
+            ddim_steps,
+            False,
+            strength,
+            scale,
+            seed,
+            eta,
+            low_threshold,
+            high_threshold,
+        )
+        im = result_upscaled.images[0]
+
+    return im
 
 
 demo = gr.Blocks().queue()
@@ -190,22 +323,66 @@ with demo:
             input_prompt = gr.Textbox()
             run_button = gr.Button(label="Run")
 
-            with gr.Accordion("Advanced Options"):
-                strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
-                low_threshold = gr.Slider(label="Canny low threshold", minimum=1, maximum=255, value=100, step=1)
-                high_threshold = gr.Slider(label="Canny high threshold", minimum=1, maximum=255, value=200, step=1)
-                ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=20, step=1)
-                scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=7.5, step=0.1) # default value was 9.0
-                seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True)
+            with gr.Accordion("Advanced Options", open=False):
+                strength = gr.Slider(
+                    label="Control Strength",
+                    minimum=0.0,
+                    maximum=2.0,
+                    value=1.0,
+                    step=0.01,
+                )
+                low_threshold = gr.Slider(
+                    label="Canny low threshold",
+                    minimum=1,
+                    maximum=255,
+                    value=100,
+                    step=1,
+                )
+                high_threshold = gr.Slider(
+                    label="Canny high threshold",
+                    minimum=1,
+                    maximum=255,
+                    value=200,
+                    step=1,
+                )
+                ddim_steps = gr.Slider(
+                    label="Steps", minimum=1, maximum=100, value=20, step=1
+                )
+                scale = gr.Slider(
+                    label="Guidance Scale",
+                    minimum=0.1,
+                    maximum=30.0,
+                    value=7.5,
+                    step=0.1,
+                )  # default value was 9.0
+                seed = gr.Slider(
+                    label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True
+                )
                 eta = gr.Number(label="eta (DDIM)", value=0.0)
-                a_prompt = gr.Textbox(label="Added Prompt", value='best quality, extremely detailed')
-                n_prompt = gr.Textbox(label="Negative Prompt",
-                    value='longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality')
+                a_prompt = gr.Textbox(
+                    label="Added Prompt", value="best quality, extremely detailed"
+                )
+                n_prompt = gr.Textbox(
+                    label="Negative Prompt",
+                    value="longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
+                )
 
         with gr.Column():
-            result = gr.outputs.Image(label='Output', type="numpy")
-
-    ips = [input_image, input_prompt, a_prompt, n_prompt, ddim_steps, strength, scale, seed, eta, low_threshold, high_threshold]
+            result = gr.Image(label="Output", type="numpy")
+
+    ips = [
+        input_image,
+        input_prompt,
+        a_prompt,
+        n_prompt,
+        ddim_steps,
+        strength,
+        scale,
+        seed,
+        eta,
+        low_threshold,
+        high_threshold,
+    ]
    	run_button.click(fn=process, inputs=ips, outputs=[result])
 
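For orientation, the refactored process() entry point shown in this diff can be exercised outside Gradio roughly as follows. This is a hypothetical smoke test: the file name, prompts, and parameter values are placeholders, and a CUDA GPU with xformers is assumed for the fp16 pipelines.

# Hypothetical call; every literal below is a placeholder.
out = process(
    "input.jpg",                           # image path or URL accepted by load_image
    "a photo of a modern living room",     # prompt
    "best quality, extremely detailed",    # a_prompt
    "longbody, lowres, bad anatomy",       # n_prompt
    20,                                    # ddim_steps
    1.0,                                   # strength (ControlNet conditioning scale)
    7.5,                                   # scale (guidance)
    12345,                                 # seed
    0.0,                                   # eta
    100,                                   # low_threshold
    200,                                   # high_threshold
)
out.save("output.png")  # process() returns a PIL.Image resized back to the original size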