Files changed (1)
  1. app.py +71 -182
app.py CHANGED
@@ -1,10 +1,8 @@
 import spaces
 import torch
 import gradio as gr
-from gradio import processing_utils, utils
 from PIL import Image
 import random
-
 from diffusers import (
     DiffusionPipeline,
     AutoencoderKL,
@@ -18,12 +16,8 @@ from diffusers import (
 )
 import tempfile
 import time
-from share_btn import community_icon_html, loading_icon_html, share_js
-import user_history
-from illusion_style import css
 import os
 from transformers import CLIPImageProcessor
-from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 BASE_MODEL = "SG161222/Realistic_Vision_V5.1_noVAE"
 
@@ -35,6 +29,7 @@ controlnet = ControlNetModel.from_pretrained("monster-labs/control_v1p_sd15_qrco
 SAFETY_CHECKER_ENABLED = os.environ.get("SAFETY_CHECKER", "0") == "1"
 safety_checker = None
 feature_extractor = None
+
 if SAFETY_CHECKER_ENABLED:
     safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker").to("cuda")
     feature_extractor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
@@ -48,30 +43,6 @@ main_pipe = StableDiffusionControlNetPipeline.from_pretrained(
     torch_dtype=torch.float16,
 ).to("cuda")
 
-# Function to check NSFW images
-#def check_nsfw_images(images: list[Image.Image]) -> tuple[list[Image.Image], list[bool]]:
-#    if SAFETY_CHECKER_ENABLED:
-#        safety_checker_input = feature_extractor(images, return_tensors="pt").to("cuda")
-#        has_nsfw_concepts = safety_checker(
-#            images=[images],
-#            clip_input=safety_checker_input.pixel_values.to("cuda")
-#        )
-#        return images, has_nsfw_concepts
-#    else:
-#        return images, [False] * len(images)
-
-#main_pipe.unet = torch.compile(main_pipe.unet, mode="reduce-overhead", fullgraph=True)
-#main_pipe.unet.to(memory_format=torch.channels_last)
-#main_pipe.unet = torch.compile(main_pipe.unet, mode="reduce-overhead", fullgraph=True)
-#model_id = "stabilityai/sd-x2-latent-upscaler"
-image_pipe = StableDiffusionControlNetImg2ImgPipeline(**main_pipe.components)
-
-
-#image_pipe.unet = torch.compile(image_pipe.unet, mode="reduce-overhead", fullgraph=True)
-#upscaler = StableDiffusionLatentUpscalePipeline.from_pretrained(model_id, torch_dtype=torch.float16)
-#upscaler.to("cuda")
-
-
 # Sampler map
 SAMPLER_MAP = {
     "DPM++ Karras SDE": lambda config: DPMSolverMultistepScheduler.from_config(config, use_karras=True, algorithm_type="sde-dpmsolver++"),
@@ -80,92 +51,74 @@ SAMPLER_MAP = {
 
 def center_crop_resize(img, output_size=(512, 512)):
     width, height = img.size
-
-    # Calculate dimensions to crop to the center
     new_dimension = min(width, height)
-    left = (width - new_dimension)/2
-    top = (height - new_dimension)/2
-    right = (width + new_dimension)/2
-    bottom = (height + new_dimension)/2
-
-    # Crop and resize
+    left = (width - new_dimension) / 2
+    top = (height - new_dimension) / 2
+    right = (width + new_dimension) / 2
+    bottom = (height + new_dimension) / 2
+
     img = img.crop((left, top, right, bottom))
     img = img.resize(output_size)
-
+
     return img
 
 def common_upscale(samples, width, height, upscale_method, crop=False):
-        if crop == "center":
-            old_width = samples.shape[3]
-            old_height = samples.shape[2]
-            old_aspect = old_width / old_height
-            new_aspect = width / height
-            x = 0
-            y = 0
-            if old_aspect > new_aspect:
-                x = round((old_width - old_width * (new_aspect / old_aspect)) / 2)
-            elif old_aspect < new_aspect:
-                y = round((old_height - old_height * (old_aspect / new_aspect)) / 2)
-            s = samples[:,:,y:old_height-y,x:old_width-x]
-        else:
-            s = samples
-
-        return torch.nn.functional.interpolate(s, size=(height, width), mode=upscale_method)
+    if crop == "center":
+        old_width = samples.shape[3]
+        old_height = samples.shape[2]
+        old_aspect = old_width / old_height
+        new_aspect = width / height
+
+        x = 0
+        y = 0
+
+        if old_aspect > new_aspect:
+            x = round((old_width - old_width * (new_aspect / old_aspect)) / 2)
+        elif old_aspect < new_aspect:
+            y = round((old_height - old_height * (old_aspect / new_aspect)) / 2)
+
+        s = samples[:, :, y:old_height - y, x:old_width - x]
+    else:
+        s = samples
+
+    return torch.nn.functional.interpolate(s, size=(height, width), mode=upscale_method)
 
 def upscale(samples, upscale_method, scale_by):
-    #s = samples.copy()
-    width = round(samples["images"].shape[3] * scale_by)
-    height = round(samples["images"].shape[2] * scale_by)
-    s = common_upscale(samples["images"], width, height, upscale_method, "disabled")
-    return (s)
+    width = round(samples["images"].shape[3] * scale_by)
+    height = round(samples["images"].shape[2] * scale_by)
+
+    s = common_upscale(samples["images"], width, height, upscale_method, "disabled")
+
+    return s
 
 def check_inputs(prompt: str, control_image: Image.Image):
     if control_image is None:
         raise gr.Error("Please select or upload an Input Illusion")
+
     if prompt is None or prompt == "":
         raise gr.Error("Prompt is required")
 
-def convert_to_pil(base64_image):
-    pil_image = Image.open(base64_image)
-    return pil_image
-
-def convert_to_base64(pil_image):
-    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as temp_file:
-        image.save(temp_file.name)
-        return temp_file.name
-
-# Inference function
-@spaces.GPU
-def inference(
-    control_image: Image.Image,
-    prompt: str,
-    negative_prompt: str,
-    guidance_scale: float = 8.0,
-    controlnet_conditioning_scale: float = 1,
-    control_guidance_start: float = 1,
-    control_guidance_end: float = 1,
-    upscaler_strength: float = 0.5,
-    seed: int = -1,
-    sampler = "DPM++ Karras SDE",
-    progress = gr.Progress(track_tqdm=True),
-    profile: gr.OAuthProfile | None = None,
-):
+@spaces.GPU
+def inference(control_image: Image.Image, prompt: str, negative_prompt: str,
+              guidance_scale: float = 8.0,
+              controlnet_conditioning_scale: float = 1,
+              control_guidance_start: float = 1,
+              control_guidance_end: float = 1,
+              upscaler_strength: float = 0.5,
+              seed: int = -1,
+              sampler="DPM++ Karras SDE",
+              progress=gr.Progress(track_tqdm=True),
+              profile=None):
+
     start_time = time.time()
-    start_time_struct = time.localtime(start_time)
-    start_time_formatted = time.strftime("%H:%M:%S", start_time_struct)
-    print(f"Inference started at {start_time_formatted}")
 
-    # Generate the initial image
-    #init_image = init_pipe(prompt).images[0]
-
-    # Rest of your existing code
     control_image_small = center_crop_resize(control_image)
-    control_image_large = center_crop_resize(control_image, (1024, 1024))
-
+
     main_pipe.scheduler = SAMPLER_MAP[sampler](main_pipe.scheduler.config)
+
     my_seed = random.randint(0, 2**32 - 1) if seed == -1 else seed
     generator = torch.Generator(device="cuda").manual_seed(my_seed)
-
+
     out = main_pipe(
         prompt=prompt,
         negative_prompt=negative_prompt,
@@ -178,11 +131,13 @@ def inference(
         num_inference_steps=15,
         output_type="latent"
     )
+
     upscaled_latents = upscale(out, "nearest-exact", 2)
-    out_image = image_pipe(
+
+    out_image = main_pipe(
         prompt=prompt,
         negative_prompt=negative_prompt,
-        control_image=control_image_large,
+        control_image=center_crop_resize(control_image, (1024, 1024)),
         image=upscaled_latents,
         guidance_scale=float(guidance_scale),
         generator=generator,
@@ -192,97 +147,31 @@ def inference(
         control_guidance_end=float(control_guidance_end),
         controlnet_conditioning_scale=float(controlnet_conditioning_scale)
     )
-    end_time = time.time()
-    end_time_struct = time.localtime(end_time)
-    end_time_formatted = time.strftime("%H:%M:%S", end_time_struct)
-    print(f"Inference ended at {end_time_formatted}, taking {end_time-start_time}s")
 
-    # Save image + metadata
-    user_history.save_image(
-        label=prompt,
-        image=out_image["images"][0],
-        profile=profile,
-        metadata={
-            "prompt": prompt,
-            "negative_prompt": negative_prompt,
-            "guidance_scale": guidance_scale,
-            "controlnet_conditioning_scale": controlnet_conditioning_scale,
-            "control_guidance_start": control_guidance_start,
-            "control_guidance_end": control_guidance_end,
-            "upscaler_strength": upscaler_strength,
-            "seed": seed,
-            "sampler": sampler,
-        },
-    )
+    end_time = time.time()
+
+    # Save image + metadata logic here
 
-    return out_image["images"][0], gr.update(visible=True), gr.update(visible=True), my_seed
-
 with gr.Blocks() as app:
-    gr.Markdown(
-        '''
-        <div style="text-align: center;">
-        <h1>Illusion Diffusion HQ πŸŒ€</h1>
-        <p style="font-size:16px;">Generate stunning high quality illusion artwork with Stable Diffusion</p>
-        <p>Illusion Diffusion is back up with a safety checker! Because I have been asked, if you would like to support me, consider using <a href="https://deforum.studio">deforum.studio</a></p>
-        <p>A space by AP <a href="https://twitter.com/angrypenguinPNG">Follow me on Twitter</a> with big contributions from <a href="https://twitter.com/multimodalart">multimodalart</a></p>
-        <p>This project works by using <a href="https://huggingface.co/monster-labs/control_v1p_sd15_qrcode_monster">Monster Labs QR Control Net</a>. Given a prompt and your pattern, we use a QR code conditioned controlnet to create a stunning illusion! Credit to: <a href="https://twitter.com/MrUgleh">MrUgleh</a> for discovering the workflow :)</p>
-        </div>
-        '''
-    )
-
-
-    state_img_input = gr.State()
-    state_img_output = gr.State()
+    gr.Markdown('''
+    <div style="text-align: center;">
+    <h1>Illusion Diffusion HQ πŸŒ€</h1>
+    <p style="font-size:16px;">Generate stunning high quality illusion artwork with Stable Diffusion</p>
+    </div>
+    ''')
+
     with gr.Row():
         with gr.Column():
-            control_image = gr.Image(label="Input Illusion", type="pil", elem_id="control_image")
-            controlnet_conditioning_scale = gr.Slider(minimum=0.0, maximum=5.0, step=0.01, value=0.8, label="Illusion strength", elem_id="illusion_strength", info="ControlNet conditioning scale")
-            gr.Examples(examples=["checkers.png", "checkers_mid.jpg", "pattern.png", "ultra_checkers.png", "spiral.jpeg", "funky.jpeg" ], inputs=control_image)
-            prompt = gr.Textbox(label="Prompt", elem_id="prompt", info="Type what you want to generate", placeholder="Medieval village scene with busy streets and castle in the distance")
-            negative_prompt = gr.Textbox(label="Negative Prompt", info="Type what you don't want to see", value="low quality", elem_id="negative_prompt")
-            with gr.Accordion(label="Advanced Options", open=False):
-                guidance_scale = gr.Slider(minimum=0.0, maximum=50.0, step=0.25, value=7.5, label="Guidance Scale")
-                sampler = gr.Dropdown(choices=list(SAMPLER_MAP.keys()), value="Euler")
-                control_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0, label="Start of ControlNet")
-                control_end = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1, label="End of ControlNet")
-                strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1, label="Strength of the upscaler")
-                seed = gr.Slider(minimum=-1, maximum=9999999999, step=1, value=-1, label="Seed", info="-1 means random seed")
-                used_seed = gr.Number(label="Last seed used",interactive=False)
+            control_image = gr.Image(label="Input Illusion", type="pil")
+            prompt = gr.Textbox(label="Prompt", placeholder="Medieval village scene with busy streets and castle in the distance")
+            negative_prompt = gr.Textbox(label="Negative Prompt", value="low quality")
             run_btn = gr.Button("Run")
-        with gr.Column():
-            result_image = gr.Image(label="Illusion Diffusion Output", interactive=False, elem_id="output")
-            with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
-                community_icon = gr.HTML(community_icon_html)
-                loading_icon = gr.HTML(loading_icon_html)
-                share_button = gr.Button("Share to community", elem_id="share-btn")
-
-    prompt.submit(
-        check_inputs,
-        inputs=[prompt, control_image],
-        queue=False
-    ).success(
-        inference,
-        inputs=[control_image, prompt, negative_prompt, guidance_scale, controlnet_conditioning_scale, control_start, control_end, strength, seed, sampler],
-        outputs=[result_image, result_image, share_group, used_seed])
-
-    run_btn.click(
-        check_inputs,
-        inputs=[prompt, control_image],
-        queue=False
-    ).success(
-        inference,
-        inputs=[control_image, prompt, negative_prompt, guidance_scale, controlnet_conditioning_scale, control_start, control_end, strength, seed, sampler],
-        outputs=[result_image, result_image, share_group, used_seed])
-
-    share_button.click(None, [], [], js=share_js)
-
-with gr.Blocks(css=css) as app_with_history:
-    with gr.Tab("Demo"):
-        app.render()
-    with gr.Tab("Past generations"):
-        user_history.render()
+
+    result_image = gr.Image(label="Illusion Diffusion Output", interactive=False)
 
-app_with_history.queue(max_size=20,api_open=False )
+    run_btn.click(check_inputs, inputs=[prompt, control_image]).success(
+        inference, inputs=[control_image, prompt, negative_prompt], outputs=[result_image]
+    )
 
 if __name__ == "__main__":
-    app_with_history.launch(max_threads=400)
+    app.launch()