primerz committed
Commit b8a464d · verified · 1 Parent(s): 12fc679

Update app.py

Files changed (1)
  1. app.py +85 -277
app.py CHANGED
@@ -38,7 +38,7 @@ from compel import Compel, ReturnedEmbeddingsType
 
 from gradio_imageslider import ImageSlider
 
-# Load LoRA configurations - now only LucasArts style
+# Load LoRA configurations
 with open("sdxl_loras.json", "r") as file:
     data = json.load(file)
     sdxl_loras_raw = [
@@ -61,8 +61,9 @@ with open("sdxl_loras.json", "r") as file:
 
 with open("defaults_data.json", "r") as file:
     lora_defaults = json.load(file)
+
 
-device = "cuda"
+device = "cuda"
 
 # Cache for LoRA state dicts
 state_dicts = {}
@@ -80,7 +81,7 @@ for item in sdxl_loras_raw:
     }
 
 sdxl_loras_raw = [item for item in sdxl_loras_raw if item.get("new") != True]
-
+
 # Download models
 hf_hub_download(
     repo_id="InstantX/InstantID",
@@ -111,6 +112,7 @@ app.prepare(ctx_id=0, det_size=(768, 768))
 face_adapter = f'/data/checkpoints/ip-adapter.bin'
 controlnet_path = f'/data/checkpoints/ControlNetModel'
 
+# Load IdentityNet
 st = time.time()
 identitynet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)
 zoedepthnet = ControlNetModel.from_pretrained("diffusers/controlnet-zoe-depth-sdxl-1.0", torch_dtype=torch.float16)
@@ -123,9 +125,8 @@ et = time.time()
 print('Loading VAE took: ', et - st, 'seconds')
 
 st = time.time()
-# CHANGED: Using AlbedoBase XL v2.1 for better quality
 pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(
-    "frankjoshua/albedobaseXL_v21",
+    "SG161222/RealVisXL_V5.0",
     vae=vae,
     controlnet=[identitynet, zoedepthnet],
     torch_dtype=torch.float16
@@ -133,8 +134,7 @@ pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(
 
 pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True)
 pipe.load_ip_adapter_instantid(face_adapter)
-# IMPROVED: Higher IP adapter scale for better face preservation
-pipe.set_ip_adapter_scale(1.0)
+pipe.set_ip_adapter_scale(0.9)
 et = time.time()
 print('Loading pipeline took: ', et - st, 'seconds')
 
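Note: set_ip_adapter_scale() is the main identity-vs-style knob for InstantID; 1.0 pushes the face embedding hard, lower values give the base model and LoRA more room. A quick, hypothetical way to pick a value is to sweep it on a fixed seed (sketch only; the real call in this file passes prompt embeddings and control images, as shown further down):

    # Hypothetical scale sweep on a fixed seed; `pipe` is the pipeline built above.
    import torch

    for scale in (0.6, 0.8, 0.9, 1.0):
        pipe.set_ip_adapter_scale(scale)
        image = pipe(
            prompt="a person, lucasarts artstyle",
            generator=torch.Generator("cuda").manual_seed(0),  # same seed per run
        ).images[0]
        image.save(f"ip_scale_{scale:.1f}.png")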
@@ -159,123 +159,17 @@ last_lora = ""
 last_fused = False
 lora_archive = "/data"
 
-# Enhanced face detection with better face quality filtering
-def detect_faces(face_image, use_multiple_faces=False):
-    """
-    Detect faces in the image with quality filtering
-    Returns: list of face info dictionaries, or empty list if no faces
-    """
-    try:
-        face_info_list = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
-
-        if not face_info_list or len(face_info_list) == 0:
-            print("No faces detected")
-            return []
-
-        # Filter faces by quality score if available
-        filtered_faces = []
-        for face_info in face_info_list:
-            # Check if face has minimum quality
-            if 'det_score' in face_info and face_info['det_score'] > 0.5:
-                filtered_faces.append(face_info)
-            elif 'det_score' not in face_info:
-                filtered_faces.append(face_info)
-
-        if not filtered_faces:
-            print("No high-quality faces detected")
-            return []
-
-        # Sort faces by size (largest first)
-        filtered_faces = sorted(
-            filtered_faces,
-            key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]),
-            reverse=True
-        )
-
-        if use_multiple_faces:
-            print(f"Detected {len(filtered_faces)} high-quality faces")
-            return filtered_faces
-        else:
-            print(f"Using largest face (detected {len(filtered_faces)} total)")
-            return [filtered_faces[0]]
-
-    except Exception as e:
-        print(f"Face detection error: {e}")
-        return []
-
-def process_face_embeddings_separately(face_info_list):
-    """
-    Process face embeddings separately for multi-face generation
-    Returns: list of individual face embeddings
-    """
-    if not face_info_list:
-        return []
-
-    embeddings = [face_info['embedding'] for face_info in face_info_list]
-    return embeddings
-
-def create_face_kps_image(face_image, face_info_list):
-    """
-    Create keypoints image from face info with enhanced visibility
-    """
-    if not face_info_list:
-        return face_image
-
-    # For multiple faces, draw all keypoints with different colors
-    if len(face_info_list) > 1:
-        return draw_multiple_kps(face_image, [f['kps'] for f in face_info_list])
-    else:
-        return draw_kps(face_image, face_info_list[0]['kps'])
-
-def draw_multiple_kps(image_pil, kps_list, color_list=[(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)]):
-    """
-    Draw keypoints for multiple faces with enhanced visibility
-    """
-    stickwidth = 4
-    limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
-
-    w, h = image_pil.size
-    out_img = np.zeros([h, w, 3])
-
-    for idx, kps in enumerate(kps_list):
-        kps = np.array(kps)
-        # Use different colors for different faces
-        color_offset = idx % len(color_list)
-
-        for i in range(len(limbSeq)):
-            index = limbSeq[i]
-            color = color_list[(index[0] + color_offset) % len(color_list)]
-
-            x = kps[index][:, 0]
-            y = kps[index][:, 1]
-            length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
-            angle = np.degrees(np.arctan2(y[0] - y[1], x[0] - x[1]))
-            polygon = cv2.ellipse2Poly(
-                (int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1
-            )
-            out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
-
-        out_img = (out_img * 0.6).astype(np.uint8)
-
-        for idx_kp, kp in enumerate(kps):
-            color = color_list[(idx_kp + color_offset) % len(color_list)]
-            x, y = kp
-            out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)
-
-    out_img_pil = Image.fromarray(out_img.astype(np.uint8))
-    return out_img_pil
-
 def update_selection(selected_state: gr.SelectData, sdxl_loras, face_strength, image_strength, weight, depth_control_scale, negative, is_new=False):
     lora_repo = sdxl_loras[selected_state.index]["repo"]
     new_placeholder = "Type a prompt to use your selected LoRA"
     weight_name = sdxl_loras[selected_state.index]["weights"]
-    updated_text = f"### Selected: [{lora_repo}](https://huggingface.co/{lora_repo}) ✨ {'(non-commercial LoRA, `cc-by-nc`)' if sdxl_loras[selected_state.index]['is_nc'] else '' }"
+    updated_text = f"### Selected: [{lora_repo}](https://huggingface.co/{lora_repo}) {'(non-commercial LoRA, `cc-by-nc`)' if sdxl_loras[selected_state.index]['is_nc'] else '' }"
 
     for lora_list in lora_defaults:
         if lora_list["model"] == sdxl_loras[selected_state.index]["repo"]:
-            face_strength = lora_list.get("face_strength", 1.0)
-            image_strength = lora_list.get("image_strength", 0.15)
-            weight = lora_list.get("weight", 1.0)
+            face_strength = lora_list.get("face_strength", 0.9)
+            image_strength = lora_list.get("image_strength", 0.2)
+            weight = lora_list.get("weight", 0.95)
             depth_control_scale = lora_list.get("depth_control_scale", 0.8)
             negative = lora_list.get("negative", "")
 
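For reference, update_selection (and run_lora below) look up per-LoRA defaults in defaults_data.json by matching "model" against the selected repo. An entry shaped like the following would satisfy every .get() in this hunk; the repo id and prompt are illustrative, not from the commit:

    # Illustrative defaults_data.json entry, written as a Python literal.
    lora_default_entry = {
        "model": "someuser/lucasarts-style-lora",  # hypothetical; must match "repo" in sdxl_loras.json
        "prompt": "a portrait of <subject>, lucasarts artstyle",  # <subject> is replaced by the user prompt
        "negative": "worst quality, low quality",
        "face_strength": 0.9,
        "image_strength": 0.2,
        "weight": 0.95,
        "depth_control_scale": 0.8,
    }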
@@ -318,10 +212,9 @@ def resize_image_aspect_ratio(img, max_dim=1280):
 
 
 def run_lora(face_image, prompt, negative, lora_scale, selected_state, face_strength, image_strength,
-             guidance_scale, depth_control_scale, sdxl_loras, custom_lora, use_multiple_faces=False,
-             progress=gr.Progress(track_tqdm=True)):
+             guidance_scale, depth_control_scale, sdxl_loras, custom_lora, progress=gr.Progress(track_tqdm=True)):
     """
-    Enhanced run_lora with improved face preservation and landscape mode
+    Working version - matches old code exactly
     """
     print("Custom LoRA:", custom_lora)
     custom_lora_path = custom_lora[0] if custom_lora else None
@@ -330,55 +223,41 @@ def run_lora(face_image, prompt, negative, lora_scale, selected_state, face_stre
     st = time.time()
     face_image = resize_image_aspect_ratio(face_image)
 
-    # Enhanced face detection
-    face_info_list = detect_faces(face_image, use_multiple_faces)
-    face_detected = len(face_info_list) > 0
-
-    if face_detected:
-        # CHANGED: Process faces separately instead of averaging
-        face_embeddings = process_face_embeddings_separately(face_info_list)
-        face_kps = create_face_kps_image(face_image, face_info_list)
-        print(f"Processing with {len(face_info_list)} face(s) separately")
-
-        # For multiple faces, we'll generate with the primary face (largest)
-        face_emb = face_embeddings[0]
-    else:
+    # Simple working face detection
+    face_detected = True
+    try:
+        face_info = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
+        face_info = sorted(face_info, key=lambda x: (x['bbox'][2]-x['bbox'][0]) * (x['bbox'][3]-x['bbox'][1]))[-1]
+        face_emb = face_info['embedding']
+        face_kps = draw_kps(face_image, face_info['kps'])
+    except:
+        face_detected = False
         face_emb = None
         face_kps = face_image
-        print("No faces detected - using enhanced landscape/depth mode")
 
     et = time.time()
     print('Face processing took:', et - st, 'seconds')
 
     st = time.time()
 
-    # Enhanced prompt processing
+    # Prompt processing
     if custom_lora_path and custom_lora[1]:
        prompt = f"{prompt} {custom_lora[1]}"
-    elif selected_state_index >= 0 and selected_state_index < len(sdxl_loras):
-        # Only apply default prompt template if we have a valid selection
-        for lora_list in lora_defaults:
-            if lora_list["model"] == sdxl_loras[selected_state_index]["repo"]:
-                prompt_full = lora_list.get("prompt", None)
-                if prompt_full:
-                    prompt = prompt_full.replace("<subject>", prompt)
-                    break  # Found the matching template
-
-    # Add LucasArts trigger word if not present (check for both variations)
-    if "lucasarts" not in prompt.lower():
-        prompt = f"{prompt}, lucasarts artstyle"
-
-    print("Constructed prompt:", prompt)
+    else:
+        if selected_state_index >= 0 and selected_state_index < len(sdxl_loras):
+            for lora_list in lora_defaults:
+                if lora_list["model"] == sdxl_loras[selected_state_index]["repo"]:
+                    prompt_full = lora_list.get("prompt", None)
+                    if prompt_full:
+                        prompt = prompt_full.replace("<subject>", prompt)
+
+    print("Prompt:", prompt)
     if prompt == "":
-        prompt = "a beautiful cinematic scene" if not face_detected else "a person in cinematic lighting"
-    print(f"Final prompt to execute: {prompt}")
+        prompt = "a person"
+    print(f"Executing prompt: {prompt}")
 
     if negative == "":
-        # Enhanced negative prompt
-        if not face_detected:
-            negative = "worst quality, low quality, blurry, distorted, deformed, ugly, bad anatomy"
-        else:
-            negative = "worst quality, low quality, blurry, distorted, deformed, ugly, bad anatomy, bad proportions"
+        negative = None
 
     print("Custom Loaded LoRA:", custom_lora_path)
 
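The replacement face path boils down to one heuristic: run insightface once and keep the face with the largest bounding-box area. Factored out as a standalone helper it would look like this (a sketch; `app` is the FaceAnalysis instance prepared earlier in app.py):

    import cv2
    import numpy as np
    from PIL import Image

    def largest_face(face_image: Image.Image):
        """Return insightface info for the biggest detected face, or None."""
        faces = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
        if not faces:
            return None
        # bbox is [x1, y1, x2, y2]; area picks the most prominent subject
        return max(faces, key=lambda f: (f["bbox"][2] - f["bbox"][0]) * (f["bbox"][3] - f["bbox"][1]))

Note that the bare except: in the hunk also catches the IndexError raised by [-1] when no face is found, which is exactly what flips run_lora into the depth-only branch.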
@@ -387,11 +266,12 @@ def run_lora(face_image, prompt, negative, lora_scale, selected_state, face_stre
     elif custom_lora_path:
         repo_name = custom_lora_path
         full_path_lora = custom_lora_path
-    elif selected_state_index >= 0 and selected_state_index < len(sdxl_loras):
-        repo_name = sdxl_loras[selected_state_index]["repo"]
-        full_path_lora = state_dicts[repo_name]["saved_name"]
     else:
-        raise gr.Error("Invalid style selection. Please select a style again.")
+        if selected_state_index >= 0 and selected_state_index < len(sdxl_loras):
+            repo_name = sdxl_loras[selected_state_index]["repo"]
+            full_path_lora = state_dicts[repo_name]["saved_name"]
+        else:
+            raise gr.Error("Invalid selection")
 
     repo_name = repo_name.rstrip("/").lower()
 
@@ -400,22 +280,8 @@ def run_lora(face_image, prompt, negative, lora_scale, selected_state, face_stre
     et = time.time()
     print('Prompt processing took:', et - st, 'seconds')
 
-    # IMPROVED: Better parameter adjustment for face/landscape modes
-    if not face_detected:
-        # Enhanced landscape mode parameters
-        face_strength = 0.0
-        depth_control_scale = 1.0  # Maximum depth control for landscapes
-        image_strength = 0.25  # Higher structure preservation
-        print("Adjusted parameters for enhanced landscape mode")
-    else:
-        # Enhanced face preservation
-        face_strength = max(face_strength, 1.0)  # Ensure strong face preservation
-        depth_control_scale = max(depth_control_scale, 0.8)  # Good depth control
-        print("Adjusted parameters for enhanced face preservation")
-
     st = time.time()
 
-    # Generate single image with best face (or landscape)
     image = generate_image(
         prompt, negative, face_emb, face_image, face_kps, image_strength,
         guidance_scale, face_strength, depth_control_scale, repo_name,
@@ -427,7 +293,7 @@ def run_lora(face_image, prompt, negative, lora_scale, selected_state, face_stre
 run_lora.zerogpu = True
 
 
-@spaces.GPU(duration=90)  # Increased duration for better quality
+@spaces.GPU(duration=75)
 def generate_image(prompt, negative, face_emb, face_image, face_kps, image_strength, guidance_scale,
                    face_strength, depth_control_scale, repo_name, loaded_state_dict, lora_scale,
                    sdxl_loras, selected_state_index, face_detected, st):
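On the decorator change: duration is the GPU time budget a ZeroGPU Space requests for each decorated call, so 90 → 75 seconds matches the cheaper 36-step run below. Minimal usage sketch (assumes the Space runs on ZeroGPU hardware; elsewhere the decorator has no effect):

    import spaces
    import torch

    @spaces.GPU(duration=75)  # requested GPU slot, in seconds, per call
    def gpu_smoke_test() -> str:
        # CUDA is only guaranteed inside the decorated function on ZeroGPU
        return torch.cuda.get_device_name(0)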
@@ -436,17 +302,9 @@ def generate_image(prompt, negative, face_emb, face_image, face_kps, image_stren
     print("Loaded state dict:", loaded_state_dict)
     print("Last LoRA:", last_lora, "| Current LoRA:", repo_name)
 
-    # IMPROVED: Better control image preparation
-    depth_image = zoe(face_image)
-
-    if face_detected:
-        # Face mode: use both face keypoints and depth
-        control_images = [face_kps, depth_image]
-        control_scales = [face_strength, depth_control_scale]
-    else:
-        # Landscape mode: only depth control with enhanced parameters
-        control_images = [depth_image]
-        control_scales = [depth_control_scale]
+    # Control images setup
+    control_images = [face_kps, zoe(face_image)] if face_detected else [zoe(face_image)]
+    control_scales = [face_strength, depth_control_scale] if face_detected else [depth_control_scale]
 
     # Handle custom LoRA from HuggingFace
     if repo_name.startswith("https://huggingface.co"):
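A detail the condensed one-liners rely on: the pipeline was constructed with controlnet=[identitynet, zoedepthnet], and control_image / controlnet_conditioning_scale are matched to that list by position. Keeping each image next to its scale makes the ordering invariant explicit; a sketch with the names from this file:

    # (image, scale) pairs in construction order [identitynet, zoedepthnet];
    # the no-face branch keeps only the depth ControlNet active.
    depth_image = zoe(face_image)
    if face_detected:
        pairs = [(face_kps, face_strength), (depth_image, depth_control_scale)]
    else:
        pairs = [(depth_image, depth_control_scale)]
    control_images = [image for image, _ in pairs]
    control_scales = [scale for _, scale in pairs]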
@@ -463,84 +321,41 @@ def generate_image(prompt, negative, face_emb, face_image, face_kps, image_stren
     else:
         full_path_lora = loaded_state_dict
 
-    # Improved LoRA loading and caching
+    # LoRA loading
     if last_lora != repo_name:
         if last_fused:
             pipe.unfuse_lora()
             pipe.unload_lora_weights()
             pipe.unload_textual_inversion()
 
-        # Load LoRA with better error handling
-        try:
-            pipe.load_lora_weights(full_path_lora)
-            pipe.fuse_lora(lora_scale=lora_scale)
-            last_fused = True
-
-            # Handle pivotal tuning embeddings (if needed for future LoRAs)
-            # Only check this if we're not using a custom LoRA and have a valid index
-            if selected_state_index >= 0 and selected_state_index < len(sdxl_loras):
-                is_pivotal = sdxl_loras[selected_state_index]["is_pivotal"]
-                if is_pivotal:
-                    text_embedding_name = sdxl_loras[selected_state_index]["text_embedding_weights"]
-                    embedding_path = hf_hub_download(repo_id=repo_name, filename=text_embedding_name, repo_type="model")
-                    state_dict_embedding = load_file(embedding_path)
-                    pipe.load_textual_inversion(
-                        state_dict_embedding["clip_l" if "clip_l" in state_dict_embedding else "text_encoders_0"],
-                        token=["<s0>", "<s1>"],
-                        text_encoder=pipe.text_encoder,
-                        tokenizer=pipe.tokenizer
-                    )
-                    pipe.load_textual_inversion(
-                        state_dict_embedding["clip_g" if "clip_g" in state_dict_embedding else "text_encoders_1"],
-                        token=["<s0>", "<s1>"],
-                        text_encoder=pipe.text_encoder_2,
-                        tokenizer=pipe.tokenizer_2
-                    )
-        except Exception as e:
-            print(f"Error loading LoRA: {e}")
-            import traceback
-            traceback.print_exc()
-            raise gr.Error(f"Failed to load LoRA: {str(e)}")
+        pipe.load_lora_weights(full_path_lora)
+        pipe.fuse_lora(lora_scale)
+        last_fused = True
+
+        # Handle pivotal tuning if needed
+        if selected_state_index >= 0 and selected_state_index < len(sdxl_loras):
+            is_pivotal = sdxl_loras[selected_state_index]["is_pivotal"]
+            if is_pivotal:
+                text_embedding_name = sdxl_loras[selected_state_index]["text_embedding_weights"]
+                embedding_path = hf_hub_download(repo_id=repo_name, filename=text_embedding_name, repo_type="model")
+                state_dict_embedding = load_file(embedding_path)
+                pipe.load_textual_inversion(
+                    state_dict_embedding["clip_l" if "clip_l" in state_dict_embedding else "text_encoders_0"],
+                    token=["<s0>", "<s1>"],
+                    text_encoder=pipe.text_encoder,
+                    tokenizer=pipe.tokenizer
+                )
+                pipe.load_textual_inversion(
+                    state_dict_embedding["clip_g" if "clip_g" in state_dict_embedding else "text_encoders_1"],
+                    token=["<s0>", "<s1>"],
+                    text_encoder=pipe.text_encoder_2,
+                    tokenizer=pipe.tokenizer_2
+                )
 
     print("Processing prompt...")
-
-    # Truncate prompts if they're too long for the tokenizer
-    # CLIP tokenizers have a max length of 77 tokens
-    def truncate_prompt(text, max_length=75):
-        """Truncate prompt to fit within token limits, leaving room for special tokens"""
-        if not text:
-            return text
-        try:
-            tokens = pipe.tokenizer(text, truncation=False, add_special_tokens=False)['input_ids']
-            if len(tokens) > max_length:
-                # Tokenize with truncation
-                truncated_text = pipe.tokenizer.decode(tokens[:max_length], skip_special_tokens=True)
-                print(f"Warning: Prompt truncated from {len(tokens)} to {max_length} tokens")
-                print(f"  Original: {text}")
-                print(f"  Truncated: {truncated_text}")
-                return truncated_text
-            return text
-        except Exception as e:
-            print(f"Warning: Could not truncate prompt, using as-is: {e}")
-            return text
-
-    prompt = truncate_prompt(prompt)
-    negative = truncate_prompt(negative) if negative else ""
-
-    try:
-        prompt_token_count = len(pipe.tokenizer(prompt)['input_ids'])
-        negative_token_count = len(pipe.tokenizer(negative)['input_ids']) if negative else 0
-        print(f"Prompt token count: {prompt_token_count}/77")
-        print(f"Negative prompt token count: {negative_token_count}/77")
-    except Exception as e:
-        print(f"Could not count tokens: {e}")
-
     conditioning, pooled = compel(prompt)
     negative_conditioning, negative_pooled = compel(negative) if negative else (None, None)
 
-    # IMPROVED: Enhanced generation parameters for better quality
-    num_inference_steps = 50  # Increased for better quality
-
     print("Generating image...")
     image = pipe(
         prompt_embeds=conditioning,
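The surviving compel(prompt) call returns both the per-token embeddings and the pooled embedding that SDXL's second text encoder provides. The Compel object itself is built earlier in app.py; for SDXL its documented construction looks roughly like this (treat the exact arguments as an assumption):

    from compel import Compel, ReturnedEmbeddingsType

    compel = Compel(
        tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
        text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
        returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
        requires_pooled=[False, True],
    )
    conditioning, pooled = compel("a person, lucasarts artstyle")

Compel also owns tokenization against CLIP's 77-token window (and can handle longer prompts via truncate_long_prompts=False), which is what makes the hand-rolled truncate_prompt helper above removable.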
@@ -551,9 +366,9 @@ def generate_image(prompt, negative, face_emb, face_image, face_kps, image_stren
         height=face_image.height,
         image_embeds=face_emb if face_detected else None,
         image=face_image,
-        strength=1-image_strength,  # Higher strength = more transformation
+        strength=1-image_strength,
         control_image=control_images,
-        num_inference_steps=num_inference_steps,
+        num_inference_steps=36,
         guidance_scale=guidance_scale,
         controlnet_conditioning_scale=control_scales,
     ).images[0]
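On strength=1-image_strength: the UI slider measures how much of the input image's structure to keep, while the diffusers img2img strength argument measures how much of the schedule is re-noised and re-run, so the two are complements. Worked through with this commit's defaults:

    image_strength = 0.20             # UI default: structure preservation
    strength = 1 - image_strength     # 0.80 of the noise schedule is applied
    num_inference_steps = 36
    steps_executed = int(num_inference_steps * strength)  # ≈ 28 denoising steps actually run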
@@ -644,7 +459,7 @@ def get_civitai_safetensors(link):
         gr.Warning("We couldn't find a SDXL LoRA on the model you've sent")
         raise Exception("We couldn't find a SDXL LoRA on the model you've sent")
     return model_data["name"], f"{lora_archive}/{safetensors_name}", trigger_word, image_url
-
+
 def check_custom_model(link):
     if(link.startswith("https://")):
         if(link.startswith("https://huggingface.co") or link.startswith("https://www.huggingface.co")):
@@ -686,12 +501,12 @@ with gr.Blocks(css="custom.css") as demo:
     gr_sdxl_loras = gr.State(value=sdxl_loras_raw)
     title = gr.HTML(
         """<h1><img src="https://i.imgur.com/DVoGw04.png">
-        <span>LucasArts Style - Enhanced Face Preservation<br><small style="
+        <span>LucasArts Style<br><small style="
         font-size: 13px;
         display: block;
         font-weight: normal;
         opacity: 0.75;
-        ">🔥 Improved: Better face identity preservation, Enhanced landscape mode, Multiple face support<br>AlbedoBase XL v2.1 + InstantID + ControlNet</small></span></h1>""",
+        ">🧨 diffusers InstantID + ControlNet</small></span></h1>""",
         elem_id="title",
     )
     selected_state = gr.State()
@@ -700,7 +515,7 @@ with gr.Blocks(css="custom.css") as demo:
     with gr.Row(elem_id="main_app"):
         with gr.Column(scale=4, elem_id="box_column"):
             with gr.Group(elem_id="gallery_box"):
-                photo = gr.Image(label="Upload a picture (with or without faces)", interactive=True, type="pil", height=300)
+                photo = gr.Image(label="Upload a picture", interactive=True, type="pil", height=300)
                 selected_loras = gr.Gallery(label="Selected LoRAs", height=80, show_share_button=False, visible=False, elem_id="gallery_selected")
                 gallery = gr.Gallery(
                     label="LucasArts Style",
@@ -717,7 +532,7 @@ with gr.Blocks(css="custom.css") as demo:
         with gr.Column(scale=5):
             with gr.Row():
                 prompt = gr.Textbox(label="Prompt", show_label=False, lines=1, max_lines=1,
-                                    info="Describe your subject or scene", value="a person", elem_id="prompt")
+                                    info="Describe your subject", value="a person", elem_id="prompt")
                 button = gr.Button("Run", elem_id="run_button")
 
             result = ImageSlider(
@@ -730,32 +545,25 @@ with gr.Blocks(css="custom.css") as demo:
             share_button = gr.Button("Share to community", elem_id="share-btn")
 
             with gr.Accordion("Advanced options", open=False):
-                use_multiple_faces = gr.Checkbox(
-                    label="Process multiple faces separately",
-                    value=False,
-                    info="Generate separate outputs for each detected face"
-                )
                 negative = gr.Textbox(label="Negative Prompt")
-                weight = gr.Slider(0, 10, value=1.0, step=0.1, label="LoRA weight")
+                weight = gr.Slider(0, 10, value=0.95, step=0.1, label="LoRA weight")
                 face_strength = gr.Slider(
-                    0, 2, value=1.0, step=0.01, label="Face identity strength",
-                    info="Higher = stronger face preservation (auto-adjusted for landscapes)"
+                    0, 2, value=0.9, step=0.01, label="Face strength",
+                    info="Higher values increase face likeness"
                 )
                 image_strength = gr.Slider(
-                    0, 1, value=0.15, step=0.01, label="Image structure strength",
-                    info="Lower = more transformation, Higher = more original structure"
+                    0, 1, value=0.20, step=0.01, label="Image strength",
+                    info="Higher values preserve more of the original structure"
                 )
                 guidance_scale = gr.Slider(
-                    0, 50, value=7.5, step=0.1, label="Guidance Scale",
-                    info="How closely to follow the prompt"
+                    0, 50, value=8, step=0.1, label="Guidance Scale"
                 )
                 depth_control_scale = gr.Slider(
-                    0, 1, value=0.8, step=0.01, label="Depth ControlNet strength",
-                    info="3D structure preservation (auto-maximized for landscapes)"
+                    0, 1, value=0.8, step=0.01, label="Zoe Depth ControlNet strength"
                 )
 
             prompt_title = gr.Markdown(
-                value="### Click 'Run' to generate with LucasArts style",
+                value="### Click on a LoRA in the gallery to select it",
                 visible=True,
                 elem_id="selected_lora",
             )
@@ -786,7 +594,7 @@ with gr.Blocks(css="custom.css") as demo:
     ).success(
         fn=run_lora,
         inputs=[photo, prompt, negative, weight, selected_state, face_strength, image_strength,
-                guidance_scale, depth_control_scale, gr_sdxl_loras, custom_loaded_lora, use_multiple_faces],
+                guidance_scale, depth_control_scale, gr_sdxl_loras, custom_loaded_lora],
         outputs=[result, share_group],
     )
 
@@ -797,7 +605,7 @@ with gr.Blocks(css="custom.css") as demo:
     ).success(
         fn=run_lora,
         inputs=[photo, prompt, negative, weight, selected_state, face_strength, image_strength,
-                guidance_scale, depth_control_scale, gr_sdxl_loras, custom_loaded_lora, use_multiple_faces],
+                guidance_scale, depth_control_scale, gr_sdxl_loras, custom_loaded_lora],
         outputs=[result, share_group],
     )
 