face-to-art

Runtime error

App Files Files Community

primerz committed 28 days ago

Commit

6590ed4

verified ·

1 Parent(s): b8a464d

Update app.py

Browse files

Files changed (1) hide show

app.py +171 -64

app.py CHANGED Viewed

@@ -61,9 +61,8 @@ with open("sdxl_loras.json", "r") as file:
 with open("defaults_data.json", "r") as file:
     lora_defaults = json.load(file)
-device = "cuda"
 # Cache for LoRA state dicts
 state_dicts = {}
@@ -81,7 +80,7 @@ for item in sdxl_loras_raw:
     }
 sdxl_loras_raw = [item for item in sdxl_loras_raw if item.get("new") != True]
 # Download models
 hf_hub_download(
     repo_id="InstantX/InstantID",
@@ -112,7 +111,6 @@ app.prepare(ctx_id=0, det_size=(768, 768))
 face_adapter = f'/data/checkpoints/ip-adapter.bin'
 controlnet_path = f'/data/checkpoints/ControlNetModel'
-# Load IdentityNet
 st = time.time()
 identitynet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)
 zoedepthnet = ControlNetModel.from_pretrained("diffusers/controlnet-zoe-depth-sdxl-1.0", torch_dtype=torch.float16)
@@ -159,6 +157,101 @@ last_lora = ""
 last_fused = False
 lora_archive = "/data"
 def update_selection(selected_state: gr.SelectData, sdxl_loras, face_strength, image_strength, weight, depth_control_scale, negative, is_new=False):
     lora_repo = sdxl_loras[selected_state.index]["repo"]
     new_placeholder = "Type a prompt to use your selected LoRA"
@@ -212,9 +305,13 @@ def resize_image_aspect_ratio(img, max_dim=1280):
 def run_lora(face_image, prompt, negative, lora_scale, selected_state, face_strength, image_strength,
-             guidance_scale, depth_control_scale, sdxl_loras, custom_lora, progress=gr.Progress(track_tqdm=True)):
     """
-    Working version - matches old code exactly
     """
     print("Custom LoRA:", custom_lora)
     custom_lora_path = custom_lora[0] if custom_lora else None
@@ -223,41 +320,42 @@ def run_lora(face_image, prompt, negative, lora_scale, selected_state, face_stre
     st = time.time()
     face_image = resize_image_aspect_ratio(face_image)
-    # Simple working face detection
-    face_detected = True
-    try:
-        face_info = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
-        face_info = sorted(face_info, key=lambda x: (x['bbox'][2]-x['bbox'][0]) * (x['bbox'][3]-x['bbox'][1]))[-1]
-        face_emb = face_info['embedding']
-        face_kps = draw_kps(face_image, face_info['kps'])
-    except:
-        face_detected = False
         face_emb = None
         face_kps = face_image
     et = time.time()
     print('Face processing took:', et - st, 'seconds')
     st = time.time()
-    # Prompt processing
     if custom_lora_path and custom_lora[1]:
         prompt = f"{prompt} {custom_lora[1]}"
     else:
-        if selected_state_index >= 0 and selected_state_index < len(sdxl_loras):
-            for lora_list in lora_defaults:
-                if lora_list["model"] == sdxl_loras[selected_state_index]["repo"]:
-                    prompt_full = lora_list.get("prompt", None)
-                    if prompt_full:
-                        prompt = prompt_full.replace("<subject>", prompt)
     print("Prompt:", prompt)
     if prompt == "":
-        prompt = "a person"
     print(f"Executing prompt: {prompt}")
     if negative == "":
-        negative = None
     print("Custom Loaded LoRA:", custom_lora_path)
@@ -267,11 +365,8 @@ def run_lora(face_image, prompt, negative, lora_scale, selected_state, face_stre
         repo_name = custom_lora_path
         full_path_lora = custom_lora_path
     else:
-        if selected_state_index >= 0 and selected_state_index < len(sdxl_loras):
-            repo_name = sdxl_loras[selected_state_index]["repo"]
-            full_path_lora = state_dicts[repo_name]["saved_name"]
-        else:
-            raise gr.Error("Invalid selection")
     repo_name = repo_name.rstrip("/").lower()
@@ -280,8 +375,15 @@ def run_lora(face_image, prompt, negative, lora_scale, selected_state, face_stre
     et = time.time()
     print('Prompt processing took:', et - st, 'seconds')
     st = time.time()
     image = generate_image(
         prompt, negative, face_emb, face_image, face_kps, image_strength,
         guidance_scale, face_strength, depth_control_scale, repo_name,
@@ -302,9 +404,14 @@ def generate_image(prompt, negative, face_emb, face_image, face_kps, image_stren
     print("Loaded state dict:", loaded_state_dict)
     print("Last LoRA:", last_lora, "| Current LoRA:", repo_name)
-    # Control images setup
-    control_images = [face_kps, zoe(face_image)] if face_detected else [zoe(face_image)]
-    control_scales = [face_strength, depth_control_scale] if face_detected else [depth_control_scale]
     # Handle custom LoRA from HuggingFace
     if repo_name.startswith("https://huggingface.co"):
@@ -321,19 +428,20 @@ def generate_image(prompt, negative, face_emb, face_image, face_kps, image_stren
     else:
         full_path_lora = loaded_state_dict
-    # LoRA loading
     if last_lora != repo_name:
         if last_fused:
             pipe.unfuse_lora()
             pipe.unload_lora_weights()
             pipe.unload_textual_inversion()
-        pipe.load_lora_weights(full_path_lora)
-        pipe.fuse_lora(lora_scale)
-        last_fused = True
-        # Handle pivotal tuning if needed
-        if selected_state_index >= 0 and selected_state_index < len(sdxl_loras):
             is_pivotal = sdxl_loras[selected_state_index]["is_pivotal"]
             if is_pivotal:
                 text_embedding_name = sdxl_loras[selected_state_index]["text_embedding_weights"]
@@ -351,11 +459,17 @@ def generate_image(prompt, negative, face_emb, face_image, face_kps, image_stren
                     text_encoder=pipe.text_encoder_2,
                     tokenizer=pipe.tokenizer_2
                 )
     print("Processing prompt...")
     conditioning, pooled = compel(prompt)
     negative_conditioning, negative_pooled = compel(negative) if negative else (None, None)
     print("Generating image...")
     image = pipe(
         prompt_embeds=conditioning,
@@ -368,7 +482,7 @@ def generate_image(prompt, negative, face_emb, face_image, face_kps, image_stren
         image=face_image,
         strength=1-image_strength,
         control_image=control_images,
-        num_inference_steps=36,
         guidance_scale=guidance_scale,
         controlnet_conditioning_scale=control_scales,
     ).images[0]
@@ -459,7 +573,7 @@ def get_civitai_safetensors(link):
         gr.Warning("We couldn't find a SDXL LoRA on the model you've sent")
         raise Exception("We couldn't find a SDXL LoRA on the model you've sent")
     return model_data["name"], f"{lora_archive}/{safetensors_name}", trigger_word, image_url
 def check_custom_model(link):
     if(link.startswith("https://")):
         if(link.startswith("https://huggingface.co") or link.startswith("https://www.huggingface.co")):
@@ -501,12 +615,12 @@ with gr.Blocks(css="custom.css") as demo:
     gr_sdxl_loras = gr.State(value=sdxl_loras_raw)
     title = gr.HTML(
         """<h1><img src="https://i.imgur.com/DVoGw04.png">
-<span>LucasArts Style<br><small style="
     font-size: 13px;
     display: block;
     font-weight: normal;
     opacity: 0.75;
-">🧨 diffusers InstantID + ControlNet</small></span></h1>""",
         elem_id="title",
     )
     selected_state = gr.State()
@@ -515,10 +629,10 @@ with gr.Blocks(css="custom.css") as demo:
     with gr.Row(elem_id="main_app"):
         with gr.Column(scale=4, elem_id="box_column"):
             with gr.Group(elem_id="gallery_box"):
-                photo = gr.Image(label="Upload a picture", interactive=True, type="pil", height=300)
                 selected_loras = gr.Gallery(label="Selected LoRAs", height=80, show_share_button=False, visible=False, elem_id="gallery_selected")
                 gallery = gr.Gallery(
-                    label="LucasArts Style",
                     allow_preview=False,
                     columns=4,
                     elem_id="gallery",
@@ -532,7 +646,7 @@ with gr.Blocks(css="custom.css") as demo:
         with gr.Column(scale=5):
             with gr.Row():
                 prompt = gr.Textbox(label="Prompt", show_label=False, lines=1, max_lines=1,
-                                   info="Describe your subject", value="a person", elem_id="prompt")
                 button = gr.Button("Run", elem_id="run_button")
             result = ImageSlider(
@@ -545,22 +659,15 @@ with gr.Blocks(css="custom.css") as demo:
                 share_button = gr.Button("Share to community", elem_id="share-btn")
             with gr.Accordion("Advanced options", open=False):
                 negative = gr.Textbox(label="Negative Prompt")
-                weight = gr.Slider(0, 10, value=0.95, step=0.1, label="LoRA weight")
-                face_strength = gr.Slider(
-                    0, 2, value=0.9, step=0.01, label="Face strength",
-                    info="Higher values increase face likeness"
-                )
-                image_strength = gr.Slider(
-                    0, 1, value=0.20, step=0.01, label="Image strength",
-                    info="Higher values preserve more of the original structure"
-                )
-                guidance_scale = gr.Slider(
-                    0, 50, value=8, step=0.1, label="Guidance Scale"
-                )
-                depth_control_scale = gr.Slider(
-                    0, 1, value=0.8, step=0.01, label="Zoe Depth ControlNet strength"
-                )
             prompt_title = gr.Markdown(
                 value="### Click on a LoRA in the gallery to select it",
@@ -594,7 +701,7 @@ with gr.Blocks(css="custom.css") as demo:
     ).success(
         fn=run_lora,
         inputs=[photo, prompt, negative, weight, selected_state, face_strength, image_strength,
-               guidance_scale, depth_control_scale, gr_sdxl_loras, custom_loaded_lora],
         outputs=[result, share_group],
     )
@@ -605,7 +712,7 @@ with gr.Blocks(css="custom.css") as demo:
     ).success(
         fn=run_lora,
         inputs=[photo, prompt, negative, weight, selected_state, face_strength, image_strength,
-               guidance_scale, depth_control_scale, gr_sdxl_loras, custom_loaded_lora],
         outputs=[result, share_group],
     )

 with open("defaults_data.json", "r") as file:
     lora_defaults = json.load(file)
+device = "cuda"
 # Cache for LoRA state dicts
 state_dicts = {}
     }
 sdxl_loras_raw = [item for item in sdxl_loras_raw if item.get("new") != True]
 # Download models
 hf_hub_download(
     repo_id="InstantX/InstantID",
 face_adapter = f'/data/checkpoints/ip-adapter.bin'
 controlnet_path = f'/data/checkpoints/ControlNetModel'
 st = time.time()
 identitynet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)
 zoedepthnet = ControlNetModel.from_pretrained("diffusers/controlnet-zoe-depth-sdxl-1.0", torch_dtype=torch.float16)
 last_fused = False
 lora_archive = "/data"
+# Improved face detection with multi-face support
+def detect_faces(face_image, use_multiple_faces=False):
+    """
+    Find faces in ``face_image`` using the module-level ``app`` detector.
+
+    The image is converted to a NumPy array and from RGB to BGR channel order
+    before being handed to ``app.get``. Detections are ranked by bounding-box
+    area, biggest first.
+
+    Returns:
+        - ``[]`` when nothing is detected or when any exception is raised
+          (the error is printed, not propagated, so callers can fall back to
+          their no-face path);
+        - every detection, largest first, when ``use_multiple_faces`` is true;
+        - a one-element list holding only the largest detection otherwise.
+    """
+    def _bbox_area(face):
+        # bbox is indexed as (x1, y1, x2, y2): width times height.
+        box = face['bbox']
+        return (box[2] - box[0]) * (box[3] - box[1])
+
+    try:
+        bgr_pixels = cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR)
+        found = app.get(bgr_pixels)
+        if not found or len(found) == 0:
+            print("No faces detected")
+            return []
+
+        ranked = list(found)
+        ranked.sort(key=_bbox_area, reverse=True)
+
+        if not use_multiple_faces:
+            print(f"Using largest face (detected {len(ranked)} total)")
+            return ranked[:1]
+
+        print(f"Detected {len(ranked)} faces")
+        return ranked
+    except Exception as e:
+        # Best effort: a detector failure is reported and treated as "no faces".
+        print(f"Face detection error: {e}")
+        return []
+def process_face_embeddings(face_info_list):
+    """
+    Reduce the detected faces to a single embedding.
+
+    Returns ``None`` when ``face_info_list`` is empty/falsy, the lone face's
+    ``'embedding'`` value unchanged when there is exactly one face, and the
+    element-wise mean (``np.mean`` over axis 0) of all ``'embedding'`` values
+    when there are several.
+    """
+    if not face_info_list:
+        return None
+
+    if len(face_info_list) == 1:
+        only_face = face_info_list[0]
+        return only_face['embedding']
+
+    # Several faces: blend them into one averaged embedding.
+    stacked = [entry['embedding'] for entry in face_info_list]
+    return np.mean(stacked, axis=0)
+def create_face_kps_image(face_image, face_info_list):
+    """
+    Build the keypoint image for the detected faces.
+
+    With no faces the input ``face_image`` is handed back untouched. A single
+    face is rendered with ``draw_kps``; two or more faces are rendered together
+    with ``draw_multiple_kps``, using each face's ``'kps'`` entry.
+    """
+    if not face_info_list:
+        return face_image
+
+    if len(face_info_list) == 1:
+        return draw_kps(face_image, face_info_list[0]['kps'])
+
+    # More than one face: collect every face's keypoints and draw them together.
+    all_keypoints = [face['kps'] for face in face_info_list]
+    return draw_multiple_kps(face_image, all_keypoints)
+def draw_multiple_kps(image_pil, kps_list, color_list=((255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255))):
+    """
+    Draw keypoints for multiple faces on a single black canvas.
+
+    Args:
+        image_pil: Image whose ``.size`` (width, height) sets the canvas size;
+            its pixels are not read.
+        kps_list: One entry per face, each a sequence of (x, y) keypoints.
+            The limb table below indexes keypoints 0-4, so every face needs
+            at least five points.
+        color_list: Per-keypoint colours, indexed by keypoint position. The
+            default is an immutable tuple so it cannot be shared-and-mutated
+            across calls.
+
+    Returns:
+        A ``PIL.Image`` (uint8) containing every face's limbs and keypoints.
+
+    The drawing is done in three passes over the whole canvas: all limbs,
+    then one global dimming of the limbs to 60 %, then all keypoint circles
+    at full intensity. Dimming once (rather than once per face) keeps every
+    face equally bright no matter where it sits in ``kps_list``; dimming
+    inside the per-face loop would darken earlier faces' limbs and circles
+    again for each face drawn after them.
+    """
+    stickwidth = 4
+    # Each limb joins one outer keypoint to keypoint 2.
+    limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
+    w, h = image_pil.size
+    out_img = np.zeros([h, w, 3])
+    faces = [np.array(kps) for kps in kps_list]
+
+    # Pass 1: limbs for every face, drawn as filled ellipses.
+    for kps in faces:
+        for index in limbSeq:
+            color = color_list[index[0]]
+            x = kps[index][:, 0]
+            y = kps[index][:, 1]
+            length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
+            angle = np.degrees(np.arctan2(y[0] - y[1], x[0] - x[1]))
+            polygon = cv2.ellipse2Poly(
+                (int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1
+            )
+            # OpenCV draws in place and returns the same array; no copy needed.
+            out_img = cv2.fillConvexPoly(out_img, polygon, color)
+
+    # Pass 2: dim the limbs exactly once so the keypoint circles stand out.
+    out_img = (out_img * 0.6).astype(np.uint8)
+
+    # Pass 3: keypoint circles for every face, at full intensity.
+    for kps in faces:
+        for idx_kp, (x, y) in enumerate(kps):
+            out_img = cv2.circle(out_img, (int(x), int(y)), 10, color_list[idx_kp], -1)
+
+    return Image.fromarray(out_img)
 def update_selection(selected_state: gr.SelectData, sdxl_loras, face_strength, image_strength, weight, depth_control_scale, negative, is_new=False):
     lora_repo = sdxl_loras[selected_state.index]["repo"]
     new_placeholder = "Type a prompt to use your selected LoRA"
 def run_lora(face_image, prompt, negative, lora_scale, selected_state, face_strength, image_strength,
+             guidance_scale, depth_control_scale, sdxl_loras, custom_lora, use_multiple_faces=False,
+             progress=gr.Progress(track_tqdm=True)):
     """
+    Enhanced run_lora with support for:
+    - No faces (landscape mode)
+    - Multiple faces
+    - Improved results
     """
     print("Custom LoRA:", custom_lora)
     custom_lora_path = custom_lora[0] if custom_lora else None
     st = time.time()
     face_image = resize_image_aspect_ratio(face_image)
+    # Enhanced face detection
+    face_info_list = detect_faces(face_image, use_multiple_faces)
+    face_detected = len(face_info_list) > 0
+    if face_detected:
+        face_emb = process_face_embeddings(face_info_list)
+        face_kps = create_face_kps_image(face_image, face_info_list)
+        print(f"Processing with {len(face_info_list)} face(s)")
+    else:
         face_emb = None
         face_kps = face_image
+        print("No faces detected - using landscape/depth mode only")
     et = time.time()
     print('Face processing took:', et - st, 'seconds')
     st = time.time()
+    # Enhanced prompt processing
     if custom_lora_path and custom_lora[1]:
         prompt = f"{prompt} {custom_lora[1]}"
     else:
+        for lora_list in lora_defaults:
+            if lora_list["model"] == sdxl_loras[selected_state_index]["repo"]:
+                prompt_full = lora_list.get("prompt", None)
+                if prompt_full:
+                    prompt = prompt_full.replace("<subject>", prompt)
     print("Prompt:", prompt)
     if prompt == "":
+        prompt = "a beautiful scene" if not face_detected else "a person"
     print(f"Executing prompt: {prompt}")
     if negative == "":
+        # Enhanced negative prompt for better quality
+        negative = "worst quality, low quality, blurry, distorted, deformed" if not face_detected else None
     print("Custom Loaded LoRA:", custom_lora_path)
         repo_name = custom_lora_path
         full_path_lora = custom_lora_path
     else:
+        repo_name = sdxl_loras[selected_state_index]["repo"]
+        full_path_lora = state_dicts[repo_name]["saved_name"]
     repo_name = repo_name.rstrip("/").lower()
     et = time.time()
     print('Prompt processing took:', et - st, 'seconds')
+    # Adjust parameters based on face detection
+    if not face_detected:
+        # For landscape/no face mode, reduce face strength and increase depth control
+        face_strength = 0.0
+        depth_control_scale = max(depth_control_scale, 0.9)
+        image_strength = min(image_strength, 0.4)
+        print("Adjusted parameters for no-face mode")
     st = time.time()
     image = generate_image(
         prompt, negative, face_emb, face_image, face_kps, image_strength,
         guidance_scale, face_strength, depth_control_scale, repo_name,
     print("Loaded state dict:", loaded_state_dict)
     print("Last LoRA:", last_lora, "| Current LoRA:", repo_name)
+    # Prepare control images and scales based on face detection
+    if face_detected:
+        control_images = [face_kps, zoe(face_image)]
+        control_scales = [face_strength, depth_control_scale]
+    else:
+        # Only use depth control for landscapes
+        control_images = [zoe(face_image)]
+        control_scales = [depth_control_scale]
     # Handle custom LoRA from HuggingFace
     if repo_name.startswith("https://huggingface.co"):
     else:
         full_path_lora = loaded_state_dict
+    # Improved LoRA loading and caching
     if last_lora != repo_name:
         if last_fused:
             pipe.unfuse_lora()
             pipe.unload_lora_weights()
             pipe.unload_textual_inversion()
+        # Load LoRA with better error handling
+        try:
+            pipe.load_lora_weights(full_path_lora)
+            pipe.fuse_lora(lora_scale)
+            last_fused = True
+            # Handle pivotal tuning embeddings
             is_pivotal = sdxl_loras[selected_state_index]["is_pivotal"]
             if is_pivotal:
                 text_embedding_name = sdxl_loras[selected_state_index]["text_embedding_weights"]
                     text_encoder=pipe.text_encoder_2,
                     tokenizer=pipe.tokenizer_2
                 )
+        except Exception as e:
+            print(f"Error loading LoRA: {e}")
+            raise gr.Error(f"Failed to load LoRA: {str(e)}")
     print("Processing prompt...")
     conditioning, pooled = compel(prompt)
     negative_conditioning, negative_pooled = compel(negative) if negative else (None, None)
+    # Enhanced generation parameters
+    num_inference_steps = 40  # Increased for better quality
     print("Generating image...")
     image = pipe(
         prompt_embeds=conditioning,
         image=face_image,
         strength=1-image_strength,
         control_image=control_images,
+        num_inference_steps=num_inference_steps,
         guidance_scale=guidance_scale,
         controlnet_conditioning_scale=control_scales,
     ).images[0]
         gr.Warning("We couldn't find a SDXL LoRA on the model you've sent")
         raise Exception("We couldn't find a SDXL LoRA on the model you've sent")
     return model_data["name"], f"{lora_archive}/{safetensors_name}", trigger_word, image_url
 def check_custom_model(link):
     if(link.startswith("https://")):
         if(link.startswith("https://huggingface.co") or link.startswith("https://www.huggingface.co")):
     gr_sdxl_loras = gr.State(value=sdxl_loras_raw)
     title = gr.HTML(
         """<h1><img src="https://i.imgur.com/DVoGw04.png">
+<span>Face to All - Enhanced<br><small style="
     font-size: 13px;
     display: block;
     font-weight: normal;
     opacity: 0.75;
+">🔥 Supports: No faces (landscape), Multiple faces, Improved quality, Custom LoRAs<br> diffusers InstantID + ControlNet</small></span></h1>""",
         elem_id="title",
     )
     selected_state = gr.State()
     with gr.Row(elem_id="main_app"):
         with gr.Column(scale=4, elem_id="box_column"):
             with gr.Group(elem_id="gallery_box"):
+                photo = gr.Image(label="Upload a picture (with or without faces)", interactive=True, type="pil", height=300)
                 selected_loras = gr.Gallery(label="Selected LoRAs", height=80, show_share_button=False, visible=False, elem_id="gallery_selected")
                 gallery = gr.Gallery(
+                    label="Pick a style from the gallery",
                     allow_preview=False,
                     columns=4,
                     elem_id="gallery",
         with gr.Column(scale=5):
             with gr.Row():
                 prompt = gr.Textbox(label="Prompt", show_label=False, lines=1, max_lines=1,
+                                   info="Describe your subject or scene", value="a person", elem_id="prompt")
                 button = gr.Button("Run", elem_id="run_button")
             result = ImageSlider(
                 share_button = gr.Button("Share to community", elem_id="share-btn")
             with gr.Accordion("Advanced options", open=False):
+                use_multiple_faces = gr.Checkbox(label="Use multiple faces (if detected)", value=False)
                 negative = gr.Textbox(label="Negative Prompt")
+                weight = gr.Slider(0, 10, value=0.9, step=0.1, label="LoRA weight")
+                face_strength = gr.Slider(0, 2, value=0.9, step=0.01, label="Face strength",
+                                         info="Higher values increase face likeness (auto-adjusted for no-face images)")
+                image_strength = gr.Slider(0, 1, value=0.20, step=0.01, label="Image strength",
+                                          info="Higher values increase similarity with original structure/colors")
+                guidance_scale = gr.Slider(0, 50, value=8, step=0.1, label="Guidance Scale")
+                depth_control_scale = gr.Slider(0, 1, value=0.8, step=0.01, label="Zoe Depth ControlNet strength")
             prompt_title = gr.Markdown(
                 value="### Click on a LoRA in the gallery to select it",
     ).success(
         fn=run_lora,
         inputs=[photo, prompt, negative, weight, selected_state, face_strength, image_strength,
+               guidance_scale, depth_control_scale, gr_sdxl_loras, custom_loaded_lora, use_multiple_faces],
         outputs=[result, share_group],
     )
     ).success(
         fn=run_lora,
         inputs=[photo, prompt, negative, weight, selected_state, face_strength, image_strength,
+               guidance_scale, depth_control_scale, gr_sdxl_loras, custom_loaded_lora, use_multiple_faces],
         outputs=[result, share_group],
     )