face-to-art

Runtime error

App Files Files Community

primerz committed 29 days ago

Commit

b8a464d

verified ·

1 Parent(s): 12fc679

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -277

app.py CHANGED Viewed

@@ -38,7 +38,7 @@ from compel import Compel, ReturnedEmbeddingsType
 from gradio_imageslider import ImageSlider
-# Load LoRA configurations - now only LucasArts style
 with open("sdxl_loras.json", "r") as file:
     data = json.load(file)
     sdxl_loras_raw = [
@@ -61,8 +61,9 @@ with open("sdxl_loras.json", "r") as file:
 with open("defaults_data.json", "r") as file:
     lora_defaults = json.load(file)
-device = "cuda"
 # Cache for LoRA state dicts
 state_dicts = {}
@@ -80,7 +81,7 @@ for item in sdxl_loras_raw:
     }
 sdxl_loras_raw = [item for item in sdxl_loras_raw if item.get("new") != True]
 # Download models
 hf_hub_download(
     repo_id="InstantX/InstantID",
@@ -111,6 +112,7 @@ app.prepare(ctx_id=0, det_size=(768, 768))
 face_adapter = f'/data/checkpoints/ip-adapter.bin'
 controlnet_path = f'/data/checkpoints/ControlNetModel'
 st = time.time()
 identitynet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)
 zoedepthnet = ControlNetModel.from_pretrained("diffusers/controlnet-zoe-depth-sdxl-1.0", torch_dtype=torch.float16)
@@ -123,9 +125,8 @@ et = time.time()
 print('Loading VAE took: ', et - st, 'seconds')
 st = time.time()
-# CHANGED: Using AlbedoBase XL v2.1 for better quality
 pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(
-    "frankjoshua/albedobaseXL_v21",
     vae=vae,
     controlnet=[identitynet, zoedepthnet],
     torch_dtype=torch.float16
@@ -133,8 +134,7 @@ pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(
 pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True)
 pipe.load_ip_adapter_instantid(face_adapter)
-# IMPROVED: Higher IP adapter scale for better face preservation
-pipe.set_ip_adapter_scale(1.0)
 et = time.time()
 print('Loading pipeline took: ', et - st, 'seconds')
@@ -159,123 +159,17 @@ last_lora = ""
 last_fused = False
 lora_archive = "/data"
-# Enhanced face detection with better face quality filtering
-def detect_faces(face_image, use_multiple_faces=False):
-    """
-    Detect faces in the image with quality filtering
-    Returns: list of face info dictionaries, or empty list if no faces
-    """
-    try:
-        face_info_list = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
-        if not face_info_list or len(face_info_list) == 0:
-            print("No faces detected")
-            return []
-        # Filter faces by quality score if available
-        filtered_faces = []
-        for face_info in face_info_list:
-            # Check if face has minimum quality
-            if 'det_score' in face_info and face_info['det_score'] > 0.5:
-                filtered_faces.append(face_info)
-            elif 'det_score' not in face_info:
-                filtered_faces.append(face_info)
-        if not filtered_faces:
-            print("No high-quality faces detected")
-            return []
-        # Sort faces by size (largest first)
-        filtered_faces = sorted(
-            filtered_faces,
-            key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]),
-            reverse=True
-        )
-        if use_multiple_faces:
-            print(f"Detected {len(filtered_faces)} high-quality faces")
-            return filtered_faces
-        else:
-            print(f"Using largest face (detected {len(filtered_faces)} total)")
-            return [filtered_faces[0]]
-    except Exception as e:
-        print(f"Face detection error: {e}")
-        return []
-def process_face_embeddings_separately(face_info_list):
-    """
-    Process face embeddings separately for multi-face generation
-    Returns: list of individual face embeddings
-    """
-    if not face_info_list:
-        return []
-    embeddings = [face_info['embedding'] for face_info in face_info_list]
-    return embeddings
-def create_face_kps_image(face_image, face_info_list):
-    """
-    Create keypoints image from face info with enhanced visibility
-    """
-    if not face_info_list:
-        return face_image
-    # For multiple faces, draw all keypoints with different colors
-    if len(face_info_list) > 1:
-        return draw_multiple_kps(face_image, [f['kps'] for f in face_info_list])
-    else:
-        return draw_kps(face_image, face_info_list[0]['kps'])
-def draw_multiple_kps(image_pil, kps_list, color_list=[(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)]):
-    """
-    Draw keypoints for multiple faces with enhanced visibility
-    """
-    stickwidth = 4
-    limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
-    w, h = image_pil.size
-    out_img = np.zeros([h, w, 3])
-    for idx, kps in enumerate(kps_list):
-        kps = np.array(kps)
-        # Use different colors for different faces
-        color_offset = idx % len(color_list)
-        for i in range(len(limbSeq)):
-            index = limbSeq[i]
-            color = color_list[(index[0] + color_offset) % len(color_list)]
-            x = kps[index][:, 0]
-            y = kps[index][:, 1]
-            length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
-            angle = np.degrees(np.arctan2(y[0] - y[1], x[0] - x[1]))
-            polygon = cv2.ellipse2Poly(
-                (int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1
-            )
-            out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
-        out_img = (out_img * 0.6).astype(np.uint8)
-        for idx_kp, kp in enumerate(kps):
-            color = color_list[(idx_kp + color_offset) % len(color_list)]
-            x, y = kp
-            out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)
-    out_img_pil = Image.fromarray(out_img.astype(np.uint8))
-    return out_img_pil
 def update_selection(selected_state: gr.SelectData, sdxl_loras, face_strength, image_strength, weight, depth_control_scale, negative, is_new=False):
     lora_repo = sdxl_loras[selected_state.index]["repo"]
     new_placeholder = "Type a prompt to use your selected LoRA"
     weight_name = sdxl_loras[selected_state.index]["weights"]
-    updated_text = f"### Selected: [{lora_repo}](https://huggingface.co/{lora_repo}) âœ¨ {'(non-commercial LoRA, `cc-by-nc`)' if sdxl_loras[selected_state.index]['is_nc'] else '' }"
     for lora_list in lora_defaults:
         if lora_list["model"] == sdxl_loras[selected_state.index]["repo"]:
-            face_strength = lora_list.get("face_strength", 1.0)
-            image_strength = lora_list.get("image_strength", 0.15)
-            weight = lora_list.get("weight", 1.0)
             depth_control_scale = lora_list.get("depth_control_scale", 0.8)
             negative = lora_list.get("negative", "")
@@ -318,10 +212,9 @@ def resize_image_aspect_ratio(img, max_dim=1280):
 def run_lora(face_image, prompt, negative, lora_scale, selected_state, face_strength, image_strength,
-             guidance_scale, depth_control_scale, sdxl_loras, custom_lora, use_multiple_faces=False,
-             progress=gr.Progress(track_tqdm=True)):
     """
-    Enhanced run_lora with improved face preservation and landscape mode
     """
     print("Custom LoRA:", custom_lora)
     custom_lora_path = custom_lora[0] if custom_lora else None
@@ -330,55 +223,41 @@ def run_lora(face_image, prompt, negative, lora_scale, selected_state, face_stre
     st = time.time()
     face_image = resize_image_aspect_ratio(face_image)
-    # Enhanced face detection
-    face_info_list = detect_faces(face_image, use_multiple_faces)
-    face_detected = len(face_info_list) > 0
-    if face_detected:
-        # CHANGED: Process faces separately instead of averaging
-        face_embeddings = process_face_embeddings_separately(face_info_list)
-        face_kps = create_face_kps_image(face_image, face_info_list)
-        print(f"Processing with {len(face_info_list)} face(s) separately")
-        # For multiple faces, we'll generate with the primary face (largest)
-        face_emb = face_embeddings[0]
-    else:
         face_emb = None
         face_kps = face_image
-        print("No faces detected - using enhanced landscape/depth mode")
     et = time.time()
     print('Face processing took:', et - st, 'seconds')
     st = time.time()
-    # Enhanced prompt processing
     if custom_lora_path and custom_lora[1]:
         prompt = f"{prompt} {custom_lora[1]}"
-    elif selected_state_index >= 0 and selected_state_index < len(sdxl_loras):
-        # Only apply default prompt template if we have a valid selection
-        for lora_list in lora_defaults:
-            if lora_list["model"] == sdxl_loras[selected_state_index]["repo"]:
-                prompt_full = lora_list.get("prompt", None)
-                if prompt_full:
-                    prompt = prompt_full.replace("<subject>", prompt)
-                    break  # Found the matching template
-    # Add LucasArts trigger word if not present (check for both variations)
-    if "lucasarts" not in prompt.lower():
-        prompt = f"{prompt}, lucasarts artstyle"
-    print("Constructed prompt:", prompt)
     if prompt == "":
-        prompt = "a beautiful cinematic scene" if not face_detected else "a person in cinematic lighting"
-    print(f"Final prompt to execute: {prompt}")
     if negative == "":
-        # Enhanced negative prompt
-        if not face_detected:
-            negative = "worst quality, low quality, blurry, distorted, deformed, ugly, bad anatomy"
-        else:
-            negative = "worst quality, low quality, blurry, distorted, deformed, ugly, bad anatomy, bad proportions"
     print("Custom Loaded LoRA:", custom_lora_path)
@@ -387,11 +266,12 @@ def run_lora(face_image, prompt, negative, lora_scale, selected_state, face_stre
     elif custom_lora_path:
         repo_name = custom_lora_path
         full_path_lora = custom_lora_path
-    elif selected_state_index >= 0 and selected_state_index < len(sdxl_loras):
-        repo_name = sdxl_loras[selected_state_index]["repo"]
-        full_path_lora = state_dicts[repo_name]["saved_name"]
     else:
-        raise gr.Error("Invalid style selection. Please select a style again.")
     repo_name = repo_name.rstrip("/").lower()
@@ -400,22 +280,8 @@ def run_lora(face_image, prompt, negative, lora_scale, selected_state, face_stre
     et = time.time()
     print('Prompt processing took:', et - st, 'seconds')
-    # IMPROVED: Better parameter adjustment for face/landscape modes
-    if not face_detected:
-        # Enhanced landscape mode parameters
-        face_strength = 0.0
-        depth_control_scale = 1.0  # Maximum depth control for landscapes
-        image_strength = 0.25  # Higher structure preservation
-        print("Adjusted parameters for enhanced landscape mode")
-    else:
-        # Enhanced face preservation
-        face_strength = max(face_strength, 1.0)  # Ensure strong face preservation
-        depth_control_scale = max(depth_control_scale, 0.8)  # Good depth control
-        print("Adjusted parameters for enhanced face preservation")
     st = time.time()
-    # Generate single image with best face (or landscape)
     image = generate_image(
         prompt, negative, face_emb, face_image, face_kps, image_strength,
         guidance_scale, face_strength, depth_control_scale, repo_name,
@@ -427,7 +293,7 @@ def run_lora(face_image, prompt, negative, lora_scale, selected_state, face_stre
 run_lora.zerogpu = True
-@spaces.GPU(duration=90)  # Increased duration for better quality
 def generate_image(prompt, negative, face_emb, face_image, face_kps, image_strength, guidance_scale,
                    face_strength, depth_control_scale, repo_name, loaded_state_dict, lora_scale,
                    sdxl_loras, selected_state_index, face_detected, st):
@@ -436,17 +302,9 @@ def generate_image(prompt, negative, face_emb, face_image, face_kps, image_stren
     print("Loaded state dict:", loaded_state_dict)
     print("Last LoRA:", last_lora, "| Current LoRA:", repo_name)
-    # IMPROVED: Better control image preparation
-    depth_image = zoe(face_image)
-    if face_detected:
-        # Face mode: use both face keypoints and depth
-        control_images = [face_kps, depth_image]
-        control_scales = [face_strength, depth_control_scale]
-    else:
-        # Landscape mode: only depth control with enhanced parameters
-        control_images = [depth_image]
-        control_scales = [depth_control_scale]
     # Handle custom LoRA from HuggingFace
     if repo_name.startswith("https://huggingface.co"):
@@ -463,84 +321,41 @@ def generate_image(prompt, negative, face_emb, face_image, face_kps, image_stren
     else:
         full_path_lora = loaded_state_dict
-    # Improved LoRA loading and caching
     if last_lora != repo_name:
         if last_fused:
             pipe.unfuse_lora()
             pipe.unload_lora_weights()
             pipe.unload_textual_inversion()
-        # Load LoRA with better error handling
-        try:
-            pipe.load_lora_weights(full_path_lora)
-            pipe.fuse_lora(lora_scale=lora_scale)
-            last_fused = True
-            # Handle pivotal tuning embeddings (if needed for future LoRAs)
-            # Only check this if we're not using a custom LoRA and have a valid index
-            if selected_state_index >= 0 and selected_state_index < len(sdxl_loras):
-                is_pivotal = sdxl_loras[selected_state_index]["is_pivotal"]
-                if is_pivotal:
-                    text_embedding_name = sdxl_loras[selected_state_index]["text_embedding_weights"]
-                    embedding_path = hf_hub_download(repo_id=repo_name, filename=text_embedding_name, repo_type="model")
-                    state_dict_embedding = load_file(embedding_path)
-                    pipe.load_textual_inversion(
-                        state_dict_embedding["clip_l" if "clip_l" in state_dict_embedding else "text_encoders_0"],
-                        token=["<s0>", "<s1>"],
-                        text_encoder=pipe.text_encoder,
-                        tokenizer=pipe.tokenizer
-                    )
-                    pipe.load_textual_inversion(
-                        state_dict_embedding["clip_g" if "clip_g" in state_dict_embedding else "text_encoders_1"],
-                        token=["<s0>", "<s1>"],
-                        text_encoder=pipe.text_encoder_2,
-                        tokenizer=pipe.tokenizer_2
-                    )
-        except Exception as e:
-            print(f"Error loading LoRA: {e}")
-            import traceback
-            traceback.print_exc()
-            raise gr.Error(f"Failed to load LoRA: {str(e)}")
     print("Processing prompt...")
-    # Truncate prompts if they're too long for the tokenizer
-    # CLIP tokenizers have a max length of 77 tokens
-    def truncate_prompt(text, max_length=75):
-        """Truncate prompt to fit within token limits, leaving room for special tokens"""
-        if not text:
-            return text
-        try:
-            tokens = pipe.tokenizer(text, truncation=False, add_special_tokens=False)['input_ids']
-            if len(tokens) > max_length:
-                # Tokenize with truncation
-                truncated_text = pipe.tokenizer.decode(tokens[:max_length], skip_special_tokens=True)
-                print(f"Warning: Prompt truncated from {len(tokens)} to {max_length} tokens")
-                print(f"  Original: {text}")
-                print(f"  Truncated: {truncated_text}")
-                return truncated_text
-            return text
-        except Exception as e:
-            print(f"Warning: Could not truncate prompt, using as-is: {e}")
-            return text
-    prompt = truncate_prompt(prompt)
-    negative = truncate_prompt(negative) if negative else ""
-    try:
-        prompt_token_count = len(pipe.tokenizer(prompt)['input_ids'])
-        negative_token_count = len(pipe.tokenizer(negative)['input_ids']) if negative else 0
-        print(f"Prompt token count: {prompt_token_count}/77")
-        print(f"Negative prompt token count: {negative_token_count}/77")
-    except Exception as e:
-        print(f"Could not count tokens: {e}")
     conditioning, pooled = compel(prompt)
     negative_conditioning, negative_pooled = compel(negative) if negative else (None, None)
-    # IMPROVED: Enhanced generation parameters for better quality
-    num_inference_steps = 50  # Increased for better quality
     print("Generating image...")
     image = pipe(
         prompt_embeds=conditioning,
@@ -551,9 +366,9 @@ def generate_image(prompt, negative, face_emb, face_image, face_kps, image_stren
         height=face_image.height,
         image_embeds=face_emb if face_detected else None,
         image=face_image,
-        strength=1-image_strength,  # Higher strength = more transformation
         control_image=control_images,
-        num_inference_steps=num_inference_steps,
         guidance_scale=guidance_scale,
         controlnet_conditioning_scale=control_scales,
     ).images[0]
@@ -644,7 +459,7 @@ def get_civitai_safetensors(link):
         gr.Warning("We couldn't find a SDXL LoRA on the model you've sent")
         raise Exception("We couldn't find a SDXL LoRA on the model you've sent")
     return model_data["name"], f"{lora_archive}/{safetensors_name}", trigger_word, image_url
 def check_custom_model(link):
     if(link.startswith("https://")):
         if(link.startswith("https://huggingface.co") or link.startswith("https://www.huggingface.co")):
@@ -686,12 +501,12 @@ with gr.Blocks(css="custom.css") as demo:
     gr_sdxl_loras = gr.State(value=sdxl_loras_raw)
     title = gr.HTML(
         """<h1><img src="https://i.imgur.com/DVoGw04.png">
-<span>LucasArts Style - Enhanced Face Preservation<br><small style="
     font-size: 13px;
     display: block;
     font-weight: normal;
     opacity: 0.75;
-">ðŸ”¥ Improved: Better face identity preservation, Enhanced landscape mode, Multiple face support<br>AlbedoBase XL v2.1 + InstantID + ControlNet</small></span></h1>""",
         elem_id="title",
     )
     selected_state = gr.State()
@@ -700,7 +515,7 @@ with gr.Blocks(css="custom.css") as demo:
     with gr.Row(elem_id="main_app"):
         with gr.Column(scale=4, elem_id="box_column"):
             with gr.Group(elem_id="gallery_box"):
-                photo = gr.Image(label="Upload a picture (with or without faces)", interactive=True, type="pil", height=300)
                 selected_loras = gr.Gallery(label="Selected LoRAs", height=80, show_share_button=False, visible=False, elem_id="gallery_selected")
                 gallery = gr.Gallery(
                     label="LucasArts Style",
@@ -717,7 +532,7 @@ with gr.Blocks(css="custom.css") as demo:
         with gr.Column(scale=5):
             with gr.Row():
                 prompt = gr.Textbox(label="Prompt", show_label=False, lines=1, max_lines=1,
-                                   info="Describe your subject or scene", value="a person", elem_id="prompt")
                 button = gr.Button("Run", elem_id="run_button")
             result = ImageSlider(
@@ -730,32 +545,25 @@ with gr.Blocks(css="custom.css") as demo:
                 share_button = gr.Button("Share to community", elem_id="share-btn")
             with gr.Accordion("Advanced options", open=False):
-                use_multiple_faces = gr.Checkbox(
-                    label="Process multiple faces separately",
-                    value=False,
-                    info="Generate separate outputs for each detected face"
-                )
                 negative = gr.Textbox(label="Negative Prompt")
-                weight = gr.Slider(0, 10, value=1.0, step=0.1, label="LoRA weight")
                 face_strength = gr.Slider(
-                    0, 2, value=1.0, step=0.01, label="Face identity strength",
-                    info="Higher = stronger face preservation (auto-adjusted for landscapes)"
                 )
                 image_strength = gr.Slider(
-                    0, 1, value=0.15, step=0.01, label="Image structure strength",
-                    info="Lower = more transformation, Higher = more original structure"
                 )
                 guidance_scale = gr.Slider(
-                    0, 50, value=7.5, step=0.1, label="Guidance Scale",
-                    info="How closely to follow the prompt"
                 )
                 depth_control_scale = gr.Slider(
-                    0, 1, value=0.8, step=0.01, label="Depth ControlNet strength",
-                    info="3D structure preservation (auto-maximized for landscapes)"
                 )
             prompt_title = gr.Markdown(
-                value="### Click 'Run' to generate with LucasArts style",
                 visible=True,
                 elem_id="selected_lora",
             )
@@ -786,7 +594,7 @@ with gr.Blocks(css="custom.css") as demo:
     ).success(
         fn=run_lora,
         inputs=[photo, prompt, negative, weight, selected_state, face_strength, image_strength,
-               guidance_scale, depth_control_scale, gr_sdxl_loras, custom_loaded_lora, use_multiple_faces],
         outputs=[result, share_group],
     )
@@ -797,7 +605,7 @@ with gr.Blocks(css="custom.css") as demo:
     ).success(
         fn=run_lora,
         inputs=[photo, prompt, negative, weight, selected_state, face_strength, image_strength,
-               guidance_scale, depth_control_scale, gr_sdxl_loras, custom_loaded_lora, use_multiple_faces],
         outputs=[result, share_group],
     )

 from gradio_imageslider import ImageSlider
+# Load LoRA configurations
 with open("sdxl_loras.json", "r") as file:
     data = json.load(file)
     sdxl_loras_raw = [
 with open("defaults_data.json", "r") as file:
     lora_defaults = json.load(file)
+device = "cuda"
 # Cache for LoRA state dicts
 state_dicts = {}
     }
 sdxl_loras_raw = [item for item in sdxl_loras_raw if item.get("new") != True]
 # Download models
 hf_hub_download(
     repo_id="InstantX/InstantID",
 face_adapter = f'/data/checkpoints/ip-adapter.bin'
 controlnet_path = f'/data/checkpoints/ControlNetModel'
+# Load IdentityNet
 st = time.time()
 identitynet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)
 zoedepthnet = ControlNetModel.from_pretrained("diffusers/controlnet-zoe-depth-sdxl-1.0", torch_dtype=torch.float16)
 print('Loading VAE took: ', et - st, 'seconds')
 st = time.time()
 pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(
+    "SG161222/RealVisXL_V5.0",
     vae=vae,
     controlnet=[identitynet, zoedepthnet],
     torch_dtype=torch.float16
 pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True)
 pipe.load_ip_adapter_instantid(face_adapter)
+pipe.set_ip_adapter_scale(0.9)
 et = time.time()
 print('Loading pipeline took: ', et - st, 'seconds')
 last_fused = False
 lora_archive = "/data"
 def update_selection(selected_state: gr.SelectData, sdxl_loras, face_strength, image_strength, weight, depth_control_scale, negative, is_new=False):
     lora_repo = sdxl_loras[selected_state.index]["repo"]
     new_placeholder = "Type a prompt to use your selected LoRA"
     weight_name = sdxl_loras[selected_state.index]["weights"]
+    updated_text = f"### Selected: [{lora_repo}](https://huggingface.co/{lora_repo}) ✨ {'(non-commercial LoRA, `cc-by-nc`)' if sdxl_loras[selected_state.index]['is_nc'] else '' }"
     for lora_list in lora_defaults:
         if lora_list["model"] == sdxl_loras[selected_state.index]["repo"]:
+            face_strength = lora_list.get("face_strength", 0.9)
+            image_strength = lora_list.get("image_strength", 0.2)
+            weight = lora_list.get("weight", 0.95)
             depth_control_scale = lora_list.get("depth_control_scale", 0.8)
             negative = lora_list.get("negative", "")
 def run_lora(face_image, prompt, negative, lora_scale, selected_state, face_strength, image_strength,
+             guidance_scale, depth_control_scale, sdxl_loras, custom_lora, progress=gr.Progress(track_tqdm=True)):
     """
+    Working version - matches old code exactly
     """
     print("Custom LoRA:", custom_lora)
     custom_lora_path = custom_lora[0] if custom_lora else None
     st = time.time()
     face_image = resize_image_aspect_ratio(face_image)
+    # Simple working face detection
+    face_detected = True
+    try:
+        face_info = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
+        face_info = sorted(face_info, key=lambda x: (x['bbox'][2]-x['bbox'][0]) * (x['bbox'][3]-x['bbox'][1]))[-1]
+        face_emb = face_info['embedding']
+        face_kps = draw_kps(face_image, face_info['kps'])
+    except:
+        face_detected = False
         face_emb = None
         face_kps = face_image
     et = time.time()
     print('Face processing took:', et - st, 'seconds')
     st = time.time()
+    # Prompt processing
     if custom_lora_path and custom_lora[1]:
         prompt = f"{prompt} {custom_lora[1]}"
+    else:
+        if selected_state_index >= 0 and selected_state_index < len(sdxl_loras):
+            for lora_list in lora_defaults:
+                if lora_list["model"] == sdxl_loras[selected_state_index]["repo"]:
+                    prompt_full = lora_list.get("prompt", None)
+                    if prompt_full:
+                        prompt = prompt_full.replace("<subject>", prompt)
+    print("Prompt:", prompt)
     if prompt == "":
+        prompt = "a person"
+    print(f"Executing prompt: {prompt}")
     if negative == "":
+        negative = None
     print("Custom Loaded LoRA:", custom_lora_path)
     elif custom_lora_path:
         repo_name = custom_lora_path
         full_path_lora = custom_lora_path
     else:
+        if selected_state_index >= 0 and selected_state_index < len(sdxl_loras):
+            repo_name = sdxl_loras[selected_state_index]["repo"]
+            full_path_lora = state_dicts[repo_name]["saved_name"]
+        else:
+            raise gr.Error("Invalid selection")
     repo_name = repo_name.rstrip("/").lower()
     et = time.time()
     print('Prompt processing took:', et - st, 'seconds')
     st = time.time()
     image = generate_image(
         prompt, negative, face_emb, face_image, face_kps, image_strength,
         guidance_scale, face_strength, depth_control_scale, repo_name,
 run_lora.zerogpu = True
+@spaces.GPU(duration=75)
 def generate_image(prompt, negative, face_emb, face_image, face_kps, image_strength, guidance_scale,
                    face_strength, depth_control_scale, repo_name, loaded_state_dict, lora_scale,
                    sdxl_loras, selected_state_index, face_detected, st):
     print("Loaded state dict:", loaded_state_dict)
     print("Last LoRA:", last_lora, "| Current LoRA:", repo_name)
+    # Control images setup
+    control_images = [face_kps, zoe(face_image)] if face_detected else [zoe(face_image)]
+    control_scales = [face_strength, depth_control_scale] if face_detected else [depth_control_scale]
     # Handle custom LoRA from HuggingFace
     if repo_name.startswith("https://huggingface.co"):
     else:
         full_path_lora = loaded_state_dict
+    # LoRA loading
     if last_lora != repo_name:
         if last_fused:
             pipe.unfuse_lora()
             pipe.unload_lora_weights()
             pipe.unload_textual_inversion()
+        pipe.load_lora_weights(full_path_lora)
+        pipe.fuse_lora(lora_scale)
+        last_fused = True
+        # Handle pivotal tuning if needed
+        if selected_state_index >= 0 and selected_state_index < len(sdxl_loras):
+            is_pivotal = sdxl_loras[selected_state_index]["is_pivotal"]
+            if is_pivotal:
+                text_embedding_name = sdxl_loras[selected_state_index]["text_embedding_weights"]
+                embedding_path = hf_hub_download(repo_id=repo_name, filename=text_embedding_name, repo_type="model")
+                state_dict_embedding = load_file(embedding_path)
+                pipe.load_textual_inversion(
+                    state_dict_embedding["clip_l" if "clip_l" in state_dict_embedding else "text_encoders_0"],
+                    token=["<s0>", "<s1>"],
+                    text_encoder=pipe.text_encoder,
+                    tokenizer=pipe.tokenizer
+                )
+                pipe.load_textual_inversion(
+                    state_dict_embedding["clip_g" if "clip_g" in state_dict_embedding else "text_encoders_1"],
+                    token=["<s0>", "<s1>"],
+                    text_encoder=pipe.text_encoder_2,
+                    tokenizer=pipe.tokenizer_2
+                )
     print("Processing prompt...")
     conditioning, pooled = compel(prompt)
     negative_conditioning, negative_pooled = compel(negative) if negative else (None, None)
     print("Generating image...")
     image = pipe(
         prompt_embeds=conditioning,
         height=face_image.height,
         image_embeds=face_emb if face_detected else None,
         image=face_image,
+        strength=1-image_strength,
         control_image=control_images,
+        num_inference_steps=36,
         guidance_scale=guidance_scale,
         controlnet_conditioning_scale=control_scales,
     ).images[0]
         gr.Warning("We couldn't find a SDXL LoRA on the model you've sent")
         raise Exception("We couldn't find a SDXL LoRA on the model you've sent")
     return model_data["name"], f"{lora_archive}/{safetensors_name}", trigger_word, image_url
 def check_custom_model(link):
     if(link.startswith("https://")):
         if(link.startswith("https://huggingface.co") or link.startswith("https://www.huggingface.co")):
     gr_sdxl_loras = gr.State(value=sdxl_loras_raw)
     title = gr.HTML(
         """<h1><img src="https://i.imgur.com/DVoGw04.png">
+<span>LucasArts Style<br><small style="
     font-size: 13px;
     display: block;
     font-weight: normal;
     opacity: 0.75;
+">🧨 diffusers InstantID + ControlNet</small></span></h1>""",
         elem_id="title",
     )
     selected_state = gr.State()
     with gr.Row(elem_id="main_app"):
         with gr.Column(scale=4, elem_id="box_column"):
             with gr.Group(elem_id="gallery_box"):
+                photo = gr.Image(label="Upload a picture", interactive=True, type="pil", height=300)
                 selected_loras = gr.Gallery(label="Selected LoRAs", height=80, show_share_button=False, visible=False, elem_id="gallery_selected")
                 gallery = gr.Gallery(
                     label="LucasArts Style",
         with gr.Column(scale=5):
             with gr.Row():
                 prompt = gr.Textbox(label="Prompt", show_label=False, lines=1, max_lines=1,
+                                   info="Describe your subject", value="a person", elem_id="prompt")
                 button = gr.Button("Run", elem_id="run_button")
             result = ImageSlider(
                 share_button = gr.Button("Share to community", elem_id="share-btn")
             with gr.Accordion("Advanced options", open=False):
                 negative = gr.Textbox(label="Negative Prompt")
+                weight = gr.Slider(0, 10, value=0.95, step=0.1, label="LoRA weight")
                 face_strength = gr.Slider(
+                    0, 2, value=0.9, step=0.01, label="Face strength",
+                    info="Higher values increase face likeness"
                 )
                 image_strength = gr.Slider(
+                    0, 1, value=0.20, step=0.01, label="Image strength",
+                    info="Higher values preserve more of the original structure"
                 )
                 guidance_scale = gr.Slider(
+                    0, 50, value=8, step=0.1, label="Guidance Scale"
                 )
                 depth_control_scale = gr.Slider(
+                    0, 1, value=0.8, step=0.01, label="Zoe Depth ControlNet strength"
                 )
             prompt_title = gr.Markdown(
+                value="### Click on a LoRA in the gallery to select it",
                 visible=True,
                 elem_id="selected_lora",
             )
     ).success(
         fn=run_lora,
         inputs=[photo, prompt, negative, weight, selected_state, face_strength, image_strength,
+               guidance_scale, depth_control_scale, gr_sdxl_loras, custom_loaded_lora],
         outputs=[result, share_group],
     )
     ).success(
         fn=run_lora,
         inputs=[photo, prompt, negative, weight, selected_state, face_strength, image_strength,
+               guidance_scale, depth_control_scale, gr_sdxl_loras, custom_loaded_lora],
         outputs=[result, share_group],
     )