Spaces:

danube2024
/

text-to-image-depth-map

Running

App Files Community

danube2024 committed on Feb 2

Commit

ddef426

verified ·

1 Parent(s): cb71cae

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -41

app.py CHANGED Viewed

@@ -1,41 +1,31 @@
 import gradio as gr
 import torch
 import numpy as np
 from diffusers import StableDiffusionXLPipeline
 from transformers import DPTFeatureExtractor, DPTForDepthEstimation
 from PIL import Image, ImageEnhance, ImageOps
-############################################
-# 1. Setup and Model Loading
-############################################
-device = "cpu"  # or "cuda" if GPU is available
-torch_dtype = torch.float32  # if using CPU or float16 for GPU
 print("Loading SDXL Base model...")
 pipe = StableDiffusionXLPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
     torch_dtype=torch_dtype
-)
-pipe.to(device)
-print("Loading bas-relief LoRA weights...")
-# IMPORTANT: Pass the first argument as a string to the repo or path,
-# and `weight_name` as a kwarg. That matches the actual function signature.
 pipe.load_lora_weights(
-    "KappaNeuro/bas-relief",         # repo / path
-    weight_name="BAS-RELIEF.safetensors"
 )
-print("Loading DPT Depth model...")
 feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
 depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(device)
 def enhance_depth_map(depth_arr: np.ndarray) -> Image.Image:
-    """
-    Normalize depth to [0, 255], auto-contrast, and sharpen.
-    """
     d_min, d_max = depth_arr.min(), depth_arr.max()
     depth_stretched = (depth_arr - d_min) / (d_max - d_min + 1e-8)
     depth_stretched = (depth_stretched * 255).astype(np.uint8)
@@ -48,52 +38,47 @@ def enhance_depth_map(depth_arr: np.ndarray) -> Image.Image:
     return depth_pil
-def generate_bas_relief_and_depth(prompt: str):
-    # We prepend "BAS-RELIEF" to ensure the LoRA style is triggered.
     full_prompt = f"BAS-RELIEF {prompt}"
-    print("Generating bas-relief image...")
     result = pipe(
         prompt=full_prompt,
-        num_inference_steps=15,    # Lower for speed on CPU
         guidance_scale=7.5,
-        height=512,
         width=512
     )
-    generated_image = result.images[0]
-    print("Running depth estimation...")
-    inputs = feature_extractor(generated_image, return_tensors="pt").to(device)
     with torch.no_grad():
         outputs = depth_model(**inputs)
         predicted_depth = outputs.predicted_depth
-    # Resize depth map to match original image
     prediction = torch.nn.functional.interpolate(
         predicted_depth.unsqueeze(1),
-        size=generated_image.size[::-1],
         mode="bicubic",
-        align_corners=False,
-    ).squeeze(0)
-    depth_arr = prediction.cpu().numpy()
-    depth_pil = enhance_depth_map(depth_arr)
-    return generated_image, depth_pil
-title = "Bas-Relief (SDXL + LoRA) + Depth Map"
 description = (
-    "Load SDXL base on CPU, apply 'BAS-RELIEF.safetensors' LoRA from KappaNeuro/bas-relief. "
-    "Then run DPT for depth estimation."
 )
 iface = gr.Interface(
     fn=generate_bas_relief_and_depth,
     inputs=gr.Textbox(
-        label="Describe your scene/style",
-        placeholder="e.g., 'sculpture of a woman in shibari, marble, intricate details'"
     ),
     outputs=[gr.Image(label="Bas-Relief Image"), gr.Image(label="Depth Map")],
     title=title,

 import gradio as gr
 import torch
 import numpy as np
 from diffusers import StableDiffusionXLPipeline
 from transformers import DPTFeatureExtractor, DPTForDepthEstimation
 from PIL import Image, ImageEnhance, ImageOps
+device = "cpu"  # or "cuda" if you have a GPU
+torch_dtype = torch.float32
 print("Loading SDXL Base model...")
 pipe = StableDiffusionXLPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
     torch_dtype=torch_dtype
+).to(device)
+print("Loading bas-relief LoRA weights with PEFT...")
 pipe.load_lora_weights(
+    "KappaNeuro/bas-relief",      # The HF repo with BAS-RELIEF.safetensors
+    weight_name="BAS-RELIEF.safetensors",
+    peft_backend="peft"          # This is crucial
 )
+print("Loading DPT Depth Model...")
 feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
 depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(device)
 def enhance_depth_map(depth_arr: np.ndarray) -> Image.Image:
     d_min, d_max = depth_arr.min(), depth_arr.max()
     depth_stretched = (depth_arr - d_min) / (d_max - d_min + 1e-8)
     depth_stretched = (depth_stretched * 255).astype(np.uint8)
     return depth_pil
+def generate_bas_relief_and_depth(prompt):
+    # Use the token "BAS-RELIEF" so the LoRA triggers
     full_prompt = f"BAS-RELIEF {prompt}"
+    print("Generating image with LoRA style...")
     result = pipe(
         prompt=full_prompt,
+        num_inference_steps=15,   # reduce if too slow
         guidance_scale=7.5,
+        height=512,               # reduce if you still get timeouts
         width=512
     )
+    image = result.images[0]
+    print("Running DPT Depth Estimation...")
+    inputs = feature_extractor(image, return_tensors="pt").to(device)
     with torch.no_grad():
         outputs = depth_model(**inputs)
         predicted_depth = outputs.predicted_depth
     prediction = torch.nn.functional.interpolate(
         predicted_depth.unsqueeze(1),
+        size=image.size[::-1],
         mode="bicubic",
+        align_corners=False
+    ).squeeze()
+    depth_map_pil = enhance_depth_map(prediction.cpu().numpy())
+    return image, depth_map_pil
+title = "Bas-Relief (SDXL + LoRA) + Depth Map (with PEFT)"
 description = (
+    "Loads stable-diffusion-xl-base-1.0 on CPU, merges LoRA from 'KappaNeuro/bas-relief'. "
+    "Use 'BAS-RELIEF' token in your prompt to trigger the style, then compute a depth map."
 )
 iface = gr.Interface(
     fn=generate_bas_relief_and_depth,
     inputs=gr.Textbox(
+        label="Description",
+        placeholder="woman in shibari, marble relief, intricately carved"
     ),
     outputs=[gr.Image(label="Bas-Relief Image"), gr.Image(label="Depth Map")],
     title=title,