Update app.py
app.py
CHANGED
import gradio as gr
import torch
import numpy as np

from diffusers import StableDiffusionXLPipeline
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
from PIL import Image, ImageEnhance, ImageOps

############################################
# 1. Setup and Model Loading
############################################
device = "cpu"  # or "cuda" if a GPU is available
torch_dtype = torch.float32  # float32 for CPU; use torch.float16 on GPU

# --- Load Base SDXL Model ---
# (Large model; be sure you have enough memory, or use fewer steps)
print("Loading SDXL Base model...")
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch_dtype
)
pipe.to(device)
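# (Optional) On memory-constrained hosts, attention slicing can cut peak
# memory use at some speed cost: pipe.enable_attention_slicing()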

# --- Load LoRA Weights from KappaNeuro/bas-relief ---
# The safetensors file is named "BAS-RELIEF.safetensors".
# This merges the LoRA into the pipeline so you can trigger it via the "BAS-RELIEF" token.
print("Loading bas-relief LoRA weights...")
pipe.load_lora_weights(
    "KappaNeuro/bas-relief",
    weight_name="BAS-RELIEF.safetensors"
)
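# (Optional) pipe.fuse_lora() could be called here to bake the LoRA into the
# base weights, which can make repeated inference slightly faster.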

# --- Load Depth Estimation Model ---
# We'll use Intel's DPT for depth. It is also fairly large, so expect slow CPU inference.
print("Loading DPT Depth model...")
feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(device)


############################################
# 2. Depth Map Enhancement (PIL-based)
############################################
def enhance_depth_map(depth_arr: np.ndarray) -> Image.Image:
    """
    - Normalize depth to [0, 255]
    - Auto-contrast to emphasize details
    - Sharpen edges
    """
    # Min-max stretch; the epsilon guards against division by zero on flat maps
    d_min, d_max = depth_arr.min(), depth_arr.max()
    depth_stretched = (depth_arr - d_min) / (d_max - d_min + 1e-8)
    depth_stretched = (depth_stretched * 255).astype(np.uint8)

    depth_pil = Image.fromarray(depth_stretched)

    # Auto-contrast
    depth_pil = ImageOps.autocontrast(depth_pil)

    # Sharpen (the factor is illustrative; tune to taste)
    depth_pil = ImageEnhance.Sharpness(depth_pil).enhance(2.0)

    return depth_pil
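# For example, enhance_depth_map(np.array([[0.0, 1.0], [2.0, 3.0]])) yields a
# 2x2 grayscale image whose pixel values are stretched across the 0..255 range.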


############################################
# 3. Generation + Depth Inference Function
############################################
def generate_bas_relief_and_depth(prompt: str):
    """
    1) Generate a 'bas-relief' style image using the LoRA from KappaNeuro/bas-relief.
       - The prompt must include the "BAS-RELIEF" token for the style to apply.
    2) Compute a depth map using Intel/dpt-large.
    3) Return (image, depth_map).
    """

    # -- Step A: Merge the user's prompt with the "BAS-RELIEF" instance token --
    # You can experiment with different prompt styles, e.g.
    # "BAS-RELIEF sculpture of a woman in shibari, marble, octane render..."
    full_prompt = f"BAS-RELIEF {prompt}"
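    # (Optional) a negative_prompt (e.g. "blurry, low quality") can also be
    # passed to pipe() below to steer the output away from common artifacts.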

    # -- Step B: Generate the image with SDXL + LoRA --
    # Keep the resolution modest to avoid timeouts on CPU
    print("Generating bas-relief image...")
    result = pipe(
        prompt=full_prompt,
        num_inference_steps=15,  # fewer steps => faster, but lower quality
        guidance_scale=7.5,
        height=512,  # can be reduced to e.g. 384 if still too slow
        width=512
    )

    # Extract the image from the pipeline result
    generated_image = result.images[0]

    # -- Step C: Depth Estimation with DPT --
    print("Running depth estimation...")
    inputs = feature_extractor(generated_image, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = depth_model(**inputs)
    predicted_depth = outputs.predicted_depth  # shape: [batch, height, width]

    # Resize to match the generated image's resolution
    # (PIL size is (width, height), so reverse it for torch's (height, width))
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=generated_image.size[::-1],
        mode="bicubic",
        align_corners=False,
    ).squeeze()  # drop batch/channel dims -> (height, width)

    depth_arr = prediction.cpu().numpy()
    depth_map_pil = enhance_depth_map(depth_arr)

    return generated_image, depth_map_pil


############################################
# 4. Gradio Interface
############################################
title = "Bas-Relief with SDXL + LoRA + Depth Map"
description = (
    "This demo loads SDXL-base on CPU (slow!) and merges the LoRA from KappaNeuro/bas-relief. "
    "Use 'BAS-RELIEF' in your prompt for the style; a depth map is then generated with DPT. "
    "Lower the resolution or the step count if you hit timeouts."
)

iface = gr.Interface(
    fn=generate_bas_relief_and_depth,
    inputs=gr.Textbox(
        label="Describe your scene/style",
        placeholder="sculpture of a woman in shibari, marble, intricate details"
    ),
    outputs=[
        gr.Image(label="Bas-Relief Image"),
        gr.Image(label="Depth Map"),
    ],
    title=title,
    description=description
)

if __name__ == "__main__":
    iface.launch()
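A minimal smoke test, assuming the models have downloaded (the prompt and file
names here are illustrative):

generated, depth = generate_bas_relief_and_depth("lion head, marble, intricate details")
generated.save("bas_relief.png")
depth.save("depth_map.png")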