Spaces:

danube2024
/

text-to-image-depth-map

Running

App Files Community

danube2024 committed on Feb 1

Commit

2101ee0

verified ·

1 Parent(s): 06da073

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -63

app.py CHANGED Viewed

@@ -1,35 +1,33 @@
 import gradio as gr
-from transformers import DPTFeatureExtractor, DPTForDepthEstimation
-from diffusers import StableDiffusionPipeline
 import torch
 import numpy as np
 from PIL import Image, ImageEnhance, ImageOps
-import open3d as o3d
-# Force CPU usage (since environment is CPU-only) and 32-bit float
 device = "cpu"
 torch_dtype = torch.float32
-# 1) Initialize the text-to-image pipeline
-#    Keep resolution moderate (512x512) + fewer steps to reduce time.
 text_to_image_pipeline = StableDiffusionPipeline.from_pretrained(
-    "stabilityai/stable-diffusion-2-1-base",
     torch_dtype=torch_dtype
 ).to(device)
-def enhance_depth_map(depth_array):
-    """
-    Simple PIL-based enhancements (no OpenCV needed):
-      - Normalize depth [min, max] -> [0, 255].
-      - Use auto-contrast to boost local details.
-      - Optionally sharpen to highlight edges.
-    """
-    # Normalize depth values to [0, 255]
-    d_min, d_max = depth_array.min(), depth_array.max()
-    depth_stretched = (depth_array - d_min) / (d_max - d_min + 1e-8)
-    depth_stretched = (depth_stretched * 255.0).astype(np.uint8)
     depth_pil = Image.fromarray(depth_stretched)
     depth_pil = ImageOps.autocontrast(depth_pil)
     # Sharpen
@@ -38,61 +36,45 @@ def enhance_depth_map(depth_array):
     return depth_pil
-# 2) Load the Depth Model
-feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
-depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(device)
-def generate_3d_from_text(prompt):
-    # A) Generate a bas-relief-style image at moderate resolution (e.g. 512x512).
-    #    Use fewer inference steps on CPU to avoid timeouts.
-    result = text_to_image_pipeline(
         prompt,
         height=512,
         width=512,
-        num_inference_steps=20,  # Lower steps => faster but less detail
-        guidance_scale=7.5       # You can tune or keep default
-    )
-    generated_image = result.images[0]
-    # B) Upscale the generated image *only for depth estimation*
-    #    This helps the depth model produce more “fine-grained” edges.
-    upscale_width, upscale_height = 768, 768  # or 1024x1024 if you can handle it
-    big_image = generated_image.resize((upscale_width, upscale_height), Image.LANCZOS)
-    # C) Predict Depth on the upscaled image
-    encoding = feature_extractor(big_image, return_tensors="pt").to(device)
     with torch.no_grad():
-        outputs = depth_model(**encoding)
-        predicted_depth = outputs.predicted_depth
-    # D) Resize the depth map back to the upscaled size (768×768 here)
-    #    This will be the final "detailed" depth map.
-    prediction = torch.nn.functional.interpolate(
-        predicted_depth.unsqueeze(1),
-        size=(upscale_height, upscale_width),
         mode="bicubic",
-        align_corners=False,
     ).squeeze()
-    # E) Convert to NumPy for final enhancement
-    depth_array = prediction.cpu().numpy()
-    depth_pil = enhance_depth_map(depth_array)
-    return generated_image, depth_pil
-# Gradio app
-title = "Text to Bas-Relief & Detailed Depth Map (CPU-Friendly)"
-description = (
-    "Generates a bas-relief-style image at 512x512 and produces an upscaled, "
-    "more detailed depth map. Uses fewer steps to avoid timeouts."
-)
 iface = gr.Interface(
-    fn=generate_3d_from_text,
-    inputs=gr.Textbox(label="Enter text description", placeholder="A futuristic bas-relief sculpture of a lion..."),
-    outputs=[gr.Image(label="Bas-Relief Image"), gr.Image(label="Detailed Depth Map")],
-    title=title,
-    description=description,
 )
 iface.launch()

 import gradio as gr
 import torch
 import numpy as np
+from diffusers import StableDiffusionPipeline
+from transformers import DPTFeatureExtractor, DPTForDepthEstimation
 from PIL import Image, ImageEnhance, ImageOps
 device = "cpu"
 torch_dtype = torch.float32
+# 1) Load a custom bas-relief model
 text_to_image_pipeline = StableDiffusionPipeline.from_pretrained(
+    "KappaNeuro/bas-relief",
     torch_dtype=torch_dtype
 ).to(device)
+# 2) Load depth model
+feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
+depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(device)
+def enhance_depth(depth_arr):
+    # Min-max normalize => 0..255
+    d_min, d_max = depth_arr.min(), depth_arr.max()
+    depth_stretched = (depth_arr - d_min) / (d_max - d_min + 1e-8)
+    depth_stretched = (depth_stretched * 255).astype(np.uint8)
+    # Convert to PIL for further post-processing
     depth_pil = Image.fromarray(depth_stretched)
+    # Try auto-contrast or equalize
     depth_pil = ImageOps.autocontrast(depth_pil)
     # Sharpen
     return depth_pil
+def generate_bas_relief_and_depth(prompt):
+    # A) Generate moderate-sized bas-relief
+    bas_relief = text_to_image_pipeline(
         prompt,
         height=512,
         width=512,
+        num_inference_steps=25,
+        guidance_scale=7.5
+    ).images[0]
+    # B) Upscale for depth model (try 768x768 or 1024x1024)
+    big_image = bas_relief.resize((768, 768), Image.LANCZOS)
+    # C) Predict depth on the upscaled image
+    inputs = feature_extractor(big_image, return_tensors="pt").to(device)
     with torch.no_grad():
+        outputs = depth_model(**inputs)
+        depth = outputs.predicted_depth
+    # D) Resize the depth to match the upscaled size
+    depth_resized = torch.nn.functional.interpolate(
+        depth.unsqueeze(1),
+        size=(768, 768),
         mode="bicubic",
+        align_corners=False
     ).squeeze()
+    # E) Enhance the depth map
+    depth_arr = depth_resized.cpu().numpy()
+    depth_pil = enhance_depth(depth_arr)
+    return bas_relief, depth_pil
 iface = gr.Interface(
+    fn=generate_bas_relief_and_depth,
+    inputs="text",
+    outputs=[gr.Image(label="Bas-Relief"), gr.Image(label="Depth Map")],
+    title="Custom Bas-Relief & Detailed Depth Map",
+    description="Generates bas-relief from a custom Hugging Face model with an upscaled depth map."
 )
 iface.launch()