Spaces:

juanmackie
/

2DTo3DSpatialPhotoConverter

Running

App Files Files Community

juanmackie committed on Jun 15

Commit

d7da4f2

verified ·

1 Parent(s): ce884cf

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -73

app.py CHANGED Viewed

@@ -1,79 +1,49 @@
 import gradio as gr
 import torch
-from transformers import DPTForDepthEstimation, DPTImageProcessor
 import numpy as np
 import cv2
 from PIL import Image
-# Load pre-trained depth estimation model and processor
-processor = DPTImageProcessor.from_pretrained("facebook/dpt-dinov2-small-nyu")
-model = DPTForDepthEstimation.from_pretrained("facebook/dpt-dinov2-small-nyu")
-def process_image(image):
-    """Convert a 2D photo to a stereoscopic 3D image pair using depth estimation and DIBR."""
-    # Convert PIL image to numpy array
-    image_np = np.array(image)
-    height, width = image_np.shape[:2]
-    # Step 1: Estimate the depth map
-    inputs = processor(images=image, return_tensors="pt")
-    with torch.no_grad():
-        outputs = model(**inputs)
-    depth_map = outputs.predicted_depth.squeeze().cpu().numpy()
-    # Normalize the depth map to [0,1]
-    depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
-    # Smooth the depth map to reduce noise
-    depth_map = cv2.GaussianBlur(depth_map, (5,5), 0)
-    # Step 2: Calculate the disparity map
-    max_disparity = int(0.05 * width)  # 5% of image width for dynamic scaling
-    disparity_map = max_disparity * (1 - depth_map)  # Closer objects have larger disparity
-    # Step 3: Initialize left and right images and masks for DIBR
-    left_image = np.zeros_like(image_np)
-    right_image = np.zeros_like(image_np)
-    left_mask = np.ones((height, width), dtype=bool)
-    right_mask = np.ones((height, width), dtype=bool)
-    # Step 4: Perform pixel shifting based on disparity (forward warping)
-    for y in range(height):
-        for x in range(width):
-            disparity = int(disparity_map[y, x])
-            new_x_left = x + disparity
-            new_x_right = x - disparity
-            if 0 <= new_x_left < width:
-                left_image[y, new_x_left] = image_np[y, x]
-                left_mask[y, new_x_left] = False
-            if 0 <= new_x_right < width:
-                right_image[y, new_x_right] = image_np[y, x]
-                right_mask[y, new_x_right] = False
-    # Convert masks to uint8 for inpainting
-    left_mask_uint8 = left_mask.astype(np.uint8) * 255
-    right_mask_uint8 = right_mask.astype(np.uint8) * 255
-    # Step 5: Apply inpainting to fill holes
-    inpaint_radius = 5
-    left_image_inpaint = cv2.inpaint(left_image, left_mask_uint8, inpaint_radius, cv2.INPAINT_TELEA)
-    right_image_inpaint = cv2.inpaint(right_image, right_mask_uint8, inpaint_radius, cv2.INPAINT_TELEA)
-    # Step 6: Combine into a side-by-side stereoscopic image
-    stereo_image = np.hstack((left_image_inpaint, right_image_inpaint))
-    # Convert back to PIL image for output
-    stereo_image_pil = Image.fromarray(stereo_image)
-    return stereo_image_pil
-# Define Gradio interface for end-to-end pipeline
-interface = gr.Interface(
-    fn=process_image,
-    inputs=gr.Image(type="pil", label="Upload a 2D Photo"),
-    outputs=gr.Image(type="pil", label="Stereoscopic 3D Output (Side-by-Side)"),
-    title="2D to Stereoscopic 3D Converter",
-    description="Upload a 2D photo to generate a stereoscopic 3D image pair for viewing on a Quest headset. The output is a side-by-side image: left half for the left eye, right half for the right eye. Download and view it on your Quest using a compatible photo viewer."
-)
-# Launch the application
-interface.launch()

 import gradio as gr
 import torch
 import numpy as np
 import cv2
 from PIL import Image
+# Import the DepthAnythingV2 model class from the 'depth_anything_v2' package.
+# IMPORTANT: This assumes the Depth-Anything-V2 repository has been cloned
+# and that its 'depth_anything_v2' package is importable (i.e. on your Python path).
+# Follow the project's setup instructions before running this script.
+from depth_anything_v2.dpt import DepthAnythingV2
+# Pick the device for model inference: CUDA if available, otherwise MPS if available, otherwise CPU
+DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
+# Per-encoder model configurations for Depth Anything V2 ('vitl' is selected below, as in the upstream Depth Anything V2 app.py example)
+model_configs = {
+    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
+    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
+    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
+    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
+}
+encoder = 'vitl' # You can change this to 'vits', 'vitb', or 'vitg' if you have the respective checkpoints
+# Load the pre-trained Depth Anything V2 model.
+# The checkpoint file (e.g., 'checkpoints/depth_anything_v2_vitl.pth') is looked up in a 'checkpoints'
+# directory relative to where you run this script; to load it from elsewhere, change the path passed to torch.load below.
+try:
+    model = DepthAnythingV2(**model_configs[encoder])
+    state_dict = torch.load(f'checkpoints/depth_anything_v2_{encoder}.pth', map_location="cpu")
+    model.load_state_dict(state_dict)
+    model = model.to(DEVICE).eval()
+    print(f"Depth Anything V2 ({encoder}) model loaded successfully on {DEVICE}.")
+except FileNotFoundError:
+    print(f"Error: Checkpoint file 'checkpoints/depth_anything_v2_{encoder}.pth' not found.")
+    print("Please ensure you have downloaded the Depth Anything V2 model checkpoints")
+    print("and placed them in a 'checkpoints' folder. Refer to the setup instructions.")
+    # The model could not be loaded. Rather than exiting, fall back to model = None
+    # so the script keeps running; any code that uses `model` must check for None before inference.
+    model = None
+except Exception as e:
+    print(f"An error occurred while loading the Depth Anything V2 model: {e}")
+    model = None
+def process_image(image, max_disparity_ratio, inpaint_radius):
+    """
+    Convert a 2D photo to a stereoscopic 3D image pair using Depth Anything V2
+    for depth estimation and DIBR, with adjustable paramete