juanmackie committed on
Commit
d7da4f2
·
verified ·
1 Parent(s): ce884cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -73
app.py CHANGED
@@ -1,79 +1,49 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import DPTForDepthEstimation, DPTImageProcessor
4
  import numpy as np
5
  import cv2
6
  from PIL import Image
7
 
8
# The image processor and the depth network are both loaded from the same
# pretrained identifier, so name it once and reuse it.
_DPT_CHECKPOINT = "facebook/dpt-dinov2-small-nyu"
processor = DPTImageProcessor.from_pretrained(_DPT_CHECKPOINT)
model = DPTForDepthEstimation.from_pretrained(_DPT_CHECKPOINT)
11
-
12
def process_image(image):
    """Convert a 2D photo to a side-by-side stereoscopic pair using depth estimation and DIBR.

    Args:
        image: PIL image supplied by the Gradio input component.

    Returns:
        A PIL image twice as wide as the input: the synthesised left-eye view
        on the left half and the right-eye view on the right half.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    # cv2.inpaint only accepts 8-bit 1- or 3-channel images, so normalise
    # RGBA / palette / grayscale uploads to plain RGB before doing anything.
    image = image.convert("RGB")
    image_np = np.array(image)
    height, width = image_np.shape[:2]

    # Step 1: Estimate the depth map.
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    depth_map = outputs.predicted_depth.squeeze().cpu().numpy().astype(np.float32)

    # The prediction is not guaranteed to have the input's resolution; it is
    # indexed with image coordinates below, so bring it to (height, width).
    if depth_map.shape != (height, width):
        depth_map = cv2.resize(depth_map, (width, height), interpolation=cv2.INTER_CUBIC)

    # Normalize the depth map to [0, 1]; a perfectly flat prediction would
    # otherwise divide by zero and fill the map with NaN.
    depth_min = depth_map.min()
    depth_range = depth_map.max() - depth_min
    if depth_range > 0:
        depth_map = (depth_map - depth_min) / depth_range
    else:
        depth_map = np.zeros_like(depth_map)

    # Smooth the depth map to reduce noise.
    depth_map = cv2.GaussianBlur(depth_map, (5, 5), 0)

    # Step 2: Calculate the disparity map.
    max_disparity = int(0.05 * width)  # 5% of image width for dynamic scaling
    disparity_map = max_disparity * (1 - depth_map)  # Closer objects have larger disparity
    # Truncate toward zero, exactly as int() does on a single value.
    disparity_px = disparity_map.astype(np.int64)

    # Step 3: Initialize left and right images and hole masks for DIBR.
    # A mask entry stays True wherever no source pixel lands (a hole).
    left_image = np.zeros_like(image_np)
    right_image = np.zeros_like(image_np)
    left_mask = np.ones((height, width), dtype=bool)
    right_mask = np.ones((height, width), dtype=bool)

    # Step 4: Forward-warp one source column at a time. Within a column every
    # row writes to a distinct target, and columns are visited left to right,
    # so collisions resolve the same way as a per-pixel scan (later x wins).
    rows = np.arange(height)
    for x in range(width):
        shift = disparity_px[:, x]
        for view, holes, target_x in (
            (left_image, left_mask, x + shift),
            (right_image, right_mask, x - shift),
        ):
            inside = (target_x >= 0) & (target_x < width)
            view[rows[inside], target_x[inside]] = image_np[inside, x]
            holes[rows[inside], target_x[inside]] = False

    # Convert masks to uint8 (255 = hole) as required by cv2.inpaint.
    left_mask_uint8 = left_mask.astype(np.uint8) * 255
    right_mask_uint8 = right_mask.astype(np.uint8) * 255

    # Step 5: Apply inpainting to fill holes.
    inpaint_radius = 5
    left_image_inpaint = cv2.inpaint(left_image, left_mask_uint8, inpaint_radius, cv2.INPAINT_TELEA)
    right_image_inpaint = cv2.inpaint(right_image, right_mask_uint8, inpaint_radius, cv2.INPAINT_TELEA)

    # Step 6: Combine into a side-by-side stereoscopic image and hand it back
    # to Gradio as a PIL image.
    stereo_image = np.hstack((left_image_inpaint, right_image_inpaint))
    return Image.fromarray(stereo_image)
68
-
69
# Gradio front end for the whole pipeline: one uploaded photo in, one
# side-by-side stereo image out.
_photo_input = gr.Image(type="pil", label="Upload a 2D Photo")
_stereo_output = gr.Image(type="pil", label="Stereoscopic 3D Output (Side-by-Side)")
_description = "Upload a 2D photo to generate a stereoscopic 3D image pair for viewing on a Quest headset. The output is a side-by-side image: left half for the left eye, right half for the right eye. Download and view it on your Quest using a compatible photo viewer."

interface = gr.Interface(
    fn=process_image,
    inputs=_photo_input,
    outputs=_stereo_output,
    title="2D to Stereoscopic 3D Converter",
    description=_description,
)

# Start serving the app.
interface.launch()
 
1
  import gradio as gr
2
  import torch
 
3
  import numpy as np
4
  import cv2
5
  from PIL import Image
6
 
7
+ # Import the DepthAnythingV2 model from its specific module.
8
+ # IMPORTANT: This assumes you have the Depth-Anything-V2 repository cloned
9
+ # and its 'depth_anything_v2' module accessible in your Python path.
10
+ # Please follow the setup instructions provided after this code block.
11
+ from depth_anything_v2.dpt import DepthAnythingV2
12
+
13
# Pick the inference device: CUDA if available, otherwise Apple MPS, otherwise CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'

# Constructor arguments for each Depth Anything V2 encoder variant; the
# selected entry is unpacked straight into DepthAnythingV2(...).
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
}
encoder = 'vitl'  # You can change this to 'vits', 'vitb', or 'vitg' if you have the respective checkpoints

# Load the pre-trained Depth Anything V2 model.
# The checkpoint (e.g. 'depth_anything_v2_vitl.pth') is looked up in a
# 'checkpoints' directory relative to the current working directory.
_checkpoint_path = f'checkpoints/depth_anything_v2_{encoder}.pth'
try:
    model = DepthAnythingV2(**model_configs[encoder])
    # weights_only=True restricts unpickling to tensors and plain containers,
    # so a tampered checkpoint file cannot execute arbitrary code on load.
    state_dict = torch.load(_checkpoint_path, map_location="cpu", weights_only=True)
    model.load_state_dict(state_dict)
    model = model.to(DEVICE).eval()
    print(f"Depth Anything V2 ({encoder}) model loaded successfully on {DEVICE}.")
except FileNotFoundError:
    print(f"Error: Checkpoint file '{_checkpoint_path}' not found.")
    print("Please ensure you have downloaded the Depth Anything V2 model checkpoints")
    print("and placed them in a 'checkpoints' folder. Refer to the setup instructions.")
    # Leave model as None so the module still imports; code that uses the
    # model must check for None before running inference.
    model = None
except Exception as e:
    # Best-effort start-up: report the failure and continue without a model.
    print(f"An error occurred while loading the Depth Anything V2 model: {e}")
    model = None
44
+
45
+
46
+ def process_image(image, max_disparity_ratio, inpaint_radius):
47
+ """
48
+ Convert a 2D photo to a stereoscopic 3D image pair using Depth Anything V2
49
+ for depth estimation and DIBR, with adjustable paramete