Spaces:

huntrezz
/

RealtimeMonocularDepth

Running

App Files Files Community

huntrezz commited on Dec 1, 2024

Commit

0143794

verified ·

1 Parent(s): f8b3886

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -50

app.py CHANGED Viewed

@@ -1,9 +1,13 @@
 import cv2
 import torch
-from transformers import DPTForDepthEstimation, DPTImageProcessor
 import numpy as np
 import time
 import warnings
 warnings.filterwarnings("ignore", message="It looks like you are trying to rescale already rescaled images.")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -25,72 +29,83 @@ def manual_normalize(depth_map):
         return np.zeros_like(depth_map, dtype=np.uint8)
 frame_skip = 4
-frame_count = 0
 color_map = cv2.applyColorMap(np.arange(256, dtype=np.uint8), cv2.COLORMAP_INFERNO)
-prev_frame_time = 0
-while True:
-    ret, frame = cap.read()
-    if not ret:
-        break
-    frame_count += 1
-    if frame_count % frame_skip != 0:
-        continue
-    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    resized_frame = resize_image(rgb_frame)
-    inputs = processor(images=resized_frame, return_tensors="pt").to(device)
-    inputs = {k: v.to(torch.float16) for k, v in inputs.items()}
-    with torch.no_grad():
-        outputs = model(**inputs)
-        predicted_depth = outputs.predicted_depth
-    depth_map = predicted_depth.squeeze().cpu().numpy()
-    # Check Input Data
-    print(f"depth_map shape: {depth_map.shape}")
-    print(f"depth_map min: {np.min(depth_map)}, max: {np.max(depth_map)}")
-    print(f"depth_map dtype: {depth_map.dtype}")
-    # Handle invalid values
-    depth_map = np.nan_to_num(depth_map, nan=0.0, posinf=0.0, neginf=0.0)
-    # Ensure depth_map is in float32 format
-    depth_map = depth_map.astype(np.float32)
-    # Check for zero-sized arrays
-    if depth_map.size == 0:
-        print("Error: depth_map is empty")
-        depth_map = np.zeros((256, 256), dtype=np.uint8)
-    else:
-        # Handle empty or constant arrays
-        if np.any(depth_map) and np.min(depth_map) != np.max(depth_map):
-            depth_map = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
         else:
-            depth_map = np.zeros_like(depth_map, dtype=np.uint8)
-    # Use manual normalization as a fallback
-    if np.all(depth_map == 0):
-        depth_map = manual_normalize(depth_map)
-    depth_map_colored = cv2.applyColorMap(depth_map, color_map)
-    depth_map_colored = cv2.resize(depth_map_colored, (frame.shape[1], frame.shape[0]))
-    combined = np.hstack((frame, depth_map_colored))
-    new_frame_time = time.time()
-    fps = 1 / (new_frame_time - prev_frame_time)
-    prev_frame_time = new_frame_time
-    cv2.putText(combined, f"FPS: {int(fps)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-    cv2.imshow('Webcam and Depth Map', combined)
-    if cv2.waitKey(1) & 0xFF == ord('q'):
-        break
-cap.release()
-cv2.destroyAllWindows()

 import cv2
 import torch
 import numpy as np
+from transformers import DPTForDepthEstimation, DPTImageProcessor
 import time
 import warnings
+import asyncio
+import json
+import websockets
 warnings.filterwarnings("ignore", message="It looks like you are trying to rescale already rescaled images.")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         return np.zeros_like(depth_map, dtype=np.uint8)
 frame_skip = 4
 color_map = cv2.applyColorMap(np.arange(256, dtype=np.uint8), cv2.COLORMAP_INFERNO)
+connected = set()
+async def broadcast(message):
+    for websocket in connected:
+        try:
+            await websocket.send(message)
+        except websockets.exceptions.ConnectionClosed:
+            connected.remove(websocket)
+async def handler(websocket, path):
+    connected.add(websocket)
+    try:
+        await websocket.wait_closed()
+    finally:
+        connected.remove(websocket)
+async def process_frames():
+    frame_count = 0
+    prev_frame_time = 0
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+        frame_count += 1
+        if frame_count % frame_skip != 0:
+            continue
+        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        resized_frame = resize_image(rgb_frame)
+        inputs = processor(images=resized_frame, return_tensors="pt").to(device)
+        inputs = {k: v.to(torch.float16) for k, v in inputs.items()}
+        with torch.no_grad():
+            outputs = model(**inputs)
+            predicted_depth = outputs.predicted_depth
+        depth_map = predicted_depth.squeeze().cpu().numpy()
+        depth_map = np.nan_to_num(depth_map, nan=0.0, posinf=0.0, neginf=0.0)
+        depth_map = depth_map.astype(np.float32)
+        if depth_map.size == 0:
+            depth_map = np.zeros((256, 256), dtype=np.uint8)
         else:
+            if np.any(depth_map) and np.min(depth_map) != np.max(depth_map):
+                depth_map = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
+            else:
+                depth_map = np.zeros_like(depth_map, dtype=np.uint8)
+        if np.all(depth_map == 0):
+            depth_map = manual_normalize(depth_map)
+        data = {
+            'depthMap': depth_map.tolist(),
+            'rgbFrame': rgb_frame.tolist()
+        }
+        await broadcast(json.dumps(data))
+        new_frame_time = time.time()
+        fps = 1 / (new_frame_time - prev_frame_time)
+        prev_frame_time = new_frame_time
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+    cap.release()
+    cv2.destroyAllWindows()
+async def main():
+    server = await websockets.serve(handler, "localhost", 8765)
+    await asyncio.gather(server.wait_closed(), process_frames())
+if __name__ == "__main__":
+    asyncio.run(main())