Spaces:
Paused
Paused
Upload 3 files
Browse files- Dockerfile +7 -3
- processor.py +54 -27
- requirements.txt +10 -9
Dockerfile
CHANGED
|
@@ -11,12 +11,16 @@ RUN apt-get update && apt-get install -y \
|
|
| 11 |
COPY requirements.txt .
|
| 12 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 13 |
|
|
|
|
|
|
|
|
|
|
| 14 |
COPY . .
|
| 15 |
|
| 16 |
-
# Create temp directory
|
| 17 |
RUN mkdir -p /tmp/video-bg-remover
|
| 18 |
|
| 19 |
-
#
|
| 20 |
-
|
|
|
|
| 21 |
|
|
|
|
| 22 |
CMD uvicorn app:app --host 0.0.0.0 --port $PORT
|
|
|
|
| 11 |
COPY requirements.txt .
|
| 12 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 13 |
|
| 14 |
+
# Pre-download models during build
|
| 15 |
+
RUN python -c "import torch; torch.hub.load('intel-isl/MiDaS', 'MiDaS_small', trust_repo=True)"
|
| 16 |
+
|
| 17 |
COPY . .
|
| 18 |
|
|
|
|
| 19 |
RUN mkdir -p /tmp/video-bg-remover
|
| 20 |
|
| 21 |
+
# Create a non-root user
|
| 22 |
+
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app /tmp/video-bg-remover
|
| 23 |
+
USER appuser
|
| 24 |
|
| 25 |
+
ENV PORT=7860
|
| 26 |
CMD uvicorn app:app --host 0.0.0.0 --port $PORT
|
processor.py
CHANGED
|
@@ -7,54 +7,65 @@ from pathlib import Path
|
|
| 7 |
import asyncio
|
| 8 |
from concurrent.futures import ThreadPoolExecutor
|
| 9 |
import gc
|
|
|
|
|
|
|
| 10 |
|
| 11 |
class VideoProcessor:
|
| 12 |
def __init__(self):
|
| 13 |
-
# Use CPU if no GPU
|
| 14 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 15 |
print(f"Using device: {self.device}")
|
| 16 |
|
| 17 |
-
# Load MiDaS
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
self.executor = ThreadPoolExecutor(max_workers=1)
|
| 27 |
|
| 28 |
def hex_to_rgb(self, hex_color: str):
|
| 29 |
-
"""Convert hex to RGB"""
|
| 30 |
hex_color = hex_color.lstrip('#')
|
| 31 |
return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
|
| 32 |
|
| 33 |
async def process_video(self, input_path: str, threshold: float,
|
| 34 |
bg_color: str, session_id: str) -> str:
|
| 35 |
-
"""Process video asynchronously"""
|
| 36 |
loop = asyncio.get_event_loop()
|
| 37 |
output_path = str(Path("/tmp") / f"{session_id}_output.mp4")
|
| 38 |
|
| 39 |
-
# Run in thread pool
|
| 40 |
await loop.run_in_executor(
|
| 41 |
self.executor,
|
| 42 |
self._process_video_sync,
|
| 43 |
input_path, output_path, threshold, bg_color
|
| 44 |
)
|
| 45 |
-
|
| 46 |
return output_path
|
| 47 |
|
| 48 |
def _process_video_sync(self, input_path: str, output_path: str,
|
| 49 |
threshold: float, bg_color: str):
|
| 50 |
-
"""Synchronous video processing"""
|
| 51 |
cap = cv2.VideoCapture(input_path)
|
|
|
|
|
|
|
|
|
|
| 52 |
fps = int(cap.get(cv2.CAP_PROP_FPS))
|
| 53 |
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 54 |
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 55 |
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 56 |
|
| 57 |
-
|
|
|
|
| 58 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 59 |
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
| 60 |
|
|
@@ -66,9 +77,13 @@ class VideoProcessor:
|
|
| 66 |
if not ret:
|
| 67 |
break
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
frame_count += 1
|
| 74 |
if frame_count % 30 == 0:
|
|
@@ -82,20 +97,28 @@ class VideoProcessor:
|
|
| 82 |
|
| 83 |
cap.release()
|
| 84 |
out.release()
|
|
|
|
| 85 |
|
| 86 |
def process_frame(self, frame: np.ndarray, threshold: float,
|
| 87 |
bg_color: tuple) -> np.ndarray:
|
| 88 |
-
"""Process a single frame"""
|
| 89 |
-
# Resize for speed
|
| 90 |
h, w = frame.shape[:2]
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
frame_small = cv2.resize(frame, (new_w, new_h))
|
| 94 |
frame_rgb = cv2.cvtColor(frame_small, cv2.COLOR_BGR2RGB)
|
| 95 |
|
| 96 |
# Get depth map
|
| 97 |
img = Image.fromarray(frame_rgb)
|
| 98 |
-
input_batch = self.transform(img).to(self.device)
|
| 99 |
|
| 100 |
with torch.no_grad():
|
| 101 |
depth = self.model(input_batch)
|
|
@@ -109,13 +132,17 @@ class VideoProcessor:
|
|
| 109 |
# Normalize depth
|
| 110 |
depth_norm = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)
|
| 111 |
|
| 112 |
-
# Create mask
|
| 113 |
mask = (depth_norm > threshold).astype(np.uint8) * 255
|
| 114 |
mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_LINEAR)
|
| 115 |
-
mask = mask.astype(bool)
|
| 116 |
|
| 117 |
-
#
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
return result
|
|
|
|
| 7 |
import asyncio
|
| 8 |
from concurrent.futures import ThreadPoolExecutor
|
| 9 |
import gc
|
| 10 |
+
import warnings
|
| 11 |
+
warnings.filterwarnings('ignore')
|
| 12 |
|
| 13 |
class VideoProcessor:
|
| 14 |
def __init__(self):
    """Set up the depth-estimation model and a single-worker thread pool.

    Loads the lightweight MiDaS_small model from torch.hub (downloading it on
    first use), moving it to GPU when available; on any load failure, falls
    back to the larger DPT_Large model.  A one-worker ThreadPoolExecutor is
    created so video processing runs off the event loop, one job at a time.
    """
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {self.device}")

    # Load MiDaS with proper error handling
    try:
        print("Loading MiDaS model...")
        self.model = torch.hub.load("intel-isl/MiDaS", "MiDaS_small", trust_repo=True)
        self.model.to(self.device)
        self.model.eval()

        # Load transforms
        midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms", trust_repo=True)
        self.transform = midas_transforms.small_transform
        print("MiDaS model loaded successfully!")
    except Exception as e:
        print(f"Error loading MiDaS: {e}")
        print("Falling back to DPT model...")
        # Fallback to DPT model
        self.model = torch.hub.load("intel-isl/MiDaS", "DPT_Large", trust_repo=True)
        self.model.to(self.device)
        self.model.eval()
        # BUG FIX: `midas_transforms` was previously reused from the try
        # branch, but it is unbound whenever the first hub.load call is what
        # raised — the fallback then crashed with NameError.  Load the
        # transforms hub here explicitly before taking dpt_transform.
        midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms", trust_repo=True)
        self.transform = midas_transforms.dpt_transform

    # Single worker: process one video at a time to bound memory use on the
    # small Space; also serializes access to the (non-thread-safe) model.
    self.executor = ThreadPoolExecutor(max_workers=1)
|
| 39 |
|
| 40 |
def hex_to_rgb(self, hex_color: str):
    """Convert a hex colour string (with or without a leading '#') to an (R, G, B) tuple."""
    digits = hex_color.lstrip('#')
    red = int(digits[0:2], 16)
    green = int(digits[2:4], 16)
    blue = int(digits[4:6], 16)
    return (red, green, blue)
|
| 43 |
|
| 44 |
async def process_video(self, input_path: str, threshold: float,
|
| 45 |
bg_color: str, session_id: str) -> str:
|
|
|
|
| 46 |
loop = asyncio.get_event_loop()
|
| 47 |
output_path = str(Path("/tmp") / f"{session_id}_output.mp4")
|
| 48 |
|
|
|
|
| 49 |
await loop.run_in_executor(
|
| 50 |
self.executor,
|
| 51 |
self._process_video_sync,
|
| 52 |
input_path, output_path, threshold, bg_color
|
| 53 |
)
|
|
|
|
| 54 |
return output_path
|
| 55 |
|
| 56 |
def _process_video_sync(self, input_path: str, output_path: str,
|
| 57 |
threshold: float, bg_color: str):
|
|
|
|
| 58 |
cap = cv2.VideoCapture(input_path)
|
| 59 |
+
if not cap.isOpened():
|
| 60 |
+
raise ValueError(f"Could not open video: {input_path}")
|
| 61 |
+
|
| 62 |
fps = int(cap.get(cv2.CAP_PROP_FPS))
|
| 63 |
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 64 |
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 65 |
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 66 |
|
| 67 |
+
print(f"Video info: {width}x{height}, {fps}fps, {total_frames} frames")
|
| 68 |
+
|
| 69 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 70 |
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
| 71 |
|
|
|
|
| 77 |
if not ret:
|
| 78 |
break
|
| 79 |
|
| 80 |
+
try:
|
| 81 |
+
processed = self.process_frame(frame, threshold, bg_rgb)
|
| 82 |
+
out.write(processed)
|
| 83 |
+
except Exception as e:
|
| 84 |
+
print(f"Error processing frame {frame_count}: {e}")
|
| 85 |
+
# Write original frame on error
|
| 86 |
+
out.write(frame)
|
| 87 |
|
| 88 |
frame_count += 1
|
| 89 |
if frame_count % 30 == 0:
|
|
|
|
| 97 |
|
| 98 |
cap.release()
|
| 99 |
out.release()
|
| 100 |
+
print(f"Video saved to {output_path}")
|
| 101 |
|
| 102 |
def process_frame(self, frame: np.ndarray, threshold: float,
|
| 103 |
bg_color: tuple) -> np.ndarray:
|
|
|
|
|
|
|
| 104 |
h, w = frame.shape[:2]
|
| 105 |
+
|
| 106 |
+
# Resize for speed while maintaining aspect ratio
|
| 107 |
+
max_size = 384
|
| 108 |
+
if h > max_size or w > max_size:
|
| 109 |
+
if h > w:
|
| 110 |
+
new_h, new_w = max_size, int(max_size * w / h)
|
| 111 |
+
else:
|
| 112 |
+
new_h, new_w = int(max_size * h / w), max_size
|
| 113 |
+
else:
|
| 114 |
+
new_h, new_w = h, w
|
| 115 |
|
| 116 |
frame_small = cv2.resize(frame, (new_w, new_h))
|
| 117 |
frame_rgb = cv2.cvtColor(frame_small, cv2.COLOR_BGR2RGB)
|
| 118 |
|
| 119 |
# Get depth map
|
| 120 |
img = Image.fromarray(frame_rgb)
|
| 121 |
+
input_batch = self.transform(img).unsqueeze(0).to(self.device)
|
| 122 |
|
| 123 |
with torch.no_grad():
|
| 124 |
depth = self.model(input_batch)
|
|
|
|
| 132 |
# Normalize depth
|
| 133 |
depth_norm = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)
|
| 134 |
|
| 135 |
+
# Create mask
|
| 136 |
mask = (depth_norm > threshold).astype(np.uint8) * 255
|
| 137 |
mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_LINEAR)
|
|
|
|
| 138 |
|
| 139 |
+
# Smooth mask edges
|
| 140 |
+
mask = cv2.GaussianBlur(mask, (5, 5), 0)
|
| 141 |
+
mask_float = mask.astype(np.float32) / 255.0
|
| 142 |
+
mask_3channel = np.stack([mask_float] * 3, axis=2)
|
| 143 |
+
|
| 144 |
+
# Apply background with soft edges
|
| 145 |
+
bg_array = np.array(bg_color, dtype=np.float32).reshape(1, 1, 3)
|
| 146 |
+
result = (frame * mask_3channel + bg_array * (1 - mask_3channel)).astype(np.uint8)
|
| 147 |
|
| 148 |
return result
|
requirements.txt
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
-
fastapi==0.104.1
|
| 2 |
-
uvicorn==0.24.0
|
| 3 |
-
torch==2.1.0
|
| 4 |
-
torchvision==0.16.0
|
| 5 |
-
opencv-python-headless==4.8.1.78
|
| 6 |
-
numpy==1.24.3
|
| 7 |
-
Pillow==10.1.0
|
| 8 |
-
python-multipart==0.0.6
|
| 9 |
-
timm==0.9.2
|
|
|
|
|
|
| 1 |
+
fastapi==0.104.1
|
| 2 |
+
uvicorn==0.24.0
|
| 3 |
+
torch==2.1.0
|
| 4 |
+
torchvision==0.16.0
|
| 5 |
+
opencv-python-headless==4.8.1.78
|
| 6 |
+
numpy==1.24.3
|
| 7 |
+
Pillow==10.1.0
|
| 8 |
+
python-multipart==0.0.6
|
| 9 |
+
timm==0.9.2
|
| 10 |
+
transformers==4.35.0
|