Spaces:

MySafeCode
/

depthmap

Sleeping

App Files Files Community

MySafeCode committed 4 days ago

Commit

e0a1fd2

verified ·

1 Parent(s): 3325de6

Upload 6 files

Browse files

Files changed (6) hide show

.gitignore +50 -0
Dockerfile +22 -0
README.md +38 -10
app.py +86 -0
processor.py +121 -0
requirements.txt +8 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,50 @@

+__pycache__/
+*.pyc
+.DS_Store
+.vscode/
+*.mp4
+*.avi
+*.mov
+temp/
+/tmp/
+*.log
+.env
+venv/
+env/
+.venv/
+*.sock
+*.pid
+*.seed
+*.pid.lock
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+*.manifest
+*.spec
+pip-log.txt
+pip-delete-this-directory.txt
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.pytest_cache/

Dockerfile ADDED Viewed

	@@ -0,0 +1,22 @@

+FROM pytorch/pytorch:2.1.0-cuda12.1-cudnn8-runtime
+# Install system dependencies (shared libraries used by OpenCV);
+# --no-install-recommends keeps the image free of optional extras
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libgl1-mesa-glx \
+    libglib2.0-0 \
+    && rm -rf /var/lib/apt/lists/*
+# Hugging Face Docker Spaces run the container as UID 1000. Create that user
+# so the process has a writable HOME (torch.hub stores its download cache
+# under the home directory by default).
+RUN useradd -m -u 1000 user
+ENV HOME=/home/user
+WORKDIR /app
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY --chown=user . .
+# Create temp directory and hand it to the runtime user so uploads can be
+# written there (a root-owned directory would reject writes from UID 1000)
+RUN mkdir -p /tmp/video-bg-remover && chown user /tmp/video-bg-remover
+USER user
+# For Hugging Face Spaces
+ENV PORT=7860
+EXPOSE 7860
+# Shell form is kept so $PORT is expanded; exec makes uvicorn replace the
+# shell so it receives SIGTERM directly and can shut down cleanly
+CMD exec uvicorn app:app --host 0.0.0.0 --port $PORT

README.md CHANGED Viewed

@@ -1,10 +1,38 @@
----
-title: Depthmap
-emoji: 👀
-colorFrom: purple
-colorTo: red
-sdk: docker
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Depth Video Background Remover
+emoji: 🎥
+colorFrom: purple
+colorTo: blue
+sdk: docker
+pinned: false
+---
+# Depth Video Background Remover
+Remove backgrounds from videos using AI depth estimation - no green screen needed!
+## How it works
+1. Upload a video
+2. Adjust depth threshold (higher = more background removed)
+3. Pick a background color
+4. Click process and download!
+## Technical details
+- Uses MiDaS (small) for depth estimation
+- Runs on CPU/GPU (T4 on Spaces)
+- Processes frame-by-frame with PyTorch
+## Features
+- ✨ AI-powered depth estimation
+- 🎨 Customizable background color
+- 📁 Drag & drop upload
+- ⚡ Fast processing with PyTorch
+- 📥 One-click download
+## Requirements
+- Python 3.8+
+- PyTorch 2.0+
+- CUDA-capable GPU (optional)
+## License
+Apache 2.0

app.py ADDED Viewed

	@@ -0,0 +1,86 @@

+from fastapi import FastAPI, File, UploadFile, Form, HTTPException
+from fastapi.responses import StreamingResponse, HTMLResponse
+from fastapi.staticfiles import StaticFiles
+import uvicorn
+import os
+import shutil
+from typing import Optional
+import uuid
+from pathlib import Path
+from processor import VideoProcessor
+# Create FastAPI app
+app = FastAPI()
+# Mount static files. The "static" directory is resolved relative to the
+# working directory and must exist when this module is imported.
+app.mount("/static", StaticFiles(directory="static"), name="static")
+# Create temp directory for uploaded videos. parents=True keeps the import
+# from failing on hosts where the /tmp root itself does not exist yet.
+TEMP_DIR = Path("/tmp") / "video-bg-remover"
+TEMP_DIR.mkdir(parents=True, exist_ok=True)
+# Initialize processor once at import time so every request reuses the
+# same loaded model instead of reloading it per call
+print("Loading MiDaS model...")
+processor = VideoProcessor()
+print("Model loaded!")
+@app.get("/", response_class=HTMLResponse)
+async def root():
+    """Serve the frontend.
+
+    Returns:
+        The text of ``static/index.html`` (path relative to the working
+        directory), sent as an HTML response.
+    """
+    # Decode explicitly as UTF-8 so the page does not depend on the host's
+    # locale default encoding (assumes index.html is saved as UTF-8)
+    with open("static/index.html", "r", encoding="utf-8") as f:
+        return f.read()
+@app.post("/process")
+async def process_video(
+    file: UploadFile = File(...),
+    threshold: float = Form(0.3),
+    bg_color: str = Form("#00FF00")
+):
+    """
+    Process video: remove background using depth estimation.
+
+    Args:
+        file: Uploaded video; its declared content type must start with
+            ``video/``.
+        threshold: Cut-off forwarded to the processor (form field,
+            default 0.3).
+        bg_color: Hex colour forwarded to the processor (form field,
+            default ``#00FF00``).
+
+    Returns:
+        A streaming ``video/mp4`` response offered as ``processed.mp4``.
+
+    Raises:
+        HTTPException: 400 when the upload is not declared as a video,
+            500 when saving or processing the video fails.
+    """
+    # Validate file. content_type can be None when the client sends no
+    # Content-Type for the part, so guard before calling startswith.
+    if not (file.content_type or "").startswith('video/'):
+        raise HTTPException(400, "File must be a video")
+    # Generate unique ID
+    session_id = str(uuid.uuid4())
+    input_path = TEMP_DIR / f"{session_id}_input.mp4"
+    try:
+        # Save uploaded file (inside the try so a failed or partial write
+        # is cleaned up as well)
+        with open(input_path, "wb") as buffer:
+            shutil.copyfileobj(file.file, buffer)
+        # Process video
+        output_path = await processor.process_video(
+            input_path=str(input_path),
+            threshold=threshold,
+            bg_color=bg_color,
+            session_id=session_id
+        )
+    except Exception as e:
+        # Cleanup on error
+        input_path.unlink(missing_ok=True)
+        raise HTTPException(500, str(e)) from e
+    # Stream result back
+    def iterfile():
+        try:
+            with open(output_path, "rb") as f:
+                # Fixed-size chunks: iterating the file object would split
+                # the binary MP4 on newline bytes into erratic pieces
+                yield from iter(lambda: f.read(1024 * 1024), b"")
+        finally:
+            # Cleanup runs even when the client disconnects mid-download
+            # and the generator is closed before it is exhausted
+            input_path.unlink(missing_ok=True)
+            Path(output_path).unlink(missing_ok=True)
+    return StreamingResponse(
+        iterfile(),
+        media_type="video/mp4",
+        headers={"Content-Disposition": "attachment; filename=processed.mp4"}
+    )
+# For Hugging Face Spaces
+if __name__ == "__main__":
+    # Honour the PORT environment variable (the Dockerfile sets it) and fall
+    # back to 7860 when it is not defined
+    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "7860")))

processor.py ADDED Viewed

	@@ -0,0 +1,121 @@

+import torch
+import torch.nn.functional as F
+import cv2
+import numpy as np
+from PIL import Image
+from pathlib import Path
+import asyncio
+from concurrent.futures import ThreadPoolExecutor
+import gc
+class VideoProcessor:
+    """Replace the background of a video using MiDaS depth estimation.
+
+    Each frame gets a depth map from the MiDaS small model; pixels whose
+    normalised depth value is not above the threshold are painted with a
+    solid colour.
+    """
+    def __init__(self):
+        """Load the MiDaS small model and its transform onto the device."""
+        # Use CPU if no GPU
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        print(f"Using device: {self.device}")
+        # Load MiDaS (small model for speed)
+        self.model = torch.hub.load("intel-isl/MiDaS", "MiDaS_small")
+        self.model.to(self.device)
+        self.model.eval()
+        # Load transforms
+        midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
+        self.transform = midas_transforms.small_transform
+        # One worker: jobs are processed one at a time on the shared model
+        self.executor = ThreadPoolExecutor(max_workers=1)
+    def hex_to_rgb(self, hex_color: str):
+        """Convert a hex colour string to an ``(r, g, b)`` tuple of ints.
+
+        Accepts an optional leading ``#``. Three-digit shorthand (``#0f0``)
+        is expanded; otherwise the first six hex digits are used.
+
+        Raises:
+            ValueError: If the string is not a usable hex colour.
+        """
+        digits = hex_color.lstrip('#')
+        if len(digits) == 3:
+            # CSS-style shorthand: every digit is doubled
+            digits = "".join(ch * 2 for ch in digits)
+        if len(digits) < 6:
+            raise ValueError(f"Invalid hex color: {hex_color!r}")
+        try:
+            return tuple(int(digits[i:i+2], 16) for i in (0, 2, 4))
+        except ValueError:
+            raise ValueError(f"Invalid hex color: {hex_color!r}") from None
+    async def process_video(self, input_path: str, threshold: float,
+                           bg_color: str, session_id: str) -> str:
+        """Process a video without blocking the event loop.
+
+        Args:
+            input_path: Path of the video to read.
+            threshold: Normalised-depth cut-off; pixels at or below it are
+                replaced by the background colour.
+            bg_color: Background colour as a hex string.
+            session_id: Unique ID used to name the output file.
+
+        Returns:
+            Path of the written output video under ``/tmp``.
+        """
+        loop = asyncio.get_running_loop()
+        output_path = str(Path("/tmp") / f"{session_id}_output.mp4")
+        # Run in thread pool
+        await loop.run_in_executor(
+            self.executor,
+            self._process_video_sync,
+            input_path, output_path, threshold, bg_color
+        )
+        return output_path
+    def _process_video_sync(self, input_path: str, output_path: str,
+                            threshold: float, bg_color: str):
+        """Read ``input_path`` frame by frame and write ``output_path``.
+
+        Raises:
+            ValueError: If the colour is invalid or the input cannot be
+                opened.
+            RuntimeError: If the output writer cannot be opened.
+        """
+        # Parse the colour first so bad input fails before any file is opened
+        bg_rgb = self.hex_to_rgb(bg_color)
+        cap = cv2.VideoCapture(input_path)
+        out = None
+        try:
+            if not cap.isOpened():
+                raise ValueError(f"Could not open video: {input_path}")
+            # Keep the fractional rate (e.g. 29.97); truncating to int would
+            # change the duration of the output. Fall back when the
+            # container reports no usable rate (0 or NaN).
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            if not fps > 0:
+                fps = 30.0
+            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            # Output video
+            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+            if not out.isOpened():
+                raise RuntimeError(f"Could not open video writer: {output_path}")
+            frame_count = 0
+            while True:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+                # Process frame
+                processed = self.process_frame(frame, threshold, bg_rgb)
+                out.write(processed)
+                frame_count += 1
+                if frame_count % 30 == 0:
+                    print(f"Progress: {frame_count}/{total_frames}")
+                # Clear cache occasionally
+                if frame_count % 100 == 0:
+                    gc.collect()
+                    if torch.cuda.is_available():
+                        torch.cuda.empty_cache()
+        finally:
+            # Release the handles even when a frame fails mid-way
+            cap.release()
+            if out is not None:
+                out.release()
+    def process_frame(self, frame: np.ndarray, threshold: float,
+                      bg_color: tuple) -> np.ndarray:
+        """Process a single frame.
+
+        Args:
+            frame: Frame as read by OpenCV (BGR channel order).
+            threshold: Normalised-depth cut-off in the 0..1 range.
+            bg_color: Background colour as an ``(r, g, b)`` tuple.
+
+        Returns:
+            A copy of ``frame`` with every pixel whose normalised depth is
+            not above ``threshold`` replaced by the background colour.
+        """
+        # Resize for speed
+        h, w = frame.shape[:2]
+        new_h = 256
+        new_w = max(1, int(256 * w / h))  # never ask OpenCV for a zero width
+        frame_small = cv2.resize(frame, (new_w, new_h))
+        frame_rgb = cv2.cvtColor(frame_small, cv2.COLOR_BGR2RGB)
+        # Get depth map. The MiDaS hub transform takes the RGB ndarray
+        # directly (it does array arithmetic on its input), so no PIL
+        # conversion is done here.
+        input_batch = self.transform(frame_rgb).to(self.device)
+        with torch.no_grad():
+            depth = self.model(input_batch)
+            depth = F.interpolate(
+                depth.unsqueeze(1),
+                size=(new_h, new_w),
+                mode="bicubic",
+                align_corners=False
+            )[0, 0].cpu().numpy()
+        # Normalize depth (epsilon avoids dividing by zero on a flat map)
+        depth_norm = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)
+        # Upsample the continuous map and threshold afterwards; resizing an
+        # already-binary mask with interpolation would turn every blended
+        # edge pixel into foreground
+        depth_full = cv2.resize(depth_norm, (w, h), interpolation=cv2.INTER_LINEAR)
+        mask = depth_full > threshold
+        # Apply background. The frame is BGR while bg_color is RGB, so the
+        # channel order is reversed before writing.
+        result = frame.copy()
+        result[~mask] = bg_color[::-1]
+        return result

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+fastapi==0.104.1
+uvicorn==0.24.0
+torch==2.1.0
+torchvision==0.16.0
+# Needed by the intel-isl/MiDaS torch.hub entrypoint that processor.py loads
+timm==0.6.12
+opencv-python-headless==4.8.1.78
+numpy==1.24.3
+Pillow==10.1.0
+python-multipart==0.0.6