|
|
""" |
|
|
Custom handler for Hugging Face Inference Endpoints. |
|
|
Serves the Depth Anything V3 Metric Large model for depth estimation. |
|
|
""" |
|
|
|
|
|
import base64 |
|
|
import io |
|
|
from typing import Any |
|
|
|
|
|
import numpy as np |
|
|
import torch |
|
|
from PIL import Image |
|
|
|
|
|
|
|
|
class EndpointHandler:
    """Inference Endpoints handler that runs Depth Anything V3 depth estimation.

    The handler is constructed once, then called once per request with the
    request payload. Images arrive base64 encoded and the resulting arrays
    are returned base64 encoded (``.npy`` format, float32).
    """

    # Hub repository the weights are loaded from.
    MODEL_ID = "depth-anything/da3metric-large"

    # Error returned when ``inputs`` is missing or has an unsupported type.
    _INVALID_INPUT_ERROR = "Invalid input format. Expected base64 encoded image(s)."

    def __init__(self, path: str = ""):
        """
        Initialize the depth estimation model.

        Args:
            path: Path to the model directory (provided by HF Inference
                Endpoints). Currently unused: the weights are always loaded
                from ``MODEL_ID``.
        """
        # Imported here rather than at module level, so the package is only
        # required when a handler is actually constructed.
        from depth_anything_3.api import DepthAnything3

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = DepthAnything3.from_pretrained(self.MODEL_ID)
        self.model = self.model.to(device=self.device)

    def __call__(self, data: dict[str, Any]) -> dict[str, Any]:
        """
        Process incoming requests for depth estimation.

        Args:
            data: Request payload. ``data["inputs"]`` may be a single base64
                string, a dict with an ``"image"`` key holding a base64
                string, or a list of base64 strings.

        Returns:
            On success, a dictionary with base64 encoded ``depth``,
            ``confidence``, ``intrinsics`` and ``extrinsics`` arrays, a
            ``shape`` entry listing each array's shape, and ``depth_range``
            with the min/max depth value. A field the model did not produce
            is reported as ``None``. On bad input, ``{"error": <message>}``.
        """
        inputs = data.get("inputs")

        # Normalise the three accepted payload shapes to a list of strings.
        if isinstance(inputs, str):
            encoded_images = [inputs]
        elif isinstance(inputs, dict) and "image" in inputs:
            encoded_images = [inputs["image"]]
        elif isinstance(inputs, list):
            encoded_images = inputs
        else:
            return {"error": self._INVALID_INPUT_ERROR}

        # An empty batch would otherwise fail later (e.g. min() of an empty
        # depth array), so reject it up front.
        if not encoded_images:
            return {"error": "No images provided. Expected at least one base64 encoded image."}

        try:
            images = [self._decode_image(item) for item in encoded_images]
        except (ValueError, TypeError, OSError) as exc:
            # ValueError: malformed base64 (binascii.Error subclasses it);
            # TypeError: item is not a string/bytes;
            # OSError: PIL could not identify or read the image data.
            return {"error": f"Could not decode image: {exc}"}

        with torch.inference_mode():
            prediction = self.model.inference(images)

        # NOTE(review): depending on the depth_anything_3 version these
        # fields may be torch tensors, NumPy arrays, or None for outputs the
        # model does not produce - confirm against the library's API.
        arrays = {
            "depth": self._to_numpy(prediction.depth),
            "confidence": self._to_numpy(getattr(prediction, "conf", None)),
            "intrinsics": self._to_numpy(getattr(prediction, "intrinsics", None)),
            "extrinsics": self._to_numpy(getattr(prediction, "extrinsics", None)),
        }

        response: dict[str, Any] = {
            name: None if arr is None else self._encode_array(arr)
            for name, arr in arrays.items()
        }
        response["shape"] = {
            name: None if arr is None else list(arr.shape)
            for name, arr in arrays.items()
        }
        depth = arrays["depth"]
        response["depth_range"] = {
            "min": float(depth.min()),
            "max": float(depth.max()),
        }
        return response

    @staticmethod
    def _decode_image(encoded: str) -> "Image.Image":
        """Decode one base64 encoded image into an RGB PIL image."""
        raw = base64.b64decode(encoded)
        return Image.open(io.BytesIO(raw)).convert("RGB")

    @staticmethod
    def _to_numpy(value: Any) -> "np.ndarray | None":
        """Convert a model output (tensor, array-like or None) to NumPy."""
        if value is None:
            return None
        if isinstance(value, torch.Tensor):
            return value.detach().cpu().numpy()
        return np.asarray(value)

    def _encode_array(self, arr: np.ndarray) -> str:
        """Encode numpy array as base64 string.

        The array is cast to float32 and written in ``.npy`` format before
        base64 encoding; decode with
        ``np.load(io.BytesIO(base64.b64decode(s)))``.
        """
        buffer = io.BytesIO()
        np.save(buffer, arr.astype(np.float32))
        return base64.b64encode(buffer.getvalue()).decode("utf-8")
|
|
|