File size: 4,611 Bytes

808837f

#!/usr/bin/env python3
"""
CLI script: image → metric depth → 3D point cloud using DepthPro.

Usage
-----
python image_to_pointcloud.py photo.jpg output.ply --device cuda:0 --sample-step 2
python image_to_pointcloud.py photo.jpg output.ply --colored --save-depth depth.npy
"""

import argparse
import sys
from pathlib import Path

import numpy as np

# Put the parent of this script's directory first on the import path.
# NOTE(review): presumably that is where `depthpro_wrapper` lives — confirm
# against the repository layout.
sys.path.insert(0, str(Path(__file__).parent.parent))

from depthpro_wrapper import (
    DepthProEstimator,
    depth_to_point_cloud,
    rgbd_to_point_cloud,
    normals_from_depth,
    load_image,
    save_point_cloud,
)


def _npy_path(path: Path) -> Path:
    """Return the file name that ``np.save`` actually writes for *path*.

    ``np.save`` appends ``.npy`` when the given name does not already end
    with it, so status messages must report the adjusted name.
    """
    text = str(path)
    return path if text.endswith(".npy") else Path(text + ".npy")


def _subsample_normals(
    normals: np.ndarray, depth: np.ndarray, sample_step: int
) -> np.ndarray:
    """Select the normals on the strided pixel grid where depth is positive.

    Takes every ``sample_step``-th row and column of *normals* and keeps only
    the entries whose corresponding *depth* value is ``> 0``.

    NOTE(review): this assumes the back-projection helpers keep exactly the
    pixels with depth > 0 on the same strided grid — confirm against
    ``depthpro_wrapper``.
    """
    strided_depth = depth[::sample_step, ::sample_step]
    strided_normals = normals[::sample_step, ::sample_step]
    return strided_normals[strided_depth > 0]


def main() -> None:
    """Parse the command line, estimate depth and write the point cloud.

    Steps: validate arguments, run ``DepthProEstimator`` on the input image,
    optionally save the depth / confidence maps with ``np.save``, back-project
    to 3D (with colours and/or normals when requested) and hand the result to
    ``save_point_cloud``.

    Raises:
        SystemExit: via ``parser.error`` when the input image is missing or
            ``--sample-step`` is not a positive integer.
        RuntimeError: when the sub-sampled normals do not line up with the
            back-projected points.
    """
    parser = argparse.ArgumentParser(
        description="Apple DepthPro: image → metric depth → 3D point cloud"
    )
    parser.add_argument("image", type=Path, help="Input RGB image")
    parser.add_argument("output", type=Path, help="Output point cloud (.ply)")
    parser.add_argument("--device", default="cuda:0", help="PyTorch device")
    parser.add_argument("--dtype", choices=["float16", "float32"], default="float16", help="Inference dtype")
    parser.add_argument("--colored", action="store_true", help="Include per-point RGB colours")
    parser.add_argument("--normals", action="store_true", help="Include per-point normals")
    parser.add_argument("--sample-step", type=int, default=1, help="Spatial sub-sampling (1 = full res, 2 = 1/4 points)")
    parser.add_argument("--save-depth", type=Path, default=None, help="Also save depth map as .npy")
    parser.add_argument("--save-confidence", type=Path, default=None, help="Also save confidence map as .npy")
    args = parser.parse_args()

    # Reject bad arguments up front, before the model is loaded and run.
    if args.sample_step < 1:
        parser.error(f"--sample-step must be >= 1 (got {args.sample_step})")
    if not args.image.is_file():
        parser.error(f"Input image not found: {args.image}")

    # ---- depth estimation -----------------------------------------------
    print(f"Loading DepthPro on {args.device} (dtype={args.dtype}) ...")
    # Local import: torch is only needed from this point on.
    import torch
    torch_dtype = torch.float16 if args.dtype == "float16" else torch.float32

    estimator = DepthProEstimator(device=args.device, dtype=torch_dtype)

    print(f"Estimating depth for {args.image} ...")
    result = estimator.estimate(
        args.image,
        return_confidence=args.save_confidence is not None,
    )

    print(f"  Image size: {result.width}×{result.height}")
    print(f"  Estimated focal length: {result.focal_length:.1f} px")
    print(f"  Estimated FOV: {result.field_of_view:.1f}°")
    print(f"  Depth range: {result.depth.min():.2f} m – {result.depth.max():.2f} m")

    # ---- optional saves -------------------------------------------------
    if args.save_depth is not None:
        np.save(args.save_depth, result.depth)
        # Report the name np.save really used (it may have appended ".npy").
        print(f"  Saved depth map → {_npy_path(args.save_depth)}")

    if args.save_confidence is not None:
        if result.confidence is not None:
            np.save(args.save_confidence, result.confidence)
            print(f"  Saved confidence map → {_npy_path(args.save_confidence)}")
        else:
            # The user asked for a file; say so instead of skipping silently.
            print(
                "  Warning: no confidence map was returned; "
                f"{args.save_confidence} was not written",
                file=sys.stderr,
            )

    # ---- back-projection ------------------------------------------------
    print("\nBack-projecting to 3D point cloud ...")

    # Normals are computed on the full-resolution depth map and sub-sampled
    # afterwards so that they match the sampled points.
    full_normals = None
    if args.normals:
        full_normals = normals_from_depth(result.depth, result.focal_length)

    extras = {}
    if args.colored:
        points, colors = rgbd_to_point_cloud(
            result.depth,
            result.image,
            result.focal_length,
            sample_step=args.sample_step,
        )
        extras["colors"] = colors
        label = "Colored point cloud"
    else:
        points = depth_to_point_cloud(
            result.depth,
            result.focal_length,
            sample_step=args.sample_step,
        )
        label = "Point cloud"

    normals = None
    if full_normals is not None:
        normals = _subsample_normals(full_normals, result.depth, args.sample_step)
        # Per-point normals are only meaningful if there is one per point.
        if len(normals) != len(points):
            raise RuntimeError(
                f"Normal/point count mismatch ({len(normals):,} normals vs "
                f"{len(points):,} points); the depth > 0 mask does not match "
                "the back-projection filter"
            )

    print(f"  {label}: {len(points):,} points")
    save_point_cloud(args.output, points, normals=normals, **extras)

    print(f"\nDone — saved to {args.output}")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()