File size: 8,425 Bytes
180521e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
"""
Back-projection utilities: depth map → 3D point cloud.

DepthPro outputs metric depth (meters) and an estimated focal length.
Using the standard pinhole camera model, each pixel can be back-projected
into a 3D point relative to the camera centre.
"""

from __future__ import annotations

from typing import Optional, Tuple

import numpy as np


def depth_to_point_cloud(
    depth: np.ndarray,
    focal_length: float,
    principal_point: Optional[Tuple[float, float]] = None,
    *,
    mask: Optional[np.ndarray] = None,
    sample_step: int = 1,
) -> np.ndarray:
    """
    Back-project a metric depth map into a 3D point cloud.

    Parameters
    ----------
    depth : np.ndarray
        (H, W) array of metric depths in meters.  It is converted to
        ``float32`` before use.
    focal_length : float
        Focal length in pixels (for the resolution of *depth*).
        DepthPro returns this automatically via ``DepthResult.focal_length``.
    principal_point : (cx, cy), optional
        Principal point in pixel coordinates.  Defaults to the image centre
        ``(W/2, H/2)``.
    mask : np.ndarray, optional
        (H, W) array.  Only pixels where the mask is truthy are kept, so both
        boolean masks and integer masks (e.g. 0/255 ``uint8``) are accepted.
        Useful for removing sky/background, invalid depths, etc.
    sample_step : int, default 1
        Spatial sub-sampling step.  ``2`` keeps every 2nd pixel (75 % reduction),
        ``4`` keeps every 4th (93.75 % reduction).  Handy for real-time viz.

    Returns
    -------
    points : np.ndarray
        (N, 3) float32 array of 3D points in the camera coordinate frame.
        ``+Z`` points forward (into the scene), ``+X`` is right, ``+Y`` is
        down (standard image convention).  Points are ordered row-major over
        the sampled pixel grid.

    Raises
    ------
    ValueError
        If *depth* is not 2-D, if *sample_step* is smaller than 1, or if
        *mask* does not have the same shape as *depth*.

    Notes
    -----
    DepthPro assumes square pixels (aspect ratio = 1) and therefore a single
    focal length value is sufficient: ``fx == fy == focal_length``.

    The standard pinhole projection equations are::

        X = (u - cx) * Z / fx
        Y = (v - cy) * Z / fy
        Z = depth[v, u]

    where ``(u, v)`` are pixel column/row indices.

    Pixels whose depth is not strictly positive or not finite (``NaN``,
    ``inf``) are dropped.
    """
    depth = np.asarray(depth, dtype=np.float32)
    if depth.ndim != 2:
        raise ValueError(f"depth must be a 2-D (H, W) array, got shape {depth.shape}")
    if sample_step < 1:
        raise ValueError(f"sample_step must be >= 1, got {sample_step}")
    H, W = depth.shape

    if principal_point is None:
        cx, cy = W / 2.0, H / 2.0
    else:
        cx, cy = float(principal_point[0]), float(principal_point[1])

    fx = fy = float(focal_length)

    # Original pixel coordinates of the sampled rows / columns.
    v_idx = np.arange(0, H, sample_step)
    u_idx = np.arange(0, W, sample_step)

    # Strided slicing selects exactly the pixels listed in v_idx / u_idx.
    Z = depth[::sample_step, ::sample_step]

    # Keep only usable depths: an infinite depth would otherwise yield
    # inf/NaN coordinates, and NaN fails the comparison anyway.
    valid = np.isfinite(Z) & (Z > 0.0)
    if mask is not None:
        mask = np.asarray(mask)
        if mask.shape != (H, W):
            raise ValueError(f"mask shape {mask.shape} does not match depth shape {(H, W)}")
        # Cast to bool so integer masks do not break the in-place AND.
        valid &= mask[::sample_step, ::sample_step].astype(bool)

    # np.nonzero walks the grid row-major, matching the order of Z[valid].
    rows, cols = np.nonzero(valid)
    u = u_idx[cols]
    v = v_idx[rows]
    Z = Z[valid]

    X = (u - cx) * Z / fx
    Y = (v - cy) * Z / fy

    return np.stack([X, Y, Z], axis=-1).astype(np.float32)


def rgbd_to_point_cloud(
    depth: np.ndarray,
    rgb: np.ndarray,
    focal_length: float,
    principal_point: Optional[Tuple[float, float]] = None,
    *,
    mask: Optional[np.ndarray] = None,
    sample_step: int = 1,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Back-project an RGB-D pair into a coloured 3D point cloud.

    Parameters
    ----------
    depth : np.ndarray
        (H, W) metric depth map.  It is converted to ``float32`` before use.
    rgb : np.ndarray
        (H, W, 3) uint8 RGB image.
    focal_length : float
        Estimated focal length in pixels; used for both axes
        (``fx == fy == focal_length``).
    principal_point : (cx, cy), optional
        Defaults to image centre ``(W/2, H/2)``.
    mask : np.ndarray, optional
        (H, W) mask selecting pixels to keep.  Any truthy value keeps the
        pixel, so boolean and integer (e.g. 0/255 ``uint8``) masks both work.
    sample_step : int, default 1
        Spatial sub-sampling step.

    Returns
    -------
    points : np.ndarray
        (N, 3) float32 3D points, ordered row-major over the sampled grid.
    colors : np.ndarray
        (N, 3) uint8 RGB colours aligned with *points*.

    Raises
    ------
    ValueError
        If *depth* and *rgb* disagree on H×W, if *depth* is not 2-D, if
        *sample_step* is smaller than 1, or if *mask* does not have the same
        shape as *depth*.

    Notes
    -----
    Pixels whose depth is not strictly positive or not finite (``NaN``,
    ``inf``) are dropped from both outputs.
    """
    depth = np.asarray(depth, dtype=np.float32)
    rgb = np.asarray(rgb)
    if depth.shape[:2] != rgb.shape[:2]:
        raise ValueError(
            f"depth shape {depth.shape} and rgb shape {rgb.shape} must have same H×W"
        )
    if depth.ndim != 2:
        raise ValueError(f"depth must be a 2-D (H, W) array, got shape {depth.shape}")
    if sample_step < 1:
        raise ValueError(f"sample_step must be >= 1, got {sample_step}")

    H, W = depth.shape
    if principal_point is None:
        cx, cy = W / 2.0, H / 2.0
    else:
        cx, cy = float(principal_point[0]), float(principal_point[1])

    fx = fy = float(focal_length)

    # Original pixel coordinates of the sampled rows / columns.
    v_idx = np.arange(0, H, sample_step)
    u_idx = np.arange(0, W, sample_step)

    # Strided slicing selects exactly the pixels listed in v_idx / u_idx.
    Z = depth[::sample_step, ::sample_step]
    colors_sampled = rgb[::sample_step, ::sample_step]

    # Keep only usable depths: an infinite depth would otherwise yield
    # inf/NaN coordinates, and NaN fails the comparison anyway.
    valid = np.isfinite(Z) & (Z > 0.0)
    if mask is not None:
        mask = np.asarray(mask)
        if mask.shape != (H, W):
            raise ValueError(f"mask shape {mask.shape} does not match depth shape {(H, W)}")
        # Cast to bool so integer masks do not break the in-place AND.
        valid &= mask[::sample_step, ::sample_step].astype(bool)

    # np.nonzero walks the grid row-major, matching Z[valid] / colors[valid].
    rows, cols = np.nonzero(valid)
    u = u_idx[cols]
    v = v_idx[rows]
    Z = Z[valid]
    colors = colors_sampled[valid]

    X = (u - cx) * Z / fx
    Y = (v - cy) * Z / fy

    points = np.stack([X, Y, Z], axis=-1).astype(np.float32)
    colors = np.asarray(colors, dtype=np.uint8)
    return points, colors


def normals_from_depth(
    depth: np.ndarray,
    focal_length: float,
    principal_point: Optional[Tuple[float, float]] = None,
) -> np.ndarray:
    """
    Compute per-pixel surface normals directly from the depth map.

    This is a fast, approximate normal estimator that works well for
    visualisation or as input to downstream surface-reconstruction methods
    (e.g. Poisson, NKSR).

    Every pixel is back-projected with the pinhole model, then the normal is
    the cross product of the horizontal and vertical 3D neighbour
    differences.  Interior pixels use the central difference
    ``P[j+1] - P[j-1]``; border pixels, and pixels next to an invalid depth,
    fall back to the one-sided difference that is available.

    Parameters
    ----------
    depth : np.ndarray
        (H, W) metric depth map.
    focal_length : float
        Focal length in pixels; used for both axes
        (``fx == fy == focal_length``).
    principal_point : (cx, cy), optional
        Defaults to image centre ``(W/2, H/2)``.

    Returns
    -------
    normals : np.ndarray
        (H, W, 3) float32 array of **unoriented** unit normals.
        ``normals[v, u]`` is the normal at pixel ``(u, v)``.  The normal is
        the zero vector where it cannot be estimated: the pixel's depth is
        not strictly positive or not finite, no valid neighbour exists along
        one of the two image axes, or the cross product is degenerate.

    Raises
    ------
    ValueError
        If *depth* is not a 2-D array.
    """
    depth = np.asarray(depth, dtype=np.float64)
    if depth.ndim != 2:
        raise ValueError(f"depth must be a 2-D (H, W) array, got shape {depth.shape}")
    H, W = depth.shape

    if principal_point is None:
        cx, cy = W / 2.0, H / 2.0
    else:
        cx, cy = float(principal_point[0]), float(principal_point[1])

    fx = fy = float(focal_length)

    # Zero out unusable depths so NaN/inf never enter the arithmetic; the
    # validity mask below keeps them from contributing to any difference.
    valid = np.isfinite(depth) & (depth > 0.0)
    Z = np.where(valid, depth, 0.0)

    # Back-project every pixel to a 3D point (H, W, 3).
    u, v = np.meshgrid(np.arange(W), np.arange(H))
    points = np.stack([(u - cx) * Z / fx, (v - cy) * Z / fy, Z], axis=-1)

    # Horizontal steps P[:, j+1] - P[:, j], kept only if both ends are valid.
    step_u = np.where(
        (valid[:, 1:] & valid[:, :-1])[..., None],
        points[:, 1:] - points[:, :-1],
        0.0,
    )
    # Each step is the forward difference of its left pixel and the backward
    # difference of its right pixel.  Summing both gives P[j+1] - P[j-1] in
    # the interior; the scale is irrelevant because the normal is normalised.
    grad_u = np.zeros_like(points)
    grad_u[:, :-1] += step_u
    grad_u[:, 1:] += step_u

    # Same construction along the vertical image axis.
    step_v = np.where(
        (valid[1:, :] & valid[:-1, :])[..., None],
        points[1:, :] - points[:-1, :],
        0.0,
    )
    grad_v = np.zeros_like(points)
    grad_v[:-1, :] += step_v
    grad_v[1:, :] += step_v

    # Cross product of the two tangent vectors gives the normal direction.
    normals = np.cross(grad_u, grad_v)

    # Normalise; degenerate pixels keep the zero vector instead of dividing
    # by (almost) zero.
    norm = np.linalg.norm(normals, axis=-1, keepdims=True)
    unit = np.zeros_like(normals)
    np.divide(normals, norm, out=unit, where=norm > 1e-8)

    return unit.astype(np.float32)