File size: 14,346 Bytes
9b33fca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
"""Utility functions for image processing operations."""

from __future__ import annotations

import numpy as np
import torch

from vis4d.common.array import array_to_numpy
from vis4d.common.typing import (
    ArrayLike,
    ArrayLikeFloat,
    ArrayLikeInt,
    ArrayLikeUInt,
    NDArrayBool,
    NDArrayF32,
    NDArrayUI8,
)
from vis4d.data.const import AxisMode
from vis4d.op.box.box3d import (
    boxes3d_in_image,
    boxes3d_to_corners,
    transform_boxes3d,
)
from vis4d.op.geometry.projection import project_points
from vis4d.op.geometry.transform import inverse_rigid_transform
from vis4d.vis.util import DEFAULT_COLOR_MAPPING


def _get_box_label(
    category: str | None,
    score: float | None,
    track_id: int | None,
) -> str:
    """Builds the text label that describes a box.

    Fields that are None are left out. The remaining fields are joined with
    ", " in the fixed order category, track id, score.

    Args:
        category (str | None): The category name.
        score (float | None): The confidence score. It is multiplied by 100
            and rendered with one decimal place followed by '%'.
        track_id (int | None): The track id.

    Returns:
        str: Label for this box of format 'class_name, track_id, score%'.
            Empty string if every field is None.
    """
    candidates = (
        category,
        None if track_id is None else str(track_id),
        None if score is None else f"{score * 100:.1f}%",
    )
    return ", ".join(part for part in candidates if part is not None)


def _to_binary_mask(
    mask: NDArrayUI8, ignore_class: int = 255
) -> tuple[NDArrayUI8, NDArrayUI8]:
    """Converts a class id mask into one binary mask per class.

    Args:
        mask (NDArrayUI8): The mask to convert with shape [H, W]. Each entry
            holds a class id.
        ignore_class (int): The class id to ignore. Defaults to 255.

    Returns:
        NDArrayUI8: The binary masks with shape [N, H, W], ordered by
            ascending class id. The entries are boolean valued. N is 0 if
            the mask holds no class other than ignore_class.
        NDArrayUI8: The class ids for each binary mask with shape [N].
    """
    class_ids = np.unique(mask)
    class_ids = class_ids[class_ids != ignore_class]

    # Compare all remaining class ids against the mask in one broadcast.
    # Unlike np.stack on a list, this also works when no class is left and
    # then yields an empty [0, H, W] array instead of raising a ValueError.
    per_class_ids = class_ids.reshape((-1,) + (1,) * mask.ndim)
    binary_masks = mask[np.newaxis, ...] == per_class_ids
    return binary_masks, class_ids.astype(np.uint8)


def preprocess_boxes(
    boxes: ArrayLikeFloat,
    scores: None | ArrayLikeFloat = None,
    class_ids: None | ArrayLikeInt = None,
    track_ids: None | ArrayLikeInt = None,
    color_palette: list[tuple[int, int, int]] = DEFAULT_COLOR_MAPPING,
    class_id_mapping: dict[int, str] | None = None,
    default_color: tuple[int, int, int] = (255, 0, 0),
    categories: None | list[str] = None,
) -> tuple[
    list[tuple[float, float, float, float]],
    list[str],
    list[tuple[int, int, int]],
]:
    """Preprocesses 2D bounding boxes for drawing.

    Turns the given boxes and the optional score / class / track information
    into three parallel lists: box corners, text labels and colors.

    Args:
        boxes (ArrayLikeFloat): Boxes of shape [N, 4] where N is the number of
            boxes and the second channel consists of (x1,y1,x2,y2) box
            coordinates.
        scores (ArrayLikeFloat, optional): Scores for each box, shape [N].
        class_ids (ArrayLikeInt, optional): Class id for each box, shape [N].
        track_ids (ArrayLikeInt, optional): Track id for each box, shape [N].
        color_palette (list[tuple[int, int, int]]): Color palette. A box is
            colored with the entry at its track id modulo the palette
            length, or at its class id if no track id is given.
        class_id_mapping (dict[int, str], optional): Mapping from class id
            to the name used in the label. Ids missing from the mapping are
            shown as their number.
        default_color (tuple[int, int, int]): Fallback color for boxes that
            have neither a class id nor a track id.
        categories (None | list[str], optional): Category name for each
            box. If given, it replaces the name derived from the class id.

    Returns:
        boxes_proc (list[tuple[float, float, float, float]]): List of box
            corners.
        labels_proc (list[str]): List of labels.
        colors_proc (list[tuple[int, int, int]]): List of colors.
    """
    id_to_name = {} if class_id_mapping is None else class_id_mapping

    boxes = array_to_numpy(boxes, n_dims=2, dtype=np.float32)
    scores_np = array_to_numpy(scores, n_dims=1, dtype=np.float32)
    class_ids_np = array_to_numpy(class_ids, n_dims=1, dtype=np.int32)
    track_ids_np = array_to_numpy(track_ids, n_dims=1, dtype=np.int32)

    # A single box without a leading dimension is treated as a batch of one
    if boxes.ndim == 1:
        boxes = boxes.reshape(1, -1)

    boxes_proc: list[tuple[float, float, float, float]] = []
    labels_proc: list[str] = []
    colors_proc: list[tuple[int, int, int]] = []

    for idx, box in enumerate(boxes):
        class_id = class_ids_np[idx].item() if class_ids_np is not None else None
        score = scores_np[idx].item() if scores_np is not None else None
        track_id = track_ids_np[idx].item() if track_ids_np is not None else None

        # The track id takes precedence over the class id for the color
        color_key = class_id if track_id is None else track_id
        if color_key is None:
            color = default_color
        else:
            color = color_palette[color_key % len(color_palette)]

        category: str | None
        if categories is not None:
            category = categories[idx]
        elif class_id is None:
            category = None
        else:
            category = id_to_name.get(class_id, str(class_id))

        boxes_proc.append(
            (box[0].item(), box[1].item(), box[2].item(), box[3].item())
        )
        colors_proc.append(color)
        labels_proc.append(_get_box_label(category, score, track_id))

    return boxes_proc, labels_proc, colors_proc


def preprocess_boxes3d(
    image_hw: tuple[int, int],
    boxes3d: ArrayLikeFloat,
    intrinsics: ArrayLikeFloat,
    extrinsics: ArrayLikeFloat | None = None,
    scores: None | ArrayLikeFloat = None,
    class_ids: None | ArrayLikeInt = None,
    track_ids: None | ArrayLikeInt = None,
    color_palette: list[tuple[int, int, int]] = DEFAULT_COLOR_MAPPING,
    class_id_mapping: dict[int, str] | None = None,
    default_color: tuple[int, int, int] = (255, 0, 0),
    axis_mode: AxisMode = AxisMode.OPENCV,
    categories: None | list[str] = None,
) -> tuple[
    list[tuple[float, float, float]],
    list[list[tuple[float, float, float]]],
    list[str],
    list[tuple[int, int, int]],
    list[int | None],
]:
    """Preprocesses 3D bounding boxes.

    Converts the given predicted bounding boxes and class/track information
    into lists of centers, corners, labels, colors and track_ids. Boxes for
    which boxes3d_in_image reports False are dropped from every output list.

    Args:
        image_hw (tuple[int, int]): Image size, forwarded to
            boxes3d_in_image.
        boxes3d (ArrayLikeFloat): Boxes with one row per box. The first
            three values of a row are returned as the box center.
        intrinsics (ArrayLikeFloat): Camera intrinsics, forwarded to
            boxes3d_in_image.
        extrinsics (ArrayLikeFloat, optional): Camera extrinsics. Required
            if axis_mode is not AxisMode.OPENCV; its inverse rigid transform
            is used to move the boxes into the camera frame.
        scores (ArrayLikeFloat, optional): Scores for each box, shape [N].
        class_ids (ArrayLikeInt, optional): Class id for each box, shape [N].
        track_ids (ArrayLikeInt, optional): Track id for each box, shape [N].
        color_palette (list[tuple[int, int, int]]): Color palette. A box is
            colored with the entry at its track id modulo the palette
            length, or at its class id if no track id is given.
        class_id_mapping (dict[int, str], optional): Mapping from class id
            to the name used in the label. Ids missing from the mapping are
            shown as their number.
        default_color (tuple[int, int, int]): Fallback color for boxes that
            have neither a class id nor a track id.
        axis_mode (AxisMode): Axis mode of the given boxes. Every mode other
            than AxisMode.OPENCV is transformed with AxisMode.ROS as the
            source axis mode.
        categories (None | list[str], optional): Category name for each of
            the N input boxes. If given, it replaces the name derived from
            the class id.

    Returns:
        centers_proc (list[tuple[float, float, float]]): List of box
            centers, taken from the input boxes (not from the boxes moved
            to the camera frame).
        corners_proc (list[list[tuple[float, float, float]]]): List of box
            corners, computed from the boxes in the camera frame.
        labels_proc (list[str]): List of labels.
        colors_proc (list[tuple[int, int, int]]): List of colors.
        track_ids_proc (list[int | None]): List of track ids.
    """
    if class_id_mapping is None:
        class_id_mapping = {}

    boxes3d = array_to_numpy(boxes3d, n_dims=2, dtype=np.float32)
    intrinsics = array_to_numpy(intrinsics, n_dims=2, dtype=np.float32)

    boxes3d = torch.from_numpy(boxes3d)
    intrinsics = torch.from_numpy(intrinsics)

    if axis_mode != AxisMode.OPENCV:
        assert (
            extrinsics is not None
        ), "extrinsics must be provided to move boxes to camera coordiante."
        extrinsics = array_to_numpy(extrinsics, n_dims=2, dtype=np.float32)
        extrinsics = torch.from_numpy(extrinsics)
        global_to_cam = inverse_rigid_transform(extrinsics)
        boxes3d_cam = transform_boxes3d(
            boxes3d,
            global_to_cam,
            source_axis_mode=AxisMode.ROS,
            target_axis_mode=AxisMode.OPENCV,
        )
    else:
        boxes3d_cam = boxes3d

    corners = boxes3d_to_corners(boxes3d_cam, axis_mode=AxisMode.OPENCV)

    mask = boxes3d_in_image(corners, intrinsics, image_hw)

    boxes3d_np = boxes3d.numpy()
    corners_np = corners.numpy()

    scores_np = array_to_numpy(scores, n_dims=1, dtype=np.float32)
    class_ids_np = array_to_numpy(class_ids, n_dims=1, dtype=np.int32)
    track_ids_np = array_to_numpy(track_ids, n_dims=1, dtype=np.int32)

    centers_proc: list[tuple[float, float, float]] = []
    corners_proc: list[list[tuple[float, float, float]]] = []
    colors_proc: list[tuple[int, int, int]] = []
    labels_proc: list[str] = []
    track_ids_proc: list[int | None] = []

    if len(mask) == 1:
        # A single box is either kept untouched or nothing is returned
        if not mask[0]:
            return (
                centers_proc,
                corners_proc,
                labels_proc,
                colors_proc,
                track_ids_proc,
            )
    else:
        boxes3d_np = boxes3d_np[mask]
        corners_np = corners_np[mask]
        scores_np = scores_np[mask] if scores_np is not None else None
        class_ids_np = class_ids_np[mask] if class_ids_np is not None else None
        track_ids_np = track_ids_np[mask] if track_ids_np is not None else None
        if categories is not None:
            # The loop below indexes into the filtered arrays, so the
            # categories have to be filtered the same way. Otherwise a kept
            # box would get the category of an earlier, dropped box. A new
            # list is built so that the caller's list is not modified.
            categories = [
                categories[i] for i, keep in enumerate(mask.tolist()) if keep
            ]

    for idx in range(corners_np.shape[0]):
        class_id = None if class_ids_np is None else class_ids_np[idx].item()
        score = None if scores_np is None else scores_np[idx].item()
        track_id = None if track_ids_np is None else track_ids_np[idx].item()

        # The track id takes precedence over the class id for the color
        if track_id is not None:
            color = color_palette[track_id % len(color_palette)]
        elif class_id is not None:
            color = color_palette[class_id % len(color_palette)]
        else:
            color = default_color

        centers_proc.append(
            (
                boxes3d_np[idx][0].item(),
                boxes3d_np[idx][1].item(),
                boxes3d_np[idx][2].item(),
            )
        )
        corners_proc.append([tuple(pts) for pts in corners_np[idx].tolist()])
        colors_proc.append(color)

        if categories is not None:
            category = categories[idx]
        elif class_id is not None:
            category = class_id_mapping.get(class_id, str(class_id))
        else:
            category = None

        labels_proc.append(_get_box_label(category, score, track_id))
        track_ids_proc.append(track_id)
    return centers_proc, corners_proc, labels_proc, colors_proc, track_ids_proc


def preprocess_masks(
    masks: ArrayLikeUInt,
    class_ids: ArrayLikeInt | None = None,
    color_mapping: list[tuple[int, int, int]] = DEFAULT_COLOR_MAPPING,
) -> tuple[list[NDArrayBool], list[tuple[int, int, int]]]:
    """Preprocesses semantic or instance segmentation masks for drawing.

    Args:
        masks (ArrayLikeUInt): Masks of shape [H, W] or [N, H, W]. Masks of
            shape [H, W] are treated as a semantic segmentation, i.e. each
            pixel contains the class id, and are split into one binary mask
            per class. Masks of shape [N, H, W] are treated as the binary
            masks of N instances.
        class_ids (ArrayLikeInt, None): Class id for each mask, shape [N].
            Only used for masks of shape [N, H, W]; for [H, W] masks the
            class ids are extracted from the mask itself. If no class ids
            are available, the index of the mask selects the color.
        color_mapping (list[tuple[int, int, int]]): Color mapping for
            each class. Ids are wrapped around the length of the mapping.

    Returns:
        tuple[list[masks], list[colors]]: Returns a list with all masks of
            shape [H, W] as well as a list with the corresponding colors.

    Raises:
        ValueError: If the masks have an invalid shape.
    """
    masks_np = array_to_numpy(masks, n_dims=None, dtype=np.uint8)

    n_dims = masks_np.ndim
    if n_dims not in (2, 3):
        raise ValueError(
            f"Expected masks to have 2 or 3 dimensions, but got "
            f"{len(masks_np.shape)}"
        )

    if n_dims == 2:
        # Semantic mask: derive the binary masks and their class ids
        masks_np, class_ids = _to_binary_mask(masks_np)
    elif class_ids is not None:
        class_ids = array_to_numpy(class_ids, n_dims=1, dtype=np.int32)

    n_colors = len(color_mapping)
    mask_list: list[NDArrayBool] = []
    color_list: list[tuple[int, int, int]] = []

    for idx, mask in enumerate(masks_np.astype(bool)):
        color_key = idx if class_ids is None else class_ids[idx].item()
        mask_list.append(mask)
        color_list.append(color_mapping[color_key % n_colors])
    return mask_list, color_list


def preprocess_image(image: ArrayLike, mode: str = "RGB") -> NDArrayUI8:
    """Validate and convert input image.

    Every channel is min-max normalized on its own to the range [0, 255].
    A channel that holds a single constant value is mapped to 0.

    Args:
        image: CHW or HWC image (ArrayLike) with C = 3. If the last
            dimension has size 3 the image is treated as HWC.
        mode: input channel format. Only "BGR" triggers a conversion (the
            channel order is reversed); any other value leaves the channel
            order untouched.

    Returns:
        np.array[uint8]: Processed image_np in HWC layout.
    """
    image_np = array_to_numpy(image, n_dims=3, dtype=np.float32)
    assert len(image_np.shape) == 3
    assert image_np.shape[0] == 3 or image_np.shape[-1] == 3

    # Bring channel-first input to the channel-last layout
    if image_np.shape[-1] != 3:
        image_np = np.transpose(image_np, (1, 2, 0))

    # Convert image_np to [0, 255]
    image_np = image_np.astype(np.float32)
    min_val = np.min(image_np, axis=(0, 1))
    value_range = np.max(image_np, axis=(0, 1)) - min_val
    # A constant channel has a zero range. Dividing by it would produce NaN
    # values whose cast to uint8 is undefined, so divide by one instead.
    value_range[value_range == 0] = 1
    image_np = (image_np - min_val) / value_range * 255.0

    if mode == "BGR":
        image_np = image_np[..., [2, 1, 0]]

    return image_np.astype(np.uint8)


def get_intersection_point(
    point1: tuple[float, float, float],
    point2: tuple[float, float, float],
    camera_near_clip: float,
) -> tuple[float, float, float]:
    """Get point intersecting with camera near plane on line point1 -> point2.

    The line is defined by two points in camera coordinates and their depth.
    The near plane is the plane z = camera_near_clip.

    Args:
        point1 (tuple[float x 3]): First point in camera coordinates.
        point2 (tuple[float x 3]): Second point in camera coordinates
        camera_near_clip (float): Depth of the camera near plane.

    Returns:
        tuple[float, float, float]: The intersection point in camera
            coordinates. Its depth is always camera_near_clip. If point1 is
            further away from the plane than from point2 in depth, the x
            and y values of point2 are used. If both points have the depth
            of the plane, the x and y values of point1 are used.
    """
    x1, y1, z1 = point1
    x2, y2, z2 = point2

    dist_to_plane = abs(z1 - camera_near_clip)
    depth_span = abs(z1 - z2)

    if dist_to_plane > depth_span:
        # The plane is not reached within the segment: clamp to point2
        k = 1.0
    elif depth_span == 0:
        # Both points lie on the plane at the same depth. The ratio would be
        # 0 / 0, so stay at point1, which is already on the plane.
        k = 0.0
    else:
        k = dist_to_plane / depth_span

    return ((1 - k) * x1 + k * x2, (1 - k) * y1 + k * y2, camera_near_clip)


def project_point(
    point: tuple[float, float, float], intrinsics: NDArrayF32
) -> tuple[float, float]:
    """Project single point into the image plane.

    Args:
        point (tuple[float x 3]): The point to project. It is wrapped into
            a float32 batch of size one before project_points is called.
        intrinsics (NDArrayF32): Camera intrinsics as numpy array, passed
            on to project_points as a tensor.

    Returns:
        tuple[float, float]: The two values project_points returns for the
            point.
    """
    points_batch = torch.from_numpy(np.array([point], dtype=np.float32))
    intrinsics_tensor = torch.from_numpy(intrinsics)

    projected = project_points(points_batch, intrinsics_tensor)
    # Drop the batch dimension that was added above
    projected_x, projected_y = projected.squeeze(0).numpy().tolist()
    return projected_x, projected_y