# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional, Tuple, Union

import cv2
import mmcv
import numpy as np
import torch
from mmengine.dist import master_only
from mmengine.structures import InstanceData, PixelData
from mmengine.visualization import Visualizer

from ..evaluation import INSTANCE_OFFSET
from ..registry import VISUALIZERS
from ..structures import DetDataSample
from ..structures.mask import BitmapMasks, PolygonMasks, bitmap_to_polygon
from .palette import _get_adaptive_scales, get_palette, jitter_color


@VISUALIZERS.register_module()
class DetLocalVisualizer(Visualizer):
    """MMDetection Local Visualizer.

    Args:
        name (str): Name of the instance. Defaults to 'visualizer'.
        image (np.ndarray, optional): The original image to draw. The
            format should be RGB. Defaults to None.
        vis_backends (list, optional): Visual backend config list.
            Defaults to None.
        save_dir (str, optional): Save file dir for all storage backends.
            If it is None, the backend storage will not save any data.
        bbox_color (str, tuple(int), optional): Color of bbox lines.
            The tuple of color should be in BGR order. Defaults to None.
        text_color (str, tuple(int), optional): Color of texts.
            The tuple of color should be in BGR order.
            Defaults to (200, 200, 200).
        mask_color (str, tuple(int), optional): Color of masks.
            The tuple of color should be in BGR order. Defaults to None.
        line_width (int, float): The linewidth of lines. Defaults to 3.
        alpha (int, float): The transparency of bboxes or masks.
            Defaults to 0.8.

    Examples:
        >>> import numpy as np
        >>> import torch
        >>> from mmengine.structures import InstanceData
        >>> from mmdet.structures import DetDataSample
        >>> from mmdet.visualization import DetLocalVisualizer

        >>> det_local_visualizer = DetLocalVisualizer()
        >>> image = np.random.randint(0, 256,
        ...                           size=(10, 12, 3)).astype('uint8')
        >>> gt_instances = InstanceData()
        >>> gt_instances.bboxes = torch.Tensor([[1, 2, 2, 5]])
        >>> gt_instances.labels = torch.randint(0, 2, (1,))
        >>> gt_det_data_sample = DetDataSample()
        >>> gt_det_data_sample.gt_instances = gt_instances
        >>> det_local_visualizer.add_datasample('image', image,
        ...                                     gt_det_data_sample)
        >>> det_local_visualizer.add_datasample(
        ...     'image', image, gt_det_data_sample,
        ...     out_file='out_file.jpg')
        >>> det_local_visualizer.add_datasample(
        ...     'image', image, gt_det_data_sample,
        ...     show=True)
        >>> pred_instances = InstanceData()
        >>> pred_instances.bboxes = torch.Tensor([[2, 4, 4, 8]])
        >>> pred_instances.labels = torch.randint(0, 2, (1,))
        >>> pred_det_data_sample = DetDataSample()
        >>> pred_det_data_sample.pred_instances = pred_instances
        >>> det_local_visualizer.add_datasample('image', image,
        ...                                     pred_det_data_sample)
    """

    def __init__(self,
                 name: str = 'visualizer',
                 image: Optional[np.ndarray] = None,
                 vis_backends: Optional[Dict] = None,
                 save_dir: Optional[str] = None,
                 bbox_color: Optional[Union[str, Tuple[int]]] = None,
                 text_color: Optional[Union[str,
                                            Tuple[int]]] = (200, 200, 200),
                 mask_color: Optional[Union[str, Tuple[int]]] = None,
                 line_width: Union[int, float] = 3,
                 alpha: float = 0.8,
                 **kwargs) -> None:
        super().__init__(
            name=name,
            image=image,
            vis_backends=vis_backends,
            save_dir=save_dir,
            **kwargs)
        self.bbox_color = bbox_color
        self.text_color = text_color
        self.mask_color = mask_color
        self.line_width = line_width
        self.alpha = alpha
        # Set default value. When calling
        # `DetLocalVisualizer().dataset_meta = xxx`,
        # it will override the default value.
        self.dataset_meta = {}

    def _draw_instances(self, image: np.ndarray, instances: 'InstanceData',
                        classes: Optional[List[str]],
                        palette: Optional[List[tuple]]) -> np.ndarray:
        """Draw instances of GT or prediction.

        Args:
            image (np.ndarray): The image to draw.
            instances (:obj:`InstanceData`): Data structure for
                instance-level annotations or predictions.
            classes (List[str], optional): Category information.
            palette (List[tuple], optional): Palette information
                corresponding to the category.

        Returns:
            np.ndarray: The drawn image; channel order is RGB.
        """
        self.set_image(image)

        if 'bboxes' in instances:
            bboxes = instances.bboxes
            labels = instances.labels

            max_label = int(max(labels) if len(labels) > 0 else 0)
            text_palette = get_palette(self.text_color, max_label + 1)
            text_colors = [text_palette[label] for label in labels]

            bbox_color = palette if self.bbox_color is None \
                else self.bbox_color
            bbox_palette = get_palette(bbox_color, max_label + 1)
            colors = [bbox_palette[label] for label in labels]
            self.draw_bboxes(
                bboxes,
                edge_colors=colors,
                alpha=self.alpha,
                line_widths=self.line_width)

            positions = bboxes[:, :2] + self.line_width
            areas = (bboxes[:, 3] - bboxes[:, 1]) * (
                bboxes[:, 2] - bboxes[:, 0])
            scales = _get_adaptive_scales(areas)

            for i, (pos, label) in enumerate(zip(positions, labels)):
                label_text = classes[
                    label] if classes is not None else f'class {label}'
                if 'scores' in instances:
                    score = round(float(instances.scores[i]) * 100, 1)
                    label_text += f': {score}'

                self.draw_texts(
                    label_text,
                    pos,
                    colors=text_colors[i],
                    font_sizes=int(13 * scales[i]),
                    bboxes=[{
                        'facecolor': 'black',
                        'alpha': 0.8,
                        'pad': 0.7,
                        'edgecolor': 'none'
                    }])

        if 'masks' in instances:
            labels = instances.labels
            masks = instances.masks
            if isinstance(masks, torch.Tensor):
                masks = masks.numpy()
            elif isinstance(masks, (PolygonMasks, BitmapMasks)):
                masks = masks.to_ndarray()

            masks = masks.astype(bool)

            max_label = int(max(labels) if len(labels) > 0 else 0)
            mask_color = palette if self.mask_color is None \
                else self.mask_color
            mask_palette = get_palette(mask_color, max_label + 1)
            colors = [jitter_color(mask_palette[label]) for label in labels]
            text_palette = get_palette(self.text_color, max_label + 1)
            text_colors = [text_palette[label] for label in labels]

            polygons = []
            for mask in masks:
                contours, _ = bitmap_to_polygon(mask)
                polygons.extend(contours)
            self.draw_polygons(polygons, edge_colors='w', alpha=self.alpha)
            self.draw_binary_masks(masks, colors=colors, alphas=self.alpha)

            if len(labels) > 0 and \
                    ('bboxes' not in instances
                     or instances.bboxes.sum() == 0):
                # `instances.bboxes.sum() == 0` indicates dummy bboxes.
                # A typical example is SOLO, which has no bbox branch.
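                # With no usable box to anchor the text to, place each
                # label at the centroid of the mask's largest connected
                # component (computed below with
                # cv2.connectedComponentsWithStats) and scale the font
                # size by that component's pixel area.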
                areas = []
                positions = []
                for mask in masks:
                    _, _, stats, centroids = \
                        cv2.connectedComponentsWithStats(
                            mask.astype(np.uint8), connectivity=8)
                    if stats.shape[0] > 1:
                        largest_id = np.argmax(stats[1:, -1]) + 1
                        positions.append(centroids[largest_id])
                        areas.append(stats[largest_id, -1])
                areas = np.stack(areas, axis=0)
                scales = _get_adaptive_scales(areas)

                for i, (pos, label) in enumerate(zip(positions, labels)):
                    label_text = classes[
                        label] if classes is not None else f'class {label}'
                    if 'scores' in instances:
                        score = round(float(instances.scores[i]) * 100, 1)
                        label_text += f': {score}'

                    self.draw_texts(
                        label_text,
                        pos,
                        colors=text_colors[i],
                        font_sizes=int(13 * scales[i]),
                        horizontal_alignments='center',
                        bboxes=[{
                            'facecolor': 'black',
                            'alpha': 0.8,
                            'pad': 0.7,
                            'edgecolor': 'none'
                        }])
        return self.get_image()

    def _draw_panoptic_seg(self, image: np.ndarray,
                           panoptic_seg: 'PixelData',
                           classes: Optional[List[str]]) -> np.ndarray:
        """Draw panoptic segmentation of GT or prediction.

        Args:
            image (np.ndarray): The image to draw.
            panoptic_seg (:obj:`PixelData`): Data structure for
                pixel-level annotations or predictions.
            classes (List[str], optional): Category information.

        Returns:
            np.ndarray: The drawn image; channel order is RGB.
        """
        # TODO: Is there a way to bypass?
        num_classes = len(classes)

        panoptic_seg = panoptic_seg.sem_seg[0]
        ids = np.unique(panoptic_seg)[::-1]
        legal_indices = ids != num_classes  # for VOID label
        ids = ids[legal_indices]

        labels = np.array([id % INSTANCE_OFFSET for id in ids],
                          dtype=np.int64)
        segms = (panoptic_seg[None] == ids[:, None, None])

        max_label = int(max(labels) if len(labels) > 0 else 0)
        mask_palette = get_palette(self.mask_color, max_label + 1)
        colors = [mask_palette[label] for label in labels]

        self.set_image(image)

        # draw segm
        polygons = []
        for mask in segms:
            contours, _ = bitmap_to_polygon(mask)
            polygons.extend(contours)
        self.draw_polygons(polygons, edge_colors='w', alpha=self.alpha)
        self.draw_binary_masks(segms, colors=colors, alphas=self.alpha)

        # draw label
        areas = []
        positions = []
        for mask in segms:
            _, _, stats, centroids = cv2.connectedComponentsWithStats(
                mask.astype(np.uint8), connectivity=8)
            max_id = np.argmax(stats[1:, -1]) + 1
            positions.append(centroids[max_id])
            areas.append(stats[max_id, -1])
        areas = np.stack(areas, axis=0)
        scales = _get_adaptive_scales(areas)

        text_palette = get_palette(self.text_color, max_label + 1)
        text_colors = [text_palette[label] for label in labels]

        for i, (pos, label) in enumerate(zip(positions, labels)):
            label_text = classes[label]
            self.draw_texts(
                label_text,
                pos,
                colors=text_colors[i],
                font_sizes=int(13 * scales[i]),
                bboxes=[{
                    'facecolor': 'black',
                    'alpha': 0.8,
                    'pad': 0.7,
                    'edgecolor': 'none'
                }],
                horizontal_alignments='center')
        return self.get_image()

    @master_only
    def add_datasample(
            self,
            name: str,
            image: np.ndarray,
            data_sample: Optional['DetDataSample'] = None,
            draw_gt: bool = True,
            draw_pred: bool = True,
            show: bool = False,
            wait_time: float = 0,
            # TODO: Support `out_file` in mmengine's Visualizer.
            out_file: Optional[str] = None,
            pred_score_thr: float = 0.3,
            step: int = 0) -> None:
        """Draw datasample and save to all backends.

        - If GT and prediction are plotted at the same time, they are
          displayed in a stitched image where the left image is the
          ground truth and the right image is the prediction.
        - If ``show`` is True, all storage backends are ignored, and
          the images will be displayed in a local window.
        - If ``out_file`` is specified, the drawn image will be saved to
          ``out_file``.
          It is usually used when the display is not available.

        Args:
            name (str): The image identifier.
            image (np.ndarray): The image to draw.
            data_sample (:obj:`DetDataSample`, optional): A data sample
                that contains annotations and predictions.
                Defaults to None.
            draw_gt (bool): Whether to draw GT DetDataSample.
                Defaults to True.
            draw_pred (bool): Whether to draw prediction DetDataSample.
                Defaults to True.
            show (bool): Whether to display the drawn image.
                Defaults to False.
            wait_time (float): The interval of show (s). Defaults to 0.
            out_file (str): Path to output file. Defaults to None.
            pred_score_thr (float): The threshold to visualize the bboxes
                and masks. Defaults to 0.3.
            step (int): Global step value to record. Defaults to 0.
        """
        image = image.clip(0, 255).astype(np.uint8)
        classes = self.dataset_meta.get('classes', None)
        palette = self.dataset_meta.get('palette', None)

        gt_img_data = None
        pred_img_data = None

        if data_sample is not None:
            data_sample = data_sample.cpu()

        if draw_gt and data_sample is not None:
            gt_img_data = image
            if 'gt_instances' in data_sample:
                gt_img_data = self._draw_instances(image,
                                                   data_sample.gt_instances,
                                                   classes, palette)

            if 'gt_panoptic_seg' in data_sample:
                assert classes is not None, 'class information is ' \
                                            'not provided when ' \
                                            'visualizing panoptic ' \
                                            'segmentation results.'
                gt_img_data = self._draw_panoptic_seg(
                    gt_img_data, data_sample.gt_panoptic_seg, classes)

        if draw_pred and data_sample is not None:
            pred_img_data = image
            if 'pred_instances' in data_sample:
                pred_instances = data_sample.pred_instances
                pred_instances = pred_instances[
                    pred_instances.scores > pred_score_thr]
                pred_img_data = self._draw_instances(image, pred_instances,
                                                     classes, palette)
            if 'pred_panoptic_seg' in data_sample:
                assert classes is not None, 'class information is ' \
                                            'not provided when ' \
                                            'visualizing panoptic ' \
                                            'segmentation results.'
                pred_img_data = self._draw_panoptic_seg(
                    pred_img_data, data_sample.pred_panoptic_seg.numpy(),
                    classes)

        if gt_img_data is not None and pred_img_data is not None:
            drawn_img = np.concatenate((gt_img_data, pred_img_data), axis=1)
        elif gt_img_data is not None:
            drawn_img = gt_img_data
        elif pred_img_data is not None:
            drawn_img = pred_img_data
        else:
            # Display the original image directly if nothing is drawn.
            drawn_img = image

        # It is convenient for users to obtain the drawn image.
        # For example, the user may want to obtain the drawn image and
        # save it as a video during video inference.
        self.set_image(drawn_img)

        if show:
            self.show(drawn_img, win_name=name, wait_time=wait_time)

        if out_file is not None:
            mmcv.imwrite(drawn_img[..., ::-1], out_file)
        else:
            self.add_image(name, drawn_img, step)
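

# A minimal usage sketch, not part of the original module: it mirrors the
# class docstring example and assumes mmdet is installed so the relative
# imports above resolve (run via `python -m` with this module's dotted
# path, or copy the body into user code). The box coordinates and the
# output path 'demo.jpg' are illustrative only.
if __name__ == '__main__':
    visualizer = DetLocalVisualizer()
    demo_image = np.random.randint(
        0, 256, size=(64, 64, 3)).astype('uint8')

    # One GT box with a dummy label; with no `dataset_meta` set, the
    # label is rendered as 'class 0' and colors are picked automatically.
    demo_instances = InstanceData()
    demo_instances.bboxes = torch.Tensor([[8, 8, 32, 48]])
    demo_instances.labels = torch.tensor([0])

    demo_sample = DetDataSample()
    demo_sample.gt_instances = demo_instances

    # `out_file` is handy when no display is available; with `show=True`
    # the image would be shown in a local window instead.
    visualizer.add_datasample(
        'demo', demo_image, demo_sample, show=False, out_file='demo.jpg')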