Spaces:
Sleeping
Sleeping
import numpy as np | |
from mmdet.registry import TRANSFORMS | |
from mmdet.structures.bbox import autocast_box_type | |
from mmcv.image.geometric import _scale_size | |
from mmcv.transforms import Resize as MMCV_Resize | |
from mmdet.datasets.transforms import Resize as MMDET_Resize | |
class ResizeImage(MMCV_Resize):
    """Resize the image only.

    The target size is taken from ``scale`` or, when ``scale`` is not set,
    derived from ``scale_factor`` and the size of the input image. If both
    ``scale`` and ``scale_factor`` are set, ``scale`` is used.

    Unlike the parent transform, :meth:`transform` here only invokes the
    inherited ``_resize_img``; ``gt_bboxes``, ``gt_masks`` and ``gt_seg_map``
    are left untouched.

    Required Keys:

    - img

    Modified / Added Keys:

    - scale (written by :meth:`transform`)
    - whatever the inherited ``_resize_img`` writes (presumably ``img``,
      ``img_shape``, ``scale_factor`` and ``keep_ratio`` -- confirm against
      the installed MMCV version)

    Constructor arguments are inherited unchanged from the parent class.

    Args:
        scale (int or tuple): Images scales for resizing. Defaults to None
        scale_factor (float or tuple[float]): Scale factors for resizing.
            Defaults to None.
        keep_ratio (bool): Whether to keep the aspect ratio when resizing the
            image. Defaults to False.
        clip_object_border (bool): Whether to clip the objects outside the
            border of the image. It is not consulted by :meth:`transform`
            in this class, because no boxes are resized here; it is only
            reported by ``__repr__``. Defaults to True.
        backend (str): Image resize backend, choices are 'cv2' and 'pillow'.
            These two backends generate slightly different results. Defaults
            to 'cv2'.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend. Defaults
            to 'bilinear'.
    """

    def transform(self, results: dict) -> dict:
        """Resize ``results['img']`` and leave every annotation as it is.

        Args:
            results (dict): Result dict from loading pipeline. Must contain
                ``'img'``.

        Returns:
            dict: The same dict object, with ``'scale'`` set and the keys
            written by the inherited ``_resize_img`` updated.
        """
        if self.scale:
            results['scale'] = self.scale
        else:
            # No explicit target size: derive it from the input image. The
            # first two shape dims are reversed before scaling (presumably
            # (h, w) -> (w, h), the order ``_scale_size`` expects).
            img_shape = results['img'].shape[:2]
            results['scale'] = _scale_size(img_shape[::-1], self.scale_factor)
        self._resize_img(results)
        return results

    def __repr__(self) -> str:
        """Return ``ClassName(key=value, ...)`` with balanced parentheses."""
        return (
            f'{self.__class__.__name__}('
            f'scale={self.scale}, '
            f'scale_factor={self.scale_factor}, '
            f'keep_ratio={self.keep_ratio}, '
            f'clip_object_border={self.clip_object_border}, '
            f'backend={self.backend}, '
            f'interpolation={self.interpolation})'
        )
class ResizeSAM(MMDET_Resize):
    """Resize transform that additionally rescales ``gt_point_coords``.

    It runs the same sequence of inherited resize steps (image, bboxes,
    masks, seg map, homography matrix) and inserts a point-coordinate
    rescaling step before the homography matrix is recorded.

    Constructor arguments are inherited unchanged from the parent class.
    """

    def _resize_point_coords(self, results: dict) -> None:
        """Scale ``results['gt_point_coords']`` and clip it to the image.

        Does nothing when the key is missing or its value is ``None``.
        The coordinates are multiplied by ``results['scale_factor']``, then
        index 0 of the last axis is clipped to ``[0, img_shape[1]]`` and
        index 1 to ``[0, img_shape[0]]``.

        Args:
            results (dict): Result dict; ``'scale_factor'`` and
                ``'img_shape'`` must already be populated (here by the
                preceding ``_resize_img`` call in :meth:`transform`).
        """
        points = results.get('gt_point_coords', None)
        if points is None:
            return

        # Presumably ``points[..., 0]`` is x and ``points[..., 1]`` is y, with
        # ``scale_factor`` ordered (w_scale, h_scale) -- TODO confirm.
        points = points * results['scale_factor']
        img_h, img_w = results['img_shape'][:2]
        # NOTE(review): clipping is unconditional here and does not look at
        # ``self.clip_object_border`` -- confirm this is intended.
        points[..., 0] = np.clip(points[..., 0], 0, img_w)
        points[..., 1] = np.clip(points[..., 1], 0, img_h)
        results['gt_point_coords'] = points

    def transform(self, results: dict) -> dict:
        """Resize the image, boxes, masks, seg map and point coordinates.

        Args:
            results (dict): Result dict from loading pipeline. Must contain
                ``'img'``.

        Returns:
            dict: The same dict object, with ``'scale'`` set and the keys
            handled by the individual resize steps updated.
        """
        # NOTE(review): ``autocast_box_type`` is imported by this module but
        # this override is not decorated with it -- confirm that
        # ``gt_bboxes`` always arrives as a box-type object here.
        if self.scale:
            results['scale'] = self.scale
        else:
            # No explicit target size: derive it from the input image. The
            # first two shape dims are reversed before scaling (presumably
            # (h, w) -> (w, h), the order ``_scale_size`` expects).
            img_shape = results['img'].shape[:2]
            results['scale'] = _scale_size(img_shape[::-1], self.scale_factor)
        # The image must be resized first: later steps read the
        # ``scale_factor`` / ``img_shape`` entries from ``results``.
        self._resize_img(results)
        self._resize_bboxes(results)
        self._resize_masks(results)
        self._resize_seg(results)
        self._resize_point_coords(results)
        self._record_homography_matrix(results)
        return results

    def __repr__(self) -> str:
        """Return ``ClassName(key=value, ...)`` with balanced parentheses."""
        return (
            f'{self.__class__.__name__}('
            f'scale={self.scale}, '
            f'scale_factor={self.scale_factor}, '
            f'keep_ratio={self.keep_ratio}, '
            f'clip_object_border={self.clip_object_border}, '
            f'backend={self.backend}, '
            f'interpolation={self.interpolation})'
        )