import mmcv import numpy as np from mmcv.utils import deprecated_api_warning, is_tuple_of from numpy import random from mmseg.datasets.builder import PIPELINES from IPython import embed @PIPELINES.register_module() class AlignResize(object): """Resize images & seg. Align """ def __init__(self, img_scale=None, multiscale_mode='range', ratio_range=None, keep_ratio=True, size_divisor=32): if img_scale is None: self.img_scale = None else: if isinstance(img_scale, list): self.img_scale = img_scale else: self.img_scale = [img_scale] assert mmcv.is_list_of(self.img_scale, tuple) if ratio_range is not None: # mode 1: given img_scale=None and a range of image ratio # mode 2: given a scale and a range of image ratio assert self.img_scale is None or len(self.img_scale) == 1 else: # mode 3 and 4: given multiple scales or a range of scales assert multiscale_mode in ['value', 'range'] self.multiscale_mode = multiscale_mode self.ratio_range = ratio_range self.keep_ratio = keep_ratio self.size_divisor = size_divisor @staticmethod def random_select(img_scales): """Randomly select an img_scale from given candidates. Args: img_scales (list[tuple]): Images scales for selection. Returns: (tuple, int): Returns a tuple ``(img_scale, scale_dix)``, where ``img_scale`` is the selected image scale and ``scale_idx`` is the selected index in the given candidates. """ assert mmcv.is_list_of(img_scales, tuple) scale_idx = np.random.randint(len(img_scales)) img_scale = img_scales[scale_idx] return img_scale, scale_idx @staticmethod def random_sample(img_scales): """Randomly sample an img_scale when ``multiscale_mode=='range'``. Args: img_scales (list[tuple]): Images scale range for sampling. There must be two tuples in img_scales, which specify the lower and uper bound of image scales. Returns: (tuple, None): Returns a tuple ``(img_scale, None)``, where ``img_scale`` is sampled scale and None is just a placeholder to be consistent with :func:`random_select`. """ assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2 img_scale_long = [max(s) for s in img_scales] img_scale_short = [min(s) for s in img_scales] long_edge = np.random.randint( min(img_scale_long), max(img_scale_long) + 1) short_edge = np.random.randint( min(img_scale_short), max(img_scale_short) + 1) img_scale = (long_edge, short_edge) return img_scale, None @staticmethod def random_sample_ratio(img_scale, ratio_range): """Randomly sample an img_scale when ``ratio_range`` is specified. A ratio will be randomly sampled from the range specified by ``ratio_range``. Then it would be multiplied with ``img_scale`` to generate sampled scale. Args: img_scale (tuple): Images scale base to multiply with ratio. ratio_range (tuple[float]): The minimum and maximum ratio to scale the ``img_scale``. Returns: (tuple, None): Returns a tuple ``(scale, None)``, where ``scale`` is sampled ratio multiplied with ``img_scale`` and None is just a placeholder to be consistent with :func:`random_select`. """ assert isinstance(img_scale, tuple) and len(img_scale) == 2 min_ratio, max_ratio = ratio_range assert min_ratio <= max_ratio ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio) return scale, None def _random_scale(self, results): """Randomly sample an img_scale according to ``ratio_range`` and ``multiscale_mode``. If ``ratio_range`` is specified, a ratio will be sampled and be multiplied with ``img_scale``. If multiple scales are specified by ``img_scale``, a scale will be sampled according to ``multiscale_mode``. Otherwise, single scale will be used. Args: results (dict): Result dict from :obj:`dataset`. Returns: dict: Two new keys 'scale` and 'scale_idx` are added into ``results``, which would be used by subsequent pipelines. """ if self.ratio_range is not None: if self.img_scale is None: h, w = results['img'].shape[:2] scale, scale_idx = self.random_sample_ratio((w, h), self.ratio_range) else: scale, scale_idx = self.random_sample_ratio( self.img_scale[0], self.ratio_range) elif len(self.img_scale) == 1: scale, scale_idx = self.img_scale[0], 0 elif self.multiscale_mode == 'range': scale, scale_idx = self.random_sample(self.img_scale) elif self.multiscale_mode == 'value': scale, scale_idx = self.random_select(self.img_scale) else: raise NotImplementedError results['scale'] = scale results['scale_idx'] = scale_idx def _align(self, img, size_divisor, interpolation=None): align_h = int(np.ceil(img.shape[0] / size_divisor)) * size_divisor align_w = int(np.ceil(img.shape[1] / size_divisor)) * size_divisor if interpolation == None: img = mmcv.imresize(img, (align_w, align_h)) else: img = mmcv.imresize(img, (align_w, align_h), interpolation=interpolation) return img def _resize_img(self, results): """Resize images with ``results['scale']``.""" if self.keep_ratio: img, scale_factor = mmcv.imrescale( results['img'], results['scale'], return_scale=True) #### align #### img = self._align(img, self.size_divisor) # the w_scale and h_scale has minor difference # a real fix should be done in the mmcv.imrescale in the future new_h, new_w = img.shape[:2] h, w = results['img'].shape[:2] w_scale = new_w / w h_scale = new_h / h else: img, w_scale, h_scale = mmcv.imresize( results['img'], results['scale'], return_scale=True) h, w = img.shape[:2] assert int(np.ceil(h / self.size_divisor)) * self.size_divisor == h and \ int(np.ceil(w / self.size_divisor)) * self.size_divisor == w, \ "img size not align. h:{} w:{}".format(h,w) scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], dtype=np.float32) results['img'] = img results['img_shape'] = img.shape results['pad_shape'] = img.shape # in case that there is no padding results['scale_factor'] = scale_factor results['keep_ratio'] = self.keep_ratio def _resize_seg(self, results): """Resize semantic segmentation map with ``results['scale']``.""" for key in results.get('seg_fields', []): if self.keep_ratio: gt_seg = mmcv.imrescale( results[key], results['scale'], interpolation='nearest') gt_seg = self._align(gt_seg, self.size_divisor, interpolation='nearest') else: gt_seg = mmcv.imresize( results[key], results['scale'], interpolation='nearest') h, w = gt_seg.shape[:2] assert int(np.ceil(h / self.size_divisor)) * self.size_divisor == h and \ int(np.ceil(w / self.size_divisor)) * self.size_divisor == w, \ "gt_seg size not align. h:{} w:{}".format(h, w) results[key] = gt_seg def __call__(self, results): """Call function to resize images, bounding boxes, masks, semantic segmentation map. Args: results (dict): Result dict from loading pipeline. Returns: dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor', 'keep_ratio' keys are added into result dict. """ if 'scale' not in results: self._random_scale(results) self._resize_img(results) self._resize_seg(results) return results def __repr__(self): repr_str = self.__class__.__name__ repr_str += (f'(img_scale={self.img_scale}, ' f'multiscale_mode={self.multiscale_mode}, ' f'ratio_range={self.ratio_range}, ' f'keep_ratio={self.keep_ratio})') return repr_str