Spaces:
Runtime error
Runtime error
# Copyright (c) OpenMMLab. All rights reserved. | |
import copy | |
import cv2 | |
import mmcv | |
import numpy as np | |
from ..builder import PIPELINES | |
from .compose import Compose | |
# Upper end of the augmentation "level" scale. The level-mapping helpers in
# this file divide by it, and the transforms' constructors assert that
# ``level`` does not exceed it.
_MAX_LEVEL = 10
def level_to_value(level, max_value):
    """Scale an augmentation level linearly against ``max_value``.

    Args:
        level (int | float): Augmentation level, measured against
            ``_MAX_LEVEL``.
        max_value (int | float): The value that ``level == _MAX_LEVEL``
            maps to.

    Returns:
        int | float: ``(level / _MAX_LEVEL) * max_value``.
    """
    fraction = level / _MAX_LEVEL
    return fraction * max_value
def enhance_level_to_value(level, a=1.8, b=0.1):
    """Map an augmentation level to an enhancement factor.

    Args:
        level (int | float): Augmentation level, measured against
            ``_MAX_LEVEL``.
        a (int | float): Slope applied to the normalised level.
        b (int | float): Offset added after scaling, i.e. the value returned
            for ``level == 0``.

    Returns:
        int | float: ``(level / _MAX_LEVEL) * a + b``.
    """
    fraction = level / _MAX_LEVEL
    return fraction * a + b
def random_negative(value, random_negative_prob):
    """Return ``value`` or ``-value``, chosen at random.

    Args:
        value (int | float): The quantity that may be negated.
        random_negative_prob (float): Threshold compared against a single
            draw of ``np.random.rand()``; the sign is flipped when the draw
            is strictly below it.

    Returns:
        int | float: ``-value`` when the draw falls below
        ``random_negative_prob``, otherwise ``value`` unchanged.
    """
    # Exactly one draw is consumed per call.
    if np.random.rand() < random_negative_prob:
        return -value
    return value
def bbox2fields():
    """Return the lookup tables that pair bbox keys with companion keys.

    Returns:
        tuple[dict, dict, dict]: ``(bbox2label, bbox2mask, bbox2seg)``. Each
        dict maps a bbox key of the results dict to the key holding the
        matching labels, masks or semantic segmentation respectively.
    """
    bbox2label = dict(
        gt_bboxes='gt_labels', gt_bboxes_ignore='gt_labels_ignore')
    bbox2mask = dict(gt_bboxes='gt_masks', gt_bboxes_ignore='gt_masks_ignore')
    # Only the non-ignored boxes have a semantic-segmentation counterpart.
    bbox2seg = dict(gt_bboxes='gt_semantic_seg')
    return bbox2label, bbox2mask, bbox2seg
class AutoAugment:
    """Auto augmentation.

    This data augmentation is proposed in `Learning Data Augmentation
    Strategies for Object Detection <https://arxiv.org/pdf/1906.11172>`_.

    Each call picks one policy at random and runs it on the results dict.

    Args:
        policies (list[list[dict]]): The policies of auto augmentation. Each
            policy in ``policies`` is a non-empty list of augmentation
            configs (dicts containing a ``type`` key). Every policy is
            wrapped in a ``Compose``; one of them is selected at random on
            each call.

    Examples:
        >>> # NOTE(review): the ``type`` strings assume the transforms are
        >>> # registered under their class names - confirm against the
        >>> # pipeline registry.
        >>> fill = (104, 116, 124)
        >>> policies = [
        >>>     [
        >>>         dict(type='EqualizeTransform', prob=0.8),
        >>>         dict(
        >>>             type='Shear',
        >>>             prob=0.4,
        >>>             level=0,
        >>>             img_fill_val=fill,
        >>>             direction='horizontal')
        >>>     ],
        >>>     [
        >>>         dict(
        >>>             type='Rotate',
        >>>             prob=0.6,
        >>>             level=10,
        >>>             img_fill_val=fill),
        >>>         dict(type='ColorTransform', prob=1.0, level=6)
        >>>     ]
        >>> ]
        >>> augmentation = AutoAugment(policies)
        >>> img = np.ones((100, 100, 3))
        >>> gt_bboxes = np.ones((10, 4))
        >>> results = dict(img=img, gt_bboxes=gt_bboxes)
        >>> results = augmentation(results)
    """

    def __init__(self, policies):
        policies_ok = isinstance(policies, list) and len(policies) > 0
        assert policies_ok, 'Policies must be a non-empty list.'
        for sub_policy in policies:
            sub_policy_ok = (
                isinstance(sub_policy, list) and len(sub_policy) > 0)
            assert sub_policy_ok, \
                'Each policy in policies must be a non-empty list.'
            for aug_cfg in sub_policy:
                aug_cfg_ok = isinstance(aug_cfg, dict) and 'type' in aug_cfg
                assert aug_cfg_ok, \
                    'Each specific augmentation must be a dict with key "type".'

        # Keep a private copy so later edits to the caller's list do not
        # change what ``__repr__`` reports.
        self.policies = copy.deepcopy(policies)
        self.transforms = list(map(Compose, self.policies))

    def __call__(self, results):
        """Apply one randomly chosen policy to ``results``.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            Whatever the selected composed policy returns for ``results``.
        """
        chosen = np.random.choice(self.transforms)
        return chosen(results)

    def __repr__(self):
        return '{}(policies={})'.format(self.__class__.__name__,
                                        self.policies)
class Shear:
    """Shear the image together with its bboxes, masks and segmentation.

    Args:
        level (int | float): The level should be in range [0,_MAX_LEVEL].
        img_fill_val (int | float | tuple): Fill value for the image border.
            A scalar is repeated for three channels; a tuple must have
            exactly 3 elements. Every element must lie in [0, 255].
        seg_ignore_label (int): The fill value used for segmentation map.
            Note this value must equal ``ignore_label`` in ``semantic_head``
            of the corresponding config. Default 255.
        prob (float): The probability for performing Shear, in range [0, 1].
        direction (str): The direction for shear, either "horizontal"
            or "vertical".
        max_shear_magnitude (float): The magnitude reached at
            ``level == _MAX_LEVEL``. Must be a ``float`` in [0, 1].
        random_negative_prob (float): The probability that the magnitude is
            negated. Should be in range [0,1].
        interpolation (str): Same as in :func:`mmcv.imshear`.
    """

    def __init__(self,
                 level,
                 img_fill_val=128,
                 seg_ignore_label=255,
                 prob=0.5,
                 direction='horizontal',
                 max_shear_magnitude=0.3,
                 random_negative_prob=0.5,
                 interpolation='bilinear'):
        assert isinstance(level, (int, float)), \
            f'The level must be type int or float, got {type(level)}.'
        assert 0 <= level <= _MAX_LEVEL, \
            f'The level should be in range [0,{_MAX_LEVEL}], got {level}.'

        # Normalise the fill value to a 3-tuple of floats.
        if isinstance(img_fill_val, tuple):
            assert len(img_fill_val) == 3, (
                'img_fill_val as tuple must have 3 elements. '
                f'got {len(img_fill_val)}.')
            img_fill_val = tuple(float(channel) for channel in img_fill_val)
        elif isinstance(img_fill_val, (float, int)):
            img_fill_val = (float(img_fill_val), ) * 3
        else:
            raise ValueError(
                'img_fill_val must be float or tuple with 3 elements.')
        channels_in_range = [0 <= channel <= 255 for channel in img_fill_val]
        assert np.all(channels_in_range), (
            'all elements of img_fill_val should between range [0,255].'
            f'got {img_fill_val}.')

        assert 0 <= prob <= 1.0, \
            f'The probability of shear should be in range [0,1]. got {prob}.'
        assert direction in ('horizontal', 'vertical'), (
            'direction must in be either "horizontal" or "vertical". '
            f'got {direction}.')
        assert isinstance(max_shear_magnitude, float), (
            'max_shear_magnitude should be type float. '
            f'got {type(max_shear_magnitude)}.')
        assert 0. <= max_shear_magnitude <= 1., (
            'Defaultly max_shear_magnitude should be in range [0,1]. '
            f'got {max_shear_magnitude}.')

        self.level = level
        self.prob = prob
        self.direction = direction
        self.interpolation = interpolation
        self.img_fill_val = img_fill_val
        self.seg_ignore_label = seg_ignore_label
        self.random_negative_prob = random_negative_prob
        self.max_shear_magnitude = max_shear_magnitude
        # Unsigned magnitude; the sign is drawn per call in ``__call__``.
        self.magnitude = level_to_value(level, max_shear_magnitude)

    def _shear_img(self,
                   results,
                   magnitude,
                   direction='horizontal',
                   interpolation='bilinear'):
        """Shear every image listed in ``results['img_fields']``.

        Falls back to the single key ``'img'`` when ``img_fields`` is absent.
        ``results['img_shape']`` is refreshed from each sheared image.

        Args:
            results (dict): Result dict from loading pipeline.
            magnitude (int | float): The magnitude used for shear.
            direction (str): The direction for shear, either "horizontal"
                or "vertical".
            interpolation (str): Same as in :func:`mmcv.imshear`.
        """
        image_keys = results.get('img_fields', ['img'])
        for name in image_keys:
            source = results[name]
            sheared = mmcv.imshear(
                source,
                magnitude,
                direction,
                border_value=self.img_fill_val,
                interpolation=interpolation)
            # Cast back so the stored image keeps its original dtype.
            results[name] = sheared.astype(source.dtype)
            results['img_shape'] = results[name].shape

    def _shear_bboxes(self, results, magnitude):
        """Shear the boxes in ``results['bbox_fields']``, in place.

        The four corners of each box are sheared, then the axis-aligned
        bounding rectangle of the result is clipped to the image.
        """
        img_h, img_w, _ = results['img_shape']
        if self.direction == 'horizontal':
            rows = [[1, magnitude], [0, 1]]
        else:
            rows = [[1, 0], [magnitude, 1]]
        shear_matrix = np.stack(rows).astype(np.float32)  # [2, 2]

        for name in results.get('bbox_fields', []):
            boxes = results[name]
            x1, y1, x2, y2 = np.split(boxes, boxes.shape[-1], axis=-1)
            corners = np.stack([[x1, y1], [x2, y1], [x1, y2],
                                [x2, y2]])  # [4, 2, nb_box, 1]
            corners = corners[..., 0].transpose(
                (2, 1, 0)).astype(np.float32)  # [nb_box, 2, 4]
            sheared = np.matmul(shear_matrix[None, :, :],
                                corners)  # [nb_box, 2, 4]
            xs = sheared[:, 0, :]
            ys = sheared[:, 1, :]
            left = np.clip(np.min(xs, axis=-1), a_min=0, a_max=img_w)
            top = np.clip(np.min(ys, axis=-1), a_min=0, a_max=img_h)
            # The lower clip bound is the already-clipped minimum, so a box
            # pushed fully outside collapses to zero extent instead of
            # inverting.
            right = np.clip(np.max(xs, axis=-1), a_min=left, a_max=img_w)
            bottom = np.clip(np.max(ys, axis=-1), a_min=top, a_max=img_h)
            results[name] = np.stack([left, top, right, bottom],
                                     axis=-1).astype(boxes.dtype)

    def _shear_masks(self,
                     results,
                     magnitude,
                     direction='horizontal',
                     fill_val=0,
                     interpolation='bilinear'):
        """Shear every mask container in ``results['mask_fields']``.

        Each container is replaced by the return value of its own ``shear``
        method, called with the current image height and width.
        """
        img_h, img_w, _ = results['img_shape']
        out_shape = (img_h, img_w)
        for name in results.get('mask_fields', []):
            container = results[name]
            results[name] = container.shear(
                out_shape,
                magnitude,
                direction,
                border_value=fill_val,
                interpolation=interpolation)

    def _shear_seg(self,
                   results,
                   magnitude,
                   direction='horizontal',
                   fill_val=255,
                   interpolation='bilinear'):
        """Shear every segmentation map in ``results['seg_fields']``."""
        for name in results.get('seg_fields', []):
            seg_map = results[name]
            sheared = mmcv.imshear(
                seg_map,
                magnitude,
                direction,
                border_value=fill_val,
                interpolation=interpolation)
            results[name] = sheared.astype(seg_map.dtype)

    def _filter_invalid(self, results, min_bbox_size=0):
        """Drop boxes not larger than ``min_bbox_size`` in both dimensions.

        The labels and masks paired with each bbox key (see ``bbox2fields``)
        are filtered with the same indices when present in ``results``.
        """
        label_of, mask_of, _ = bbox2fields()
        for name in results.get('bbox_fields', []):
            boxes = results[name]
            widths = boxes[:, 2] - boxes[:, 0]
            heights = boxes[:, 3] - boxes[:, 1]
            big_enough = (widths > min_bbox_size) & (heights > min_bbox_size)
            keep = np.nonzero(big_enough)[0]
            results[name] = boxes[keep]
            # Companion fields, e.g. gt_labels / gt_labels_ignore and
            # gt_masks / gt_masks_ignore.
            for mapping in (label_of, mask_of):
                companion = mapping.get(name)
                if companion in results:
                    results[companion] = results[companion][keep]

    def __call__(self, results):
        """Call function to shear images, bounding boxes, masks and semantic
        segmentation maps.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Sheared results (the same dict, modified in place), or the
            untouched input when the random draw exceeds ``prob``.
        """
        if np.random.rand() > self.prob:
            return results

        signed_magnitude = random_negative(self.magnitude,
                                           self.random_negative_prob)
        self._shear_img(
            results,
            signed_magnitude,
            direction=self.direction,
            interpolation=self.interpolation)
        self._shear_bboxes(results, signed_magnitude)
        # Masks are padded with 0, i.e. background.
        self._shear_masks(
            results,
            signed_magnitude,
            direction=self.direction,
            fill_val=0,
            interpolation=self.interpolation)
        self._shear_seg(
            results,
            signed_magnitude,
            direction=self.direction,
            fill_val=self.seg_ignore_label,
            interpolation=self.interpolation)
        self._filter_invalid(results)
        return results

    def __repr__(self):
        fields = (
            f'level={self.level}',
            f'img_fill_val={self.img_fill_val}',
            f'seg_ignore_label={self.seg_ignore_label}',
            f'prob={self.prob}',
            f'direction={self.direction}',
            f'max_shear_magnitude={self.max_shear_magnitude}',
            f'random_negative_prob={self.random_negative_prob}',
            f'interpolation={self.interpolation}',
        )
        return f"{self.__class__.__name__}({', '.join(fields)})"
class Rotate:
    """Apply Rotate Transformation to image (and its corresponding bbox, mask,
    segmentation).

    Args:
        level (int | float): The level should be in range [0,_MAX_LEVEL].
        scale (int | float): Isotropic scale factor. Same in
            ``mmcv.imrotate``.
        center (int | float | tuple[float]): Center point (w, h) of the
            rotation in the source image. A scalar is used for both
            coordinates. If None, the center of the image will be used.
            Same in ``mmcv.imrotate``.
        img_fill_val (int | float | tuple): The fill value for image border.
            If float, the same value will be used for all the three
            channels of image. If tuple, it should have 3 elements (e.g.
            equal to the number of channels of the image).
        seg_ignore_label (int): The fill value used for segmentation map.
            Note this value must equal ``ignore_label`` in ``semantic_head``
            of the corresponding config. Default 255.
        prob (float): The probability for performing the transformation,
            in range [0, 1].
        max_rotate_angle (int | float): The rotation angle reached at
            ``level == _MAX_LEVEL``.
        random_negative_prob (float): The probability that the angle is
            negated.
    """

    def __init__(self,
                 level,
                 scale=1,
                 center=None,
                 img_fill_val=128,
                 seg_ignore_label=255,
                 prob=0.5,
                 max_rotate_angle=30,
                 random_negative_prob=0.5):
        assert isinstance(level, (int, float)), \
            f'The level must be type int or float. got {type(level)}.'
        # Level 0 is accepted, so the message states a closed interval.
        assert 0 <= level <= _MAX_LEVEL, \
            f'The level should be in range [0,{_MAX_LEVEL}]. got {level}.'
        assert isinstance(scale, (int, float)), \
            f'The scale must be type int or float. got type {type(scale)}.'
        if isinstance(center, (int, float)):
            center = (center, center)
        elif isinstance(center, tuple):
            assert len(center) == 2, 'center with type tuple must have '\
                f'2 elements. got {len(center)} elements.'
        else:
            assert center is None, 'center must be None or type int, '\
                f'float or tuple, got type {type(center)}.'
        # Normalise the fill value to a 3-tuple of floats.
        if isinstance(img_fill_val, (float, int)):
            img_fill_val = tuple([float(img_fill_val)] * 3)
        elif isinstance(img_fill_val, tuple):
            assert len(img_fill_val) == 3, 'img_fill_val as tuple must '\
                f'have 3 elements. got {len(img_fill_val)}.'
            img_fill_val = tuple([float(val) for val in img_fill_val])
        else:
            raise ValueError(
                'img_fill_val must be float or tuple with 3 elements.')
        assert np.all([0 <= val <= 255 for val in img_fill_val]), \
            'all elements of img_fill_val should between range [0,255]. '\
            f'got {img_fill_val}.'
        assert 0 <= prob <= 1.0, 'The probability should be in range [0,1]. '\
            f'got {prob}.'
        assert isinstance(max_rotate_angle, (int, float)), 'max_rotate_angle '\
            f'should be type int or float. got type {type(max_rotate_angle)}.'
        self.level = level
        self.scale = scale
        # Rotation angle in degrees. Positive values mean
        # clockwise rotation.
        self.angle = level_to_value(level, max_rotate_angle)
        self.center = center
        self.img_fill_val = img_fill_val
        self.seg_ignore_label = seg_ignore_label
        self.prob = prob
        self.max_rotate_angle = max_rotate_angle
        self.random_negative_prob = random_negative_prob

    def _rotate_img(self, results, angle, center=None, scale=1.0):
        """Rotate every image listed in ``results['img_fields']``.

        Falls back to the single key ``'img'`` when ``img_fields`` is absent.
        ``results['img_shape']`` is refreshed from each rotated image.

        Args:
            results (dict): Result dict from loading pipeline.
            angle (float): Rotation angle in degrees, positive values
                mean clockwise rotation. Same in ``mmcv.imrotate``.
            center (tuple[float], optional): Center point (w, h) of the
                rotation. Same in ``mmcv.imrotate``.
            scale (int | float): Isotropic scale factor. Same in
                ``mmcv.imrotate``.
        """
        for key in results.get('img_fields', ['img']):
            img = results[key].copy()
            img_rotated = mmcv.imrotate(
                img, angle, center, scale, border_value=self.img_fill_val)
            # Cast back so the stored image keeps its original dtype.
            results[key] = img_rotated.astype(img.dtype)
            results['img_shape'] = results[key].shape

    def _rotate_bboxes(self, results, rotate_matrix):
        """Rotate the boxes in ``results['bbox_fields']``, in place.

        The four corners of each box are transformed by ``rotate_matrix``
        (a 2x3 affine matrix), then the axis-aligned bounding rectangle of
        the result is clipped to the image.
        """
        # Only height and width are needed; slicing keeps this working when
        # ``img_shape`` has no channel entry.
        h, w = results['img_shape'][:2]
        for key in results.get('bbox_fields', []):
            min_x, min_y, max_x, max_y = np.split(
                results[key], results[key].shape[-1], axis=-1)
            coordinates = np.stack([[min_x, min_y], [max_x, min_y],
                                    [min_x, max_y],
                                    [max_x, max_y]])  # [4, 2, nb_bbox, 1]
            # pad 1 to convert from format [x, y] to homogeneous
            # coordinates format [x, y, 1]
            coordinates = np.concatenate(
                (coordinates,
                 np.ones((4, 1, coordinates.shape[2], 1), coordinates.dtype)),
                axis=1)  # [4, 3, nb_bbox, 1]
            coordinates = coordinates.transpose(
                (2, 0, 1, 3))  # [nb_bbox, 4, 3, 1]
            rotated_coords = np.matmul(rotate_matrix,
                                       coordinates)  # [nb_bbox, 4, 2, 1]
            rotated_coords = rotated_coords[..., 0]  # [nb_bbox, 4, 2]
            min_x = np.min(rotated_coords[:, :, 0], axis=1)
            min_y = np.min(rotated_coords[:, :, 1], axis=1)
            max_x = np.max(rotated_coords[:, :, 0], axis=1)
            max_y = np.max(rotated_coords[:, :, 1], axis=1)
            min_x = np.clip(min_x, a_min=0, a_max=w)
            min_y = np.clip(min_y, a_min=0, a_max=h)
            # The lower clip bound is the already-clipped minimum, so a box
            # rotated fully outside collapses to zero extent instead of
            # inverting.
            max_x = np.clip(max_x, a_min=min_x, a_max=w)
            max_y = np.clip(max_y, a_min=min_y, a_max=h)
            results[key] = np.stack([min_x, min_y, max_x, max_y],
                                    axis=-1).astype(results[key].dtype)

    def _rotate_masks(self,
                      results,
                      angle,
                      center=None,
                      scale=1.0,
                      fill_val=0):
        """Rotate every mask container in ``results['mask_fields']``.

        Each container is replaced by the return value of its own ``rotate``
        method, called with the current image height and width.
        """
        h, w = results['img_shape'][:2]
        for key in results.get('mask_fields', []):
            masks = results[key]
            results[key] = masks.rotate((h, w), angle, center, scale, fill_val)

    def _rotate_seg(self,
                    results,
                    angle,
                    center=None,
                    scale=1.0,
                    fill_val=255):
        """Rotate every segmentation map in ``results['seg_fields']``."""
        for key in results.get('seg_fields', []):
            seg = results[key].copy()
            results[key] = mmcv.imrotate(
                seg, angle, center, scale,
                border_value=fill_val).astype(seg.dtype)

    def _filter_invalid(self, results, min_bbox_size=0):
        """Filter bboxes and corresponding masks too small after rotate
        augmentation.

        A box is kept only when both its width and height exceed
        ``min_bbox_size``. The labels and masks paired with each bbox key
        (see ``bbox2fields``) are filtered with the same indices when
        present in ``results``.
        """
        bbox2label, bbox2mask, _ = bbox2fields()
        for key in results.get('bbox_fields', []):
            bbox_w = results[key][:, 2] - results[key][:, 0]
            bbox_h = results[key][:, 3] - results[key][:, 1]
            valid_inds = (bbox_w > min_bbox_size) & (bbox_h > min_bbox_size)
            valid_inds = np.nonzero(valid_inds)[0]
            results[key] = results[key][valid_inds]
            # label fields. e.g. gt_labels and gt_labels_ignore
            label_key = bbox2label.get(key)
            if label_key in results:
                results[label_key] = results[label_key][valid_inds]
            # mask fields, e.g. gt_masks and gt_masks_ignore
            mask_key = bbox2mask.get(key)
            if mask_key in results:
                results[mask_key] = results[mask_key][valid_inds]

    def __call__(self, results):
        """Call function to rotate images, bounding boxes, masks and semantic
        segmentation maps.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Rotated results (the same dict, modified in place), or the
            untouched input when the random draw exceeds ``prob``.
        """
        if np.random.rand() > self.prob:
            return results
        h, w = results['img'].shape[:2]
        center = self.center
        if center is None:
            center = ((w - 1) * 0.5, (h - 1) * 0.5)
        angle = random_negative(self.angle, self.random_negative_prob)
        self._rotate_img(results, angle, center, self.scale)
        # The angle is negated for the box matrix.
        # NOTE(review): presumably because OpenCV treats positive angles as
        # counter-clockwise while ``angle`` here is clockwise - confirm
        # against the ``cv2.getRotationMatrix2D`` documentation.
        rotate_matrix = cv2.getRotationMatrix2D(center, -angle, self.scale)
        self._rotate_bboxes(results, rotate_matrix)
        # Masks are padded with 0, i.e. background.
        self._rotate_masks(results, angle, center, self.scale, fill_val=0)
        self._rotate_seg(
            results, angle, center, self.scale, fill_val=self.seg_ignore_label)
        self._filter_invalid(results)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(level={self.level}, '
        repr_str += f'scale={self.scale}, '
        repr_str += f'center={self.center}, '
        repr_str += f'img_fill_val={self.img_fill_val}, '
        repr_str += f'seg_ignore_label={self.seg_ignore_label}, '
        repr_str += f'prob={self.prob}, '
        repr_str += f'max_rotate_angle={self.max_rotate_angle}, '
        repr_str += f'random_negative_prob={self.random_negative_prob})'
        return repr_str
class Translate:
    """Translate the images, bboxes, masks and segmentation maps horizontally
    or vertically.

    Args:
        level (int | float): The level for Translate and should be in
            range [0,_MAX_LEVEL].
        prob (float): The probability for performing translation and
            should be in range [0, 1].
        img_fill_val (int | float | tuple): The filled value for image
            border. If float, the same fill value will be used for all
            the three channels of image. If tuple, it should have 3
            elements (e.g. equal to the number of channels of the image).
        seg_ignore_label (int): The fill value used for segmentation map.
            Note this value must equal ``ignore_label`` in ``semantic_head``
            of the corresponding config. Default 255.
        direction (str): The translate direction, either "horizontal"
            or "vertical".
        max_translate_offset (int | float): The pixel offset reached at
            ``level == _MAX_LEVEL``.
        random_negative_prob (float): The probability that the offset is
            negated.
        min_size (int | float): Boxes whose width or height does not exceed
            this many pixels after the translation are dropped.
    """

    def __init__(self,
                 level,
                 prob=0.5,
                 img_fill_val=128,
                 seg_ignore_label=255,
                 direction='horizontal',
                 max_translate_offset=250.,
                 random_negative_prob=0.5,
                 min_size=0):
        assert isinstance(level, (int, float)), \
            'The level must be type int or float.'
        assert 0 <= level <= _MAX_LEVEL, \
            'The level used for calculating Translate\'s offset should be ' \
            'in range [0,_MAX_LEVEL]'
        assert 0 <= prob <= 1.0, \
            'The probability of translation should be in range [0, 1].'
        # Normalise the fill value to a 3-tuple of floats.
        if isinstance(img_fill_val, (float, int)):
            img_fill_val = tuple([float(img_fill_val)] * 3)
        elif isinstance(img_fill_val, tuple):
            assert len(img_fill_val) == 3, \
                'img_fill_val as tuple must have 3 elements.'
            img_fill_val = tuple([float(val) for val in img_fill_val])
        else:
            raise ValueError('img_fill_val must be type float or tuple.')
        assert np.all([0 <= val <= 255 for val in img_fill_val]), \
            'all elements of img_fill_val should between range [0,255].'
        assert direction in ('horizontal', 'vertical'), \
            'direction should be "horizontal" or "vertical".'
        assert isinstance(max_translate_offset, (int, float)), \
            'The max_translate_offset must be type int or float.'
        # The unsigned offset used for translation, truncated to whole
        # pixels; the sign is drawn per call in ``__call__``.
        self.offset = int(level_to_value(level, max_translate_offset))
        self.level = level
        self.prob = prob
        self.img_fill_val = img_fill_val
        self.seg_ignore_label = seg_ignore_label
        self.direction = direction
        self.max_translate_offset = max_translate_offset
        self.random_negative_prob = random_negative_prob
        self.min_size = min_size

    def _translate_img(self, results, offset, direction='horizontal'):
        """Translate every image listed in ``results['img_fields']``.

        Falls back to the single key ``'img'`` when ``img_fields`` is absent.
        ``results['img_shape']`` is refreshed from each translated image.

        Args:
            results (dict): Result dict from loading pipeline.
            offset (int | float): The offset for translate.
            direction (str): The translate direction, either "horizontal"
                or "vertical".
        """
        for key in results.get('img_fields', ['img']):
            img = results[key].copy()
            results[key] = mmcv.imtranslate(
                img, offset, direction, self.img_fill_val).astype(img.dtype)
            results['img_shape'] = results[key].shape

    def _translate_bboxes(self, results, offset):
        """Shift bboxes horizontally or vertically, according to offset.

        Only the coordinates along ``self.direction`` are shifted; the
        shifted minimum is clamped at 0 and the shifted maximum at the image
        width (or height).
        """
        h, w, c = results['img_shape']
        for key in results.get('bbox_fields', []):
            min_x, min_y, max_x, max_y = np.split(
                results[key], results[key].shape[-1], axis=-1)
            if self.direction == 'horizontal':
                min_x = np.maximum(0, min_x + offset)
                max_x = np.minimum(w, max_x + offset)
            elif self.direction == 'vertical':
                min_y = np.maximum(0, min_y + offset)
                max_y = np.minimum(h, max_y + offset)
            # the boxes translated outside of image will be filtered along with
            # the corresponding masks, by invoking ``_filter_invalid``.
            results[key] = np.concatenate([min_x, min_y, max_x, max_y],
                                          axis=-1)

    def _translate_masks(self,
                         results,
                         offset,
                         direction='horizontal',
                         fill_val=0):
        """Translate masks horizontally or vertically.

        Each container in ``results['mask_fields']`` is replaced by the
        return value of its own ``translate`` method, called with the
        current image height and width.
        """
        h, w, c = results['img_shape']
        for key in results.get('mask_fields', []):
            masks = results[key]
            results[key] = masks.translate((h, w), offset, direction, fill_val)

    def _translate_seg(self,
                       results,
                       offset,
                       direction='horizontal',
                       fill_val=255):
        """Translate segmentation maps horizontally or vertically."""
        for key in results.get('seg_fields', []):
            seg = results[key].copy()
            results[key] = mmcv.imtranslate(seg, offset, direction,
                                            fill_val).astype(seg.dtype)

    def _filter_invalid(self, results, min_size=0):
        """Filter bboxes and masks too small or translated out of image.

        A box is kept only when both its width and height exceed
        ``min_size``. The labels and masks paired with each bbox key (see
        ``bbox2fields``) are filtered with the same indices when present in
        ``results``.

        Returns:
            dict: ``results``, modified in place.
        """
        bbox2label, bbox2mask, _ = bbox2fields()
        for key in results.get('bbox_fields', []):
            bbox_w = results[key][:, 2] - results[key][:, 0]
            bbox_h = results[key][:, 3] - results[key][:, 1]
            valid_inds = (bbox_w > min_size) & (bbox_h > min_size)
            valid_inds = np.nonzero(valid_inds)[0]
            results[key] = results[key][valid_inds]
            # label fields. e.g. gt_labels and gt_labels_ignore
            label_key = bbox2label.get(key)
            if label_key in results:
                results[label_key] = results[label_key][valid_inds]
            # mask fields, e.g. gt_masks and gt_masks_ignore
            mask_key = bbox2mask.get(key)
            if mask_key in results:
                results[mask_key] = results[mask_key][valid_inds]
        return results

    def __call__(self, results):
        """Call function to translate images, bounding boxes, masks and
        semantic segmentation maps.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Translated results (the same dict, modified in place), or
            the untouched input when the random draw exceeds ``prob``.
        """
        if np.random.rand() > self.prob:
            return results
        offset = random_negative(self.offset, self.random_negative_prob)
        self._translate_img(results, offset, self.direction)
        self._translate_bboxes(results, offset)
        # fill_val defaultly 0 for BitmapMasks and None for PolygonMasks.
        self._translate_masks(results, offset, self.direction)
        # fill_val set to ``seg_ignore_label`` for the ignored value
        # of segmentation map.
        self._translate_seg(
            results, offset, self.direction, fill_val=self.seg_ignore_label)
        self._filter_invalid(results, min_size=self.min_size)
        return results

    def __repr__(self):
        # Same layout as the other transforms in this file, so that pipeline
        # dumps show the configuration instead of a bare object address.
        repr_str = self.__class__.__name__
        repr_str += f'(level={self.level}, '
        repr_str += f'prob={self.prob}, '
        repr_str += f'img_fill_val={self.img_fill_val}, '
        repr_str += f'seg_ignore_label={self.seg_ignore_label}, '
        repr_str += f'direction={self.direction}, '
        repr_str += f'max_translate_offset={self.max_translate_offset}, '
        repr_str += f'random_negative_prob={self.random_negative_prob}, '
        repr_str += f'min_size={self.min_size})'
        return repr_str
class ColorTransform:
    """Adjust the color of the image; bboxes, masks and segmentations are
    left as they are.

    Args:
        level (int | float): Should be in range [0,_MAX_LEVEL].
        prob (float): The probability for performing Color transformation.
    """

    def __init__(self, level, prob=0.5):
        level_is_number = isinstance(level, (int, float))
        assert level_is_number, 'The level must be type int or float.'
        assert 0 <= level <= _MAX_LEVEL, \
            'The level should be in range [0,_MAX_LEVEL].'
        assert 0 <= prob <= 1.0, \
            'The probability should be in range [0,1].'
        self.prob = prob
        self.level = level
        # Factor handed to ``mmcv.adjust_color`` on every applied call.
        self.factor = enhance_level_to_value(level)

    def _adjust_color_img(self, results, factor=1.0):
        """Run ``mmcv.adjust_color`` on every image in ``img_fields``.

        Falls back to the single key ``'img'`` when ``img_fields`` is absent.
        """
        image_keys = results.get('img_fields', ['img'])
        for name in image_keys:
            # NOTE defaultly the image should be BGR format
            source = results[name]
            adjusted = mmcv.adjust_color(source, factor)
            results[name] = adjusted.astype(source.dtype)

    def __call__(self, results):
        """Call function for Color transformation.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: The same dict, with its images adjusted unless the random
            draw exceeded ``prob``.
        """
        skipped = np.random.rand() > self.prob
        if not skipped:
            self._adjust_color_img(results, self.factor)
        return results

    def __repr__(self):
        cls_name = self.__class__.__name__
        return f'{cls_name}(level={self.level}, prob={self.prob})'
class EqualizeTransform:
    """Apply Equalize transformation to image. The bboxes, masks and
    segmentations are not modified.

    Args:
        prob (float): The probability for performing Equalize transformation.
    """

    def __init__(self, prob=0.5):
        assert 0 <= prob <= 1.0, \
            'The probability should be in range [0,1].'
        self.prob = prob

    def _imequalize(self, results):
        """Run ``mmcv.imequalize`` on every image in ``img_fields``.

        Falls back to the single key ``'img'`` when ``img_fields`` is absent.
        """
        for key in results.get('img_fields', ['img']):
            img = results[key]
            # Cast back so the stored image keeps its original dtype.
            results[key] = mmcv.imequalize(img).astype(img.dtype)

    def __call__(self, results):
        """Call function for Equalize transformation.

        Args:
            results (dict): Results dict from loading pipeline.

        Returns:
            dict: Results after the transformation (the same dict), or the
            untouched input when the random draw exceeds ``prob``.
        """
        if np.random.rand() > self.prob:
            return results
        self._imequalize(results)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(prob={self.prob})'
        # Without this return, ``repr()`` receives None and raises TypeError.
        return repr_str
class BrightnessTransform:
    """Adjust the brightness of the image; bboxes, masks and segmentations
    are left as they are.

    Args:
        level (int | float): Should be in range [0,_MAX_LEVEL].
        prob (float): The probability for performing Brightness transformation.
    """

    def __init__(self, level, prob=0.5):
        level_is_number = isinstance(level, (int, float))
        assert level_is_number, 'The level must be type int or float.'
        assert 0 <= level <= _MAX_LEVEL, \
            'The level should be in range [0,_MAX_LEVEL].'
        assert 0 <= prob <= 1.0, \
            'The probability should be in range [0,1].'
        self.prob = prob
        self.level = level
        # Factor handed to ``mmcv.adjust_brightness`` on every applied call.
        self.factor = enhance_level_to_value(level)

    def _adjust_brightness_img(self, results, factor=1.0):
        """Run ``mmcv.adjust_brightness`` on every image in ``img_fields``.

        Falls back to the single key ``'img'`` when ``img_fields`` is absent.
        """
        image_keys = results.get('img_fields', ['img'])
        for name in image_keys:
            source = results[name]
            adjusted = mmcv.adjust_brightness(source, factor)
            results[name] = adjusted.astype(source.dtype)

    def __call__(self, results):
        """Call function for Brightness transformation.

        Args:
            results (dict): Results dict from loading pipeline.

        Returns:
            dict: The same dict, with its images adjusted unless the random
            draw exceeded ``prob``.
        """
        skipped = np.random.rand() > self.prob
        if not skipped:
            self._adjust_brightness_img(results, self.factor)
        return results

    def __repr__(self):
        cls_name = self.__class__.__name__
        return f'{cls_name}(level={self.level}, prob={self.prob})'
class ContrastTransform:
    """Adjust the contrast of the image; bboxes, masks and segmentations
    are left as they are.

    Args:
        level (int | float): Should be in range [0,_MAX_LEVEL].
        prob (float): The probability for performing Contrast transformation.
    """

    def __init__(self, level, prob=0.5):
        level_is_number = isinstance(level, (int, float))
        assert level_is_number, 'The level must be type int or float.'
        assert 0 <= level <= _MAX_LEVEL, \
            'The level should be in range [0,_MAX_LEVEL].'
        assert 0 <= prob <= 1.0, \
            'The probability should be in range [0,1].'
        self.prob = prob
        self.level = level
        # Factor handed to ``mmcv.adjust_contrast`` on every applied call.
        self.factor = enhance_level_to_value(level)

    def _adjust_contrast_img(self, results, factor=1.0):
        """Run ``mmcv.adjust_contrast`` on every image in ``img_fields``.

        Falls back to the single key ``'img'`` when ``img_fields`` is absent.
        """
        image_keys = results.get('img_fields', ['img'])
        for name in image_keys:
            source = results[name]
            adjusted = mmcv.adjust_contrast(source, factor)
            results[name] = adjusted.astype(source.dtype)

    def __call__(self, results):
        """Call function for Contrast transformation.

        Args:
            results (dict): Results dict from loading pipeline.

        Returns:
            dict: The same dict, with its images adjusted unless the random
            draw exceeded ``prob``.
        """
        skipped = np.random.rand() > self.prob
        if not skipped:
            self._adjust_contrast_img(results, self.factor)
        return results

    def __repr__(self):
        cls_name = self.__class__.__name__
        return f'{cls_name}(level={self.level}, prob={self.prob})'