import numbers import random import math import warnings import numpy as np from PIL import Image, ImageEnhance, ImageOps class ShearX(object): def __init__(self, fillcolor=(128, 128, 128)): self.fillcolor = fillcolor def __call__(self, x, magnitude): return x.transform( x.size, Image.AFFINE, (1, magnitude * random.choice([-1, 1]), 0, 0, 1, 0), Image.BICUBIC, fillcolor=self.fillcolor) class ShearY(object): def __init__(self, fillcolor=(128, 128, 128)): self.fillcolor = fillcolor def __call__(self, x, magnitude): return x.transform( x.size, Image.AFFINE, (1, 0, 0, magnitude * random.choice([-1, 1]), 1, 0), Image.BICUBIC, fillcolor=self.fillcolor) class TranslateX(object): def __init__(self, fillcolor=(128, 128, 128)): self.fillcolor = fillcolor def __call__(self, x, magnitude): return x.transform( x.size, Image.AFFINE, (1, 0, magnitude * x.size[0] * random.choice([-1, 1]), 0, 1, 0), fillcolor=self.fillcolor) class TranslateY(object): def __init__(self, fillcolor=(128, 128, 128)): self.fillcolor = fillcolor def __call__(self, x, magnitude): return x.transform( x.size, Image.AFFINE, (1, 0, 0, 0, 1, magnitude * x.size[1] * random.choice([-1, 1])), fillcolor=self.fillcolor) class Rotate(object): # from https://stackoverflow.com/questions/ # 5252170/specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand def __call__(self, x, magnitude): rot = x.convert("RGBA").rotate(magnitude * random.choice([-1, 1])) return Image.composite(rot, Image.new("RGBA", rot.size, (128,) * 4), rot).convert(x.mode) class Color(object): def __call__(self, x, magnitude): return ImageEnhance.Color(x).enhance(1 + magnitude * random.choice([-1, 1])) class Posterize(object): def __call__(self, x, magnitude): return ImageOps.posterize(x, magnitude) class Solarize(object): def __call__(self, x, magnitude): return ImageOps.solarize(x, magnitude) class Contrast(object): def __call__(self, x, magnitude): return ImageEnhance.Contrast(x).enhance(1 + magnitude * random.choice([-1, 1])) class Sharpness(object): def __call__(self, x, magnitude): return ImageEnhance.Sharpness(x).enhance(1 + magnitude * random.choice([-1, 1])) class Brightness(object): def __call__(self, x, magnitude): return ImageEnhance.Brightness(x).enhance(1 + magnitude * random.choice([-1, 1])) class AutoContrast(object): def __call__(self, x, magnitude): return ImageOps.autocontrast(x) class Equalize(object): def __call__(self, x, magnitude): return ImageOps.equalize(x) class Invert(object): def __call__(self, x, magnitude): return ImageOps.invert(x) class ImageNetPolicy(object): """ Randomly choose one of the best 24 Sub-policies on ImageNet. Example: >>> policy = ImageNetPolicy() >>> transformed = policy(image) Example as a PyTorch Transform: >>> transform = transforms.Compose([ >>> transforms.Resize(256), >>> ImageNetPolicy(), >>> transforms.ToTensor()]) """ def __init__(self, fillcolor=(128, 128, 128)): self.policies = [ SubPolicy(0.4, "posterize", 8, 0.6, "rotate", 9, fillcolor), SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor), SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor), SubPolicy(0.6, "posterize", 7, 0.6, "posterize", 6, fillcolor), SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor), SubPolicy(0.4, "equalize", 4, 0.8, "rotate", 8, fillcolor), SubPolicy(0.6, "solarize", 3, 0.6, "equalize", 7, fillcolor), SubPolicy(0.8, "posterize", 5, 1.0, "equalize", 2, fillcolor), SubPolicy(0.2, "rotate", 3, 0.6, "solarize", 8, fillcolor), SubPolicy(0.6, "equalize", 8, 0.4, "posterize", 6, fillcolor), SubPolicy(0.8, "rotate", 8, 0.4, "color", 0, fillcolor), SubPolicy(0.4, "rotate", 9, 0.6, "equalize", 2, fillcolor), SubPolicy(0.0, "equalize", 7, 0.8, "equalize", 8, fillcolor), SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor), SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor), SubPolicy(0.8, "rotate", 8, 1.0, "color", 2, fillcolor), SubPolicy(0.8, "color", 8, 0.8, "solarize", 7, fillcolor), SubPolicy(0.4, "sharpness", 7, 0.6, "invert", 8, fillcolor), SubPolicy(0.6, "shearX", 5, 1.0, "equalize", 9, fillcolor), SubPolicy(0.4, "color", 0, 0.6, "equalize", 3, fillcolor), SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor), SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor), SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor), SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor), SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor) ] def __call__(self, img): policy_idx = random.randint(0, len(self.policies) - 1) return self.policies[policy_idx](img) def __repr__(self): return "AutoAugment ImageNet Policy" class SubPolicy(object): def __init__(self, p1, operation1, magnitude_idx1, p2, operation2, magnitude_idx2, fillcolor=(128, 128, 128)): ranges = { "shearX": np.linspace(0, 0.3, 10), "shearY": np.linspace(0, 0.3, 10), "translateX": np.linspace(0, 150 / 331, 10), "translateY": np.linspace(0, 150 / 331, 10), "rotate": np.linspace(0, 30, 10), "color": np.linspace(0.0, 0.9, 10), "posterize": np.round(np.linspace(8, 4, 10), 0).astype(np.int), "solarize": np.linspace(256, 0, 10), "contrast": np.linspace(0.0, 0.9, 10), "sharpness": np.linspace(0.0, 0.9, 10), "brightness": np.linspace(0.0, 0.9, 10), "autocontrast": [0] * 10, "equalize": [0] * 10, "invert": [0] * 10 } func = { "shearX": ShearX(fillcolor=fillcolor), "shearY": ShearY(fillcolor=fillcolor), "translateX": TranslateX(fillcolor=fillcolor), "translateY": TranslateY(fillcolor=fillcolor), "rotate": Rotate(), "color": Color(), "posterize": Posterize(), "solarize": Solarize(), "contrast": Contrast(), "sharpness": Sharpness(), "brightness": Brightness(), "autocontrast": AutoContrast(), "equalize": Equalize(), "invert": Invert() } self.p1 = p1 self.operation1 = func[operation1] self.magnitude1 = ranges[operation1][magnitude_idx1] self.p2 = p2 self.operation2 = func[operation2] self.magnitude2 = ranges[operation2][magnitude_idx2] def __call__(self, img): if random.random() < self.p1: img = self.operation1(img, self.magnitude1) if random.random() < self.p2: img = self.operation2(img, self.magnitude2) return img def crop(img, i, j, h, w): """Crop the given PIL Image. Args: img (PIL Image): Image to be cropped. i (int): i in (i,j) i.e coordinates of the upper left corner. j (int): j in (i,j) i.e coordinates of the upper left corner. h (int): Height of the cropped image. w (int): Width of the cropped image. Returns: PIL Image: Cropped image. """ return img.crop((j, i, j + w, i + h)) def resize(img, size, interpolation=Image.BILINEAR): r"""Resize the input PIL Image to the given size. Args: img (PIL Image): Image to be resized. size (sequence or int): Desired output size. If size is a sequence like (h, w), the output size will be matched to this. If size is an int, the smaller edge of the image will be matched to this number maintaing the aspect ratio. i.e, if height > width, then image will be rescaled to :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)` interpolation (int, optional): Desired interpolation. Default is ``PIL.Image.BILINEAR`` Returns: PIL Image: Resized image. """ if isinstance(size, int): w, h = img.size if (w <= h and w == size) or (h <= w and h == size): return img if w < h: ow = size oh = int(size * h / w) return img.resize((ow, oh), interpolation) else: oh = size ow = int(size * w / h) return img.resize((ow, oh), interpolation) else: return img.resize(size[::-1], interpolation) def center_crop(img, output_size): if isinstance(output_size, numbers.Number): output_size = (int(output_size), int(output_size)) w, h = img.size th, tw = output_size i = int(round((h - th) / 2.)) j = int(round((w - tw) / 2.)) return crop(img, i, j, th, tw) def resized_crop(img, i, j, h, w, size, interpolation=Image.BILINEAR): """Crop the given PIL Image and resize it to desired size. Notably used in :class:`~torchvision.transforms.RandomResizedCrop`. Args: img (PIL Image): Image to be cropped. i (int): i in (i,j) i.e coordinates of the upper left corner j (int): j in (i,j) i.e coordinates of the upper left corner h (int): Height of the cropped image. w (int): Width of the cropped image. size (sequence or int): Desired output size. Same semantics as ``resize``. interpolation (int, optional): Desired interpolation. Default is ``PIL.Image.BILINEAR``. Returns: PIL Image: Cropped image. """ img = crop(img, i, j, h, w) img = resize(img, size, interpolation) return img class Resize(object): """Resize the input PIL Image to the given size. Args: size (sequence or int): Desired output size. If size is a sequence like (h, w), output size will be matched to this. If size is an int, smaller edge of the image will be matched to this number. i.e, if height > width, then image will be rescaled to (size * height / width, size) interpolation (int, optional): Desired interpolation. Default is ``PIL.Image.BILINEAR`` """ def __init__(self, size, interpolation=Image.BILINEAR): self.size = size self.interpolation = interpolation def __call__(self, img): """ Args: img (PIL Image): Image to be scaled. Returns: PIL Image: Rescaled image. """ return resize(img, self.size, self.interpolation) class CenterCrop(object): """Crops the given PIL Image at the center. Args: size (sequence or int): Desired output size of the crop. If size is an int instead of sequence like (h, w), a square crop (size, size) is made. """ def __init__(self, size): self.size = size def __call__(self, img): """ Args: img (PIL Image): Image to be cropped. Returns: PIL Image: Cropped image. """ return center_crop(img, self.size) class RandomResizedCrop(object): """Crop the given PIL Image to random size and aspect ratio. A crop of random size (default: of 0.08 to 1.0) of the original size and a random aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop is finally resized to given size. This is popularly used to train the Inception networks. Args: size: expected output size of each edge scale: range of size of the origin size cropped ratio: range of aspect ratio of the origin aspect ratio cropped interpolation: Default: PIL.Image.BILINEAR """ def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR): self.size = size if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): warnings.warn("range should be of kind (min, max)") self.interpolation = interpolation self.scale = scale self.ratio = ratio @staticmethod def get_params(img, scale, ratio): """Get parameters for ``crop`` for a random sized crop. Args: img (PIL Image): Image to be cropped. scale (tuple): range of size of the origin size cropped ratio (tuple): range of aspect ratio of the origin aspect ratio cropped Returns: tuple: params (i, j, h, w) to be passed to ``crop`` for a random sized crop. """ area = img.size[0] * img.size[1] for attempt in range(10): target_area = random.uniform(*scale) * area log_ratio = (math.log(ratio[0]), math.log(ratio[1])) aspect_ratio = math.exp(random.uniform(*log_ratio)) w = int(round(math.sqrt(target_area * aspect_ratio))) h = int(round(math.sqrt(target_area / aspect_ratio))) if w <= img.size[0] and h <= img.size[1]: i = random.randint(0, img.size[1] - h) j = random.randint(0, img.size[0] - w) return i, j, h, w # Fallback to central crop in_ratio = img.size[0] / img.size[1] if (in_ratio < min(ratio)): w = img.size[0] h = int(round(w / min(ratio))) elif (in_ratio > max(ratio)): h = img.size[1] w = int(round(h * max(ratio))) else: # whole image w = img.size[0] h = img.size[1] i = (img.size[1] - h) // 2 j = (img.size[0] - w) // 2 return i, j, h, w def __call__(self, img): """ Args: img (PIL Image): Image to be cropped and resized. Returns: PIL Image: Randomly cropped and resized image. """ i, j, h, w = self.get_params(img, self.scale, self.ratio) return resized_crop(img, i, j, h, w, self.size, self.interpolation)