Spaces:
Sleeping
Sleeping
# Copyright (c) 2023, NVIDIA Corporation & Affiliates. All rights reserved. | |
# | |
# This work is made available under the Nvidia Source Code License-NC. | |
# To view a copy of this license, visit | |
# https://github.com/NVlabs/prismer/blob/main/LICENSE | |
import random | |
import numpy as np | |
import torch | |
from PIL import Image, ImageOps, ImageEnhance, ImageDraw | |
# Padding value written into each label map where a geometric transform
# exposes area outside the original image, keyed by label name.
fillmask = {
    'depth': 0,
    'normal': 0,
    'edge': 0,
    'seg_coco': 255,
    'seg_ade': 255,
    'obj_detection': 255,
    'ocr_detection': 255,
}

# Padding colour used for the image itself under the same transforms.
fillcolor = (0, 0, 0)
def affine_transform(pair, affine_params):
    """Apply one affine transform to an image and to each of its label maps.

    Args:
        pair: ``(img, label)`` where ``label`` is ``None`` or a mapping from
            label name (a key of ``fillmask``) to an image.
        affine_params: 6-tuple of coefficients passed to ``Image.AFFINE``.

    Returns:
        ``(img, label)``. The image is resampled bilinearly and padded with
        ``fillcolor``; every label map is resampled with nearest-neighbour
        and padded with its ``fillmask`` value. The ``label`` mapping is
        updated in place and returned.
    """
    image, maps = pair
    image = image.transform(
        image.size,
        Image.AFFINE,
        affine_params,
        resample=Image.BILINEAR,
        fillcolor=fillcolor,
    )
    if maps is None:
        return image, maps

    for key in maps:
        current = maps[key]
        # Nearest-neighbour keeps label values from being blended.
        maps[key] = current.transform(
            current.size,
            Image.AFFINE,
            affine_params,
            resample=Image.NEAREST,
            fillcolor=fillmask[key],
        )
    return image, maps
def ShearX(pair, v):  # magnitude range: [-0.3, 0.3]
    """Shear the pair along x by ``v``, negating ``v`` with probability 0.5."""
    assert -0.3 <= v <= 0.3
    shear = -v if random.random() > 0.5 else v
    coefficients = (1, shear, 0, 0, 1, 0)
    return affine_transform(pair, coefficients)
def ShearY(pair, v):  # magnitude range: [-0.3, 0.3]
    """Shear the pair along y by ``v``, negating ``v`` with probability 0.5."""
    assert -0.3 <= v <= 0.3
    shear = -v if random.random() > 0.5 else v
    coefficients = (1, 0, 0, shear, 1, 0)
    return affine_transform(pair, coefficients)
def TranslateX(pair, v):  # fraction of image width, in [-0.45, 0.45]
    """Translate the pair along x by ``v`` times the image width.

    The sign of ``v`` is flipped with probability 0.5 before scaling.
    """
    assert -0.45 <= v <= 0.45
    fraction = -v if random.random() > 0.5 else v
    image, _label = pair
    width = image.size[0]
    offset = fraction * width
    return affine_transform(pair, (1, 0, offset, 0, 1, 0))
def TranslateY(pair, v):  # fraction of image height, in [-0.45, 0.45]
    """Translate the pair along y by ``v`` times the image height.

    The sign of ``v`` is flipped with probability 0.5 before scaling.
    """
    assert -0.45 <= v <= 0.45
    fraction = -v if random.random() > 0.5 else v
    image, _label = pair
    height = image.size[1]
    offset = fraction * height
    return affine_transform(pair, (1, 0, 0, 0, 1, offset))
def TranslateXAbs(pair, v):  # absolute offset, in [0, 10]
    """Translate the pair along x by an absolute offset ``v``.

    Unlike ``TranslateX`` the offset is not scaled by the image size. The
    sign of ``v`` is flipped with probability 0.5.
    """
    assert 0 <= v <= 10
    offset = -v if random.random() > 0.5 else v
    return affine_transform(pair, (1, 0, offset, 0, 1, 0))
def TranslateYAbs(pair, v):  # absolute offset, in [0, 10]
    """Translate the pair along y by an absolute offset ``v``.

    Unlike ``TranslateY`` the offset is not scaled by the image size. The
    sign of ``v`` is flipped with probability 0.5.
    """
    assert 0 <= v <= 10
    offset = -v if random.random() > 0.5 else v
    return affine_transform(pair, (1, 0, 0, 0, 1, offset))
def Rotate(pair, v):  # degrees, in [-30, 30]
    """Rotate the image and every label map by ``v`` degrees.

    The sign of ``v`` is flipped with probability 0.5. The image is padded
    with ``fillcolor``; label maps use nearest-neighbour resampling and are
    padded with their ``fillmask`` value. The ``label`` mapping is updated
    in place and returned.
    """
    assert -30 <= v <= 30
    angle = -v if random.random() > 0.5 else v
    image, maps = pair
    image = image.rotate(angle, fillcolor=fillcolor)
    if maps is None:
        return image, maps

    for key in maps:
        maps[key] = maps[key].rotate(
            angle, resample=Image.NEAREST, fillcolor=fillmask[key]
        )
    return image, maps
def AutoContrast(pair, _):
    """Apply ``ImageOps.autocontrast`` to the image; the label is returned as is.

    The second argument is ignored.
    """
    image, label = pair
    adjusted = ImageOps.autocontrast(image)
    return adjusted, label
def Invert(pair, _):
    """Apply ``ImageOps.invert`` to the image; the label is returned as is.

    The second argument is ignored.
    """
    image, label = pair
    inverted = ImageOps.invert(image)
    return inverted, label
def Equalize(pair, _):
    """Apply ``ImageOps.equalize`` to the image; the label is returned as is.

    The second argument is ignored.
    """
    image, label = pair
    equalized = ImageOps.equalize(image)
    return equalized, label
def Flip(pair, _):  # not from the paper
    """Mirror the image and every label map left-to-right.

    Args:
        pair: ``(img, label)`` where ``label`` is ``None`` or a mapping from
            label name to an image, as in the other geometric operations
            of this module.
        _: unused magnitude argument.

    Returns:
        ``(img, label)`` with the image mirrored. When ``label`` is a
        mapping, each of its entries is mirrored and the mapping is updated
        in place; when it is ``None`` it is returned unchanged.
    """
    img, label = pair
    img = ImageOps.mirror(img)
    if label is not None:
        # ImageOps.mirror works on a single image, so mirror each label map
        # individually rather than passing the whole mapping.
        for exp in label:
            label[exp] = ImageOps.mirror(label[exp])
    return img, label
def Solarize(pair, v):  # threshold range: [0, 256]
    """Apply ``ImageOps.solarize`` with threshold ``v``; the label is returned as is."""
    image, label = pair
    assert 0 <= v <= 256
    solarized = ImageOps.solarize(image, v)
    return solarized, label
def Posterize(pair, v):  # bit range: [4, 8]
    """Apply ``ImageOps.posterize`` with ``int(v)`` bits; the label is returned as is."""
    image, label = pair
    assert 4 <= v <= 8
    bits = int(v)
    return ImageOps.posterize(image, bits), label
def Posterize2(pair, v):  # bit range: [0, 4]
    """Apply ``ImageOps.posterize`` with ``int(v)`` bits; the label is returned as is.

    Same operation as ``Posterize`` but accepting the lower range 0..4.
    """
    image, label = pair
    assert 0 <= v <= 4
    bits = int(v)
    return ImageOps.posterize(image, bits), label
def Contrast(pair, v):  # factor range: [0.1, 1.9]
    """Enhance image contrast by factor ``v``; the label is returned as is."""
    image, label = pair
    assert 0.1 <= v <= 1.9
    enhancer = ImageEnhance.Contrast(image)
    return enhancer.enhance(v), label
def Color(pair, v):  # factor range: [0.1, 1.9]
    """Enhance image colour by factor ``v``; the label is returned as is."""
    image, label = pair
    assert 0.1 <= v <= 1.9
    enhancer = ImageEnhance.Color(image)
    return enhancer.enhance(v), label
def Brightness(pair, v):  # factor range: [0.1, 1.9]
    """Enhance image brightness by factor ``v``; the label is returned as is."""
    image, label = pair
    assert 0.1 <= v <= 1.9
    enhancer = ImageEnhance.Brightness(image)
    return enhancer.enhance(v), label
def Sharpness(pair, v):  # factor range: [0.1, 1.9]
    """Enhance image sharpness by factor ``v``; the label is returned as is."""
    image, label = pair
    assert 0.1 <= v <= 1.9
    enhancer = ImageEnhance.Sharpness(image)
    return enhancer.enhance(v), label
def Cutout(pair, v):  # fraction of image width, in [0, 0.2]
    """Paint a square over the image whose side is ``v`` times the image width.

    A ``v`` of zero returns ``pair`` itself. Otherwise the image is passed
    to ``CutoutAbs`` with the side length in pixels; the label is returned
    as is.
    """
    assert 0.0 <= v <= 0.2
    if v <= 0.0:
        return pair

    image, label = pair
    side = v * image.size[0]
    return CutoutAbs(image, side), label
def CutoutAbs(img, v):  # v: side length of the square
    """Return a copy of ``img`` with a filled square of side ``v`` drawn on it.

    A centre point is drawn uniformly over the image. The square's top-left
    corner is the centre minus ``v / 2`` clamped to 0, and its bottom-right
    corner is the top-left plus ``v`` clamped to the image size.

    Args:
        img: image exposing ``size`` (``(width, height)``) and ``copy()``.
        v: side length of the square; a negative value returns ``img``
            itself, unmodified.

    Returns:
        A modified copy of ``img``; the input image is not drawn on.
    """
    # assert 0 <= v <= 20
    if v < 0:
        return img
    w, h = img.size
    # np.random.uniform takes (low, high); pass both bounds so the centre is
    # drawn from [0, w) x [0, h) instead of relying on the default high=1.0.
    x0 = np.random.uniform(0, w)
    y0 = np.random.uniform(0, h)
    x0 = int(max(0, x0 - v / 2.))
    y0 = int(max(0, y0 - v / 2.))
    x1 = min(w, x0 + v)
    y1 = min(h, y0 + v)
    xy = (x0, y0, x1, y1)
    color = (125, 123, 114)
    # color = (0, 0, 0)
    img = img.copy()  # draw on a copy so the caller's image is left intact
    ImageDraw.Draw(img).rectangle(xy, color)
    return img
def Identity(pair, v):
    """Return ``pair`` unchanged; the magnitude ``v`` is ignored."""
    unchanged = pair
    return unchanged
def augment_list():
    """Return the active augmentation operations with their magnitude ranges.

    Each entry is ``(operation, min_value, max_value)``. A fresh list is
    built on every call. Invert, Solarize, Posterize and Color are
    deliberately left out of the active set.

    Policy reference:
    https://github.com/google-research/uda/blob/master/image/randaugment/policies.py#L57
    """
    return [
        (Identity, 0., 1.0),
        # Geometric operations (applied to image and labels).
        (ShearX, 0., 0.3),
        (ShearY, 0., 0.3),
        (TranslateX, 0., 0.33),
        (TranslateY, 0., 0.33),
        (Rotate, 0, 30),
        # Photometric operations (applied to the image only).
        (AutoContrast, 0, 1),
        (Equalize, 0, 1),
        (Brightness, 0.1, 1.9),
        (Sharpness, 0.1, 1.9),
    ]
class Lighting(object):
    """Lighting noise (AlexNet-style PCA-based noise)."""

    def __init__(self, alphastd, eigval, eigvec):
        """Store the noise standard deviation and the eigen-decomposition.

        ``eigval`` and ``eigvec`` are converted to ``torch.Tensor``; they are
        later viewed as 3 values and a 3x3 matrix respectively.
        """
        self.alphastd = alphastd
        self.eigval = torch.Tensor(eigval)
        self.eigvec = torch.Tensor(eigvec)

    def __call__(self, img):
        """Return ``img`` plus a random per-channel offset.

        When ``alphastd`` is 0 the input tensor itself is returned. Otherwise
        three coefficients are drawn from N(0, alphastd), combined with the
        eigenvectors and eigenvalues into one offset per channel, and added
        to ``img`` out of place.
        """
        if self.alphastd == 0:
            return img

        # Draw the three coefficients with the same type as img.
        coeffs = img.new().resize_(3).normal_(0, self.alphastd)
        weighted = self.eigvec.type_as(img).clone()
        weighted = weighted * coeffs.view(1, 3).expand(3, 3)
        weighted = weighted * self.eigval.view(1, 3).expand(3, 3)
        offset = weighted.sum(1).squeeze()
        return img + offset.view(3, 1, 1).expand_as(img)
class CutoutDefault(object):
    """Zero out a randomly placed square patch of a tensor image, in place.

    Reference : https://github.com/quark0/darts/blob/master/cnn/utils.py
    """

    def __init__(self, length):
        """Store ``length``, the side of the square patch."""
        self.length = length

    def __call__(self, img):
        """Multiply ``img`` in place by a mask with one zeroed patch.

        The patch is centred on a random pixel and clipped to the image
        bounds; ``img`` is indexed as ``(channel, height, width)`` for its
        size. The same tensor object is returned.
        """
        height, width = img.size(1), img.size(2)
        keep = np.ones((height, width), np.float32)

        # Centre of the patch: row first, then column.
        cy = np.random.randint(height)
        cx = np.random.randint(width)

        half = self.length // 2
        top, bottom = np.clip(cy - half, 0, height), np.clip(cy + half, 0, height)
        left, right = np.clip(cx - half, 0, width), np.clip(cx + half, 0, width)
        keep[top:bottom, left:right] = 0.

        keep = torch.from_numpy(keep).expand_as(img)
        img *= keep
        return img
class RandAugment:
    """Apply ``n`` randomly chosen augmentation operations at magnitude ``m``."""

    def __init__(self, n, m):
        """Store the operation count ``n`` and magnitude ``m`` (range [0, 10])."""
        self.n = n
        self.m = m  # [0, 10]
        self.augment_list = augment_list()

    def __call__(self, img, label):
        """Return ``(img, label)`` after applying ``n`` sampled operations.

        Operations are sampled with replacement from ``self.augment_list``.
        For each one, ``m / 10`` is mapped linearly onto that operation's
        ``[minval, maxval]`` range and passed as its magnitude.
        """
        current = (img, label)
        chosen = random.choices(self.augment_list, k=self.n)
        for operation, low, high in chosen:
            magnitude = (float(self.m) / 10) * float(high - low) + low
            current = operation(current, magnitude)
        return current