|
|
|
|
|
|
|
|
|
import pdb |
|
import numpy as np |
|
from PIL import Image, ImageOps |
|
import torchvision.transforms as tvf |
|
import random |
|
from math import ceil |
|
|
|
from . import transforms_tools as F |
|
|
|
""" |
|
Example command to try out some transformation chain: |
|
|
|
python -m tools.transforms --trfs "Scale(384), ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.1), RandomRotation(10), RandomTilting(0.5, 'all'), RandomScale(240,320), RandomCrop(224)" |
|
""" |
|
|
|
|
|
def instanciate_transformation(cmd_line): |
|
"""Create a sequence of transformations. |
|
|
|
cmd_line: (str) |
|
Comma-separated list of transformations. |
|
Ex: "Rotate(10), Scale(256)" |
|
""" |
|
if not isinstance(cmd_line, str): |
|
return cmd_line |
|
|
|
cmd_line = "tvf.Compose([%s])" % cmd_line |
|
try: |
|
return eval(cmd_line) |
|
except Exception as e: |
|
print("Cannot interpret this transform list: %s\nReason: %s" % (cmd_line, e)) |
|
|
|
|
|
class Scale(object): |
|
"""Rescale the input PIL.Image to a given size. |
|
Copied from https://github.com/pytorch in torchvision/transforms/transforms.py |
|
|
|
The smallest dimension of the resulting image will be = size. |
|
|
|
if largest == True: same behaviour for the largest dimension. |
|
|
|
if not can_upscale: don't upscale |
|
if not can_downscale: don't downscale |
|
""" |
|
|
|
def __init__( |
|
self, |
|
size, |
|
interpolation=Image.BILINEAR, |
|
largest=False, |
|
can_upscale=True, |
|
can_downscale=True, |
|
): |
|
assert isinstance(size, int) or (len(size) == 2) |
|
self.size = size |
|
self.interpolation = interpolation |
|
self.largest = largest |
|
self.can_upscale = can_upscale |
|
self.can_downscale = can_downscale |
|
|
|
def __repr__(self): |
|
fmt_str = "RandomScale(%s" % str(self.size) |
|
if self.largest: |
|
fmt_str += ", largest=True" |
|
if not self.can_upscale: |
|
fmt_str += ", can_upscale=False" |
|
if not self.can_downscale: |
|
fmt_str += ", can_downscale=False" |
|
return fmt_str + ")" |
|
|
|
def get_params(self, imsize): |
|
w, h = imsize |
|
if isinstance(self.size, int): |
|
cmp = lambda a, b: (a >= b) if self.largest else (a <= b) |
|
if (cmp(w, h) and w == self.size) or (cmp(h, w) and h == self.size): |
|
ow, oh = w, h |
|
elif cmp(w, h): |
|
ow = self.size |
|
oh = int(self.size * h / w) |
|
else: |
|
oh = self.size |
|
ow = int(self.size * w / h) |
|
else: |
|
ow, oh = self.size |
|
return ow, oh |
|
|
|
def __call__(self, inp): |
|
img = F.grab_img(inp) |
|
w, h = img.size |
|
|
|
size2 = ow, oh = self.get_params(img.size) |
|
|
|
if size2 != img.size: |
|
a1, a2 = img.size, size2 |
|
if (self.can_upscale and min(a1) < min(a2)) or ( |
|
self.can_downscale and min(a1) > min(a2) |
|
): |
|
img = img.resize(size2, self.interpolation) |
|
|
|
return F.update_img_and_labels( |
|
inp, img, persp=(ow / w, 0, 0, 0, oh / h, 0, 0, 0) |
|
) |
|
|
|
|
|
class RandomScale(Scale): |
|
"""Rescale the input PIL.Image to a random size. |
|
Copied from https://github.com/pytorch in torchvision/transforms/transforms.py |
|
|
|
Args: |
|
min_size (int): min size of the smaller edge of the picture. |
|
max_size (int): max size of the smaller edge of the picture. |
|
|
|
ar (float or tuple): |
|
max change of aspect ratio (width/height). |
|
|
|
interpolation (int, optional): Desired interpolation. Default is |
|
``PIL.Image.BILINEAR`` |
|
""" |
|
|
|
def __init__( |
|
self, |
|
min_size, |
|
max_size, |
|
ar=1, |
|
can_upscale=False, |
|
can_downscale=True, |
|
interpolation=Image.BILINEAR, |
|
): |
|
Scale.__init__( |
|
self, |
|
0, |
|
can_upscale=can_upscale, |
|
can_downscale=can_downscale, |
|
interpolation=interpolation, |
|
) |
|
assert type(min_size) == type( |
|
max_size |
|
), "min_size and max_size can only be 2 ints or 2 floats" |
|
assert ( |
|
isinstance(min_size, int) |
|
and min_size >= 1 |
|
or isinstance(min_size, float) |
|
and min_size > 0 |
|
) |
|
assert isinstance(max_size, (int, float)) and min_size <= max_size |
|
self.min_size = min_size |
|
self.max_size = max_size |
|
if type(ar) in (float, int): |
|
ar = (min(1 / ar, ar), max(1 / ar, ar)) |
|
assert 0.2 < ar[0] <= ar[1] < 5 |
|
self.ar = ar |
|
|
|
def get_params(self, imsize): |
|
w, h = imsize |
|
if isinstance(self.min_size, float): |
|
min_size = int(self.min_size * min(w, h) + 0.5) |
|
if isinstance(self.max_size, float): |
|
max_size = int(self.max_size * min(w, h) + 0.5) |
|
if isinstance(self.min_size, int): |
|
min_size = self.min_size |
|
if isinstance(self.max_size, int): |
|
max_size = self.max_size |
|
|
|
if not self.can_upscale: |
|
max_size = min(max_size, min(w, h)) |
|
|
|
size = int(0.5 + F.rand_log_uniform(min_size, max_size)) |
|
ar = F.rand_log_uniform(*self.ar) |
|
|
|
if w < h: |
|
ow = size |
|
oh = int(0.5 + size * h / w / ar) |
|
if oh < min_size: |
|
ow, oh = int(0.5 + ow * float(min_size) / oh), min_size |
|
else: |
|
oh = size |
|
ow = int(0.5 + size * w / h * ar) |
|
if ow < min_size: |
|
ow, oh = min_size, int(0.5 + oh * float(min_size) / ow) |
|
|
|
assert ow >= min_size, "image too small (width=%d < min_size=%d)" % ( |
|
ow, |
|
min_size, |
|
) |
|
assert oh >= min_size, "image too small (height=%d < min_size=%d)" % ( |
|
oh, |
|
min_size, |
|
) |
|
return ow, oh |
|
|
|
|
|
class RandomCrop(object): |
|
"""Crop the given PIL Image at a random location. |
|
Copied from https://github.com/pytorch in torchvision/transforms/transforms.py |
|
|
|
Args: |
|
size (sequence or int): Desired output size of the crop. If size is an |
|
int instead of sequence like (h, w), a square crop (size, size) is |
|
made. |
|
padding (int or sequence, optional): Optional padding on each border |
|
of the image. Default is 0, i.e no padding. If a sequence of length |
|
4 is provided, it is used to pad left, top, right, bottom borders |
|
respectively. |
|
""" |
|
|
|
def __init__(self, size, padding=0): |
|
if isinstance(size, int): |
|
self.size = (int(size), int(size)) |
|
else: |
|
self.size = size |
|
self.padding = padding |
|
|
|
def __repr__(self): |
|
return "RandomCrop(%s)" % str(self.size) |
|
|
|
@staticmethod |
|
def get_params(img, output_size): |
|
w, h = img.size |
|
th, tw = output_size |
|
assert h >= th and w >= tw, "Image of %dx%d is too small for crop %dx%d" % ( |
|
w, |
|
h, |
|
tw, |
|
th, |
|
) |
|
|
|
y = np.random.randint(0, h - th) if h > th else 0 |
|
x = np.random.randint(0, w - tw) if w > tw else 0 |
|
return x, y, tw, th |
|
|
|
def __call__(self, inp): |
|
img = F.grab_img(inp) |
|
|
|
padl = padt = 0 |
|
if self.padding: |
|
if F.is_pil_image(img): |
|
img = ImageOps.expand(img, border=self.padding, fill=0) |
|
else: |
|
assert isinstance(img, F.DummyImg) |
|
img = img.expand(border=self.padding) |
|
if isinstance(self.padding, int): |
|
padl = padt = self.padding |
|
else: |
|
padl, padt = self.padding[0:2] |
|
|
|
i, j, tw, th = self.get_params(img, self.size) |
|
img = img.crop((i, j, i + tw, j + th)) |
|
|
|
return F.update_img_and_labels( |
|
inp, img, persp=(1, 0, padl - i, 0, 1, padt - j, 0, 0) |
|
) |
|
|
|
|
|
class CenterCrop(RandomCrop): |
|
"""Crops the given PIL Image at the center. |
|
Copied from https://github.com/pytorch in torchvision/transforms/transforms.py |
|
|
|
Args: |
|
size (sequence or int): Desired output size of the crop. If size is an |
|
int instead of sequence like (h, w), a square crop (size, size) is |
|
made. |
|
""" |
|
|
|
@staticmethod |
|
def get_params(img, output_size): |
|
w, h = img.size |
|
th, tw = output_size |
|
y = int(0.5 + ((h - th) / 2.0)) |
|
x = int(0.5 + ((w - tw) / 2.0)) |
|
return x, y, tw, th |
|
|
|
|
|
class RandomRotation(object): |
|
"""Rescale the input PIL.Image to a random size. |
|
Copied from https://github.com/pytorch in torchvision/transforms/transforms.py |
|
|
|
Args: |
|
degrees (float): |
|
rotation angle. |
|
|
|
interpolation (int, optional): Desired interpolation. Default is |
|
``PIL.Image.BILINEAR`` |
|
""" |
|
|
|
def __init__(self, degrees, interpolation=Image.BILINEAR): |
|
self.degrees = degrees |
|
self.interpolation = interpolation |
|
|
|
def __call__(self, inp): |
|
img = F.grab_img(inp) |
|
w, h = img.size |
|
|
|
angle = np.random.uniform(-self.degrees, self.degrees) |
|
|
|
img = img.rotate(angle, resample=self.interpolation) |
|
w2, h2 = img.size |
|
|
|
trf = F.translate(-w / 2, -h / 2) |
|
trf = F.persp_mul(trf, F.rotate(-angle * np.pi / 180)) |
|
trf = F.persp_mul(trf, F.translate(w2 / 2, h2 / 2)) |
|
return F.update_img_and_labels(inp, img, persp=trf) |
|
|
|
|
|
class RandomTilting(object): |
|
"""Apply a random tilting (left, right, up, down) to the input PIL.Image |
|
Copied from https://github.com/pytorch in torchvision/transforms/transforms.py |
|
|
|
Args: |
|
maginitude (float): |
|
maximum magnitude of the random skew (value between 0 and 1) |
|
directions (string): |
|
tilting directions allowed (all, left, right, up, down) |
|
examples: "all", "left,right", "up-down-right" |
|
""" |
|
|
|
def __init__(self, magnitude, directions="all"): |
|
self.magnitude = magnitude |
|
self.directions = directions.lower().replace(",", " ").replace("-", " ") |
|
|
|
def __repr__(self): |
|
return "RandomTilt(%g, '%s')" % (self.magnitude, self.directions) |
|
|
|
def __call__(self, inp): |
|
img = F.grab_img(inp) |
|
w, h = img.size |
|
|
|
x1, y1, x2, y2 = 0, 0, h, w |
|
original_plane = [(y1, x1), (y2, x1), (y2, x2), (y1, x2)] |
|
|
|
max_skew_amount = max(w, h) |
|
max_skew_amount = int(ceil(max_skew_amount * self.magnitude)) |
|
skew_amount = random.randint(1, max_skew_amount) |
|
|
|
if self.directions == "all": |
|
choices = [0, 1, 2, 3] |
|
else: |
|
dirs = ["left", "right", "up", "down"] |
|
choices = [] |
|
for d in self.directions.split(): |
|
try: |
|
choices.append(dirs.index(d)) |
|
except: |
|
raise ValueError("Tilting direction %s not recognized" % d) |
|
|
|
skew_direction = random.choice(choices) |
|
|
|
|
|
|
|
if skew_direction == 0: |
|
|
|
new_plane = [ |
|
(y1, x1 - skew_amount), |
|
(y2, x1), |
|
(y2, x2), |
|
(y1, x2 + skew_amount), |
|
] |
|
elif skew_direction == 1: |
|
|
|
new_plane = [ |
|
(y1, x1), |
|
(y2, x1 - skew_amount), |
|
(y2, x2 + skew_amount), |
|
(y1, x2), |
|
] |
|
elif skew_direction == 2: |
|
|
|
new_plane = [ |
|
(y1 - skew_amount, x1), |
|
(y2 + skew_amount, x1), |
|
(y2, x2), |
|
(y1, x2), |
|
] |
|
elif skew_direction == 3: |
|
|
|
new_plane = [ |
|
(y1, x1), |
|
(y2, x1), |
|
(y2 + skew_amount, x2), |
|
(y1 - skew_amount, x2), |
|
] |
|
|
|
|
|
|
|
matrix = [] |
|
|
|
for p1, p2 in zip(new_plane, original_plane): |
|
matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]]) |
|
matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]]) |
|
|
|
A = np.matrix(matrix, dtype=np.float) |
|
B = np.array(original_plane).reshape(8) |
|
|
|
homography = np.dot(np.linalg.pinv(A), B) |
|
homography = tuple(np.array(homography).reshape(8)) |
|
|
|
|
|
img = img.transform( |
|
img.size, Image.PERSPECTIVE, homography, resample=Image.BICUBIC |
|
) |
|
|
|
homography = np.linalg.pinv( |
|
np.float32(homography + (1,)).reshape(3, 3) |
|
).ravel()[:8] |
|
return F.update_img_and_labels(inp, img, persp=tuple(homography)) |
|
|
|
|
|
RandomTilt = RandomTilting |
|
|
|
|
|
class Tilt(object): |
|
"""Apply a known tilting to an image""" |
|
|
|
def __init__(self, *homography): |
|
assert len(homography) == 8 |
|
self.homography = homography |
|
|
|
def __call__(self, inp): |
|
img = F.grab_img(inp) |
|
homography = self.homography |
|
|
|
|
|
img = img.transform( |
|
img.size, Image.PERSPECTIVE, homography, resample=Image.BICUBIC |
|
) |
|
|
|
homography = np.linalg.pinv( |
|
np.float32(homography + (1,)).reshape(3, 3) |
|
).ravel()[:8] |
|
return F.update_img_and_labels(inp, img, persp=tuple(homography)) |
|
|
|
|
|
class StillTransform(object): |
|
"""Takes and return an image, without changing its shape or geometry.""" |
|
|
|
def _transform(self, img): |
|
raise NotImplementedError() |
|
|
|
def __call__(self, inp): |
|
img = F.grab_img(inp) |
|
|
|
|
|
try: |
|
img = self._transform(img) |
|
except TypeError: |
|
pass |
|
|
|
return F.update_img_and_labels(inp, img, persp=(1, 0, 0, 0, 1, 0, 0, 0)) |
|
|
|
|
|
class PixelNoise(StillTransform): |
|
"""Takes an image, and add random white noise.""" |
|
|
|
def __init__(self, ampl=20): |
|
StillTransform.__init__(self) |
|
assert 0 <= ampl < 255 |
|
self.ampl = ampl |
|
|
|
def __repr__(self): |
|
return "PixelNoise(%g)" % self.ampl |
|
|
|
def _transform(self, img): |
|
img = np.float32(img) |
|
img += np.random.uniform( |
|
0.5 - self.ampl / 2, 0.5 + self.ampl / 2, size=img.shape |
|
) |
|
return Image.fromarray(np.uint8(img.clip(0, 255))) |
|
|
|
|
|
class ColorJitter(StillTransform): |
|
"""Randomly change the brightness, contrast and saturation of an image. |
|
Copied from https://github.com/pytorch in torchvision/transforms/transforms.py |
|
|
|
Args: |
|
brightness (float): How much to jitter brightness. brightness_factor |
|
is chosen uniformly from [max(0, 1 - brightness), 1 + brightness]. |
|
contrast (float): How much to jitter contrast. contrast_factor |
|
is chosen uniformly from [max(0, 1 - contrast), 1 + contrast]. |
|
saturation (float): How much to jitter saturation. saturation_factor |
|
is chosen uniformly from [max(0, 1 - saturation), 1 + saturation]. |
|
hue(float): How much to jitter hue. hue_factor is chosen uniformly from |
|
[-hue, hue]. Should be >=0 and <= 0.5. |
|
""" |
|
|
|
def __init__(self, brightness=0, contrast=0, saturation=0, hue=0): |
|
self.brightness = brightness |
|
self.contrast = contrast |
|
self.saturation = saturation |
|
self.hue = hue |
|
|
|
def __repr__(self): |
|
return "ColorJitter(%g,%g,%g,%g)" % ( |
|
self.brightness, |
|
self.contrast, |
|
self.saturation, |
|
self.hue, |
|
) |
|
|
|
@staticmethod |
|
def get_params(brightness, contrast, saturation, hue): |
|
"""Get a randomized transform to be applied on image. |
|
Arguments are same as that of __init__. |
|
Returns: |
|
Transform which randomly adjusts brightness, contrast and |
|
saturation in a random order. |
|
""" |
|
transforms = [] |
|
if brightness > 0: |
|
brightness_factor = np.random.uniform( |
|
max(0, 1 - brightness), 1 + brightness |
|
) |
|
transforms.append( |
|
tvf.Lambda(lambda img: F.adjust_brightness(img, brightness_factor)) |
|
) |
|
|
|
if contrast > 0: |
|
contrast_factor = np.random.uniform(max(0, 1 - contrast), 1 + contrast) |
|
transforms.append( |
|
tvf.Lambda(lambda img: F.adjust_contrast(img, contrast_factor)) |
|
) |
|
|
|
if saturation > 0: |
|
saturation_factor = np.random.uniform( |
|
max(0, 1 - saturation), 1 + saturation |
|
) |
|
transforms.append( |
|
tvf.Lambda(lambda img: F.adjust_saturation(img, saturation_factor)) |
|
) |
|
|
|
if hue > 0: |
|
hue_factor = np.random.uniform(-hue, hue) |
|
transforms.append(tvf.Lambda(lambda img: F.adjust_hue(img, hue_factor))) |
|
|
|
|
|
|
|
np.random.shuffle(transforms) |
|
transform = tvf.Compose(transforms) |
|
|
|
return transform |
|
|
|
def _transform(self, img): |
|
transform = self.get_params( |
|
self.brightness, self.contrast, self.saturation, self.hue |
|
) |
|
return transform(img) |
|
|
|
|
|
if __name__ == "__main__": |
|
import argparse |
|
|
|
parser = argparse.ArgumentParser("Script to try out and visualize transformations") |
|
parser.add_argument("--img", type=str, default="imgs/test.png", help="input image") |
|
parser.add_argument( |
|
"--trfs", type=str, required=True, help="list of transformations" |
|
) |
|
parser.add_argument( |
|
"--layout", type=int, nargs=2, default=(3, 3), help="nb of rows,cols" |
|
) |
|
args = parser.parse_args() |
|
|
|
import os |
|
|
|
args.img = args.img.replace("$HERE", os.path.dirname(__file__)) |
|
img = Image.open(args.img) |
|
img = dict(img=img) |
|
|
|
trfs = instanciate_transformation(args.trfs) |
|
|
|
from matplotlib import pyplot as pl |
|
|
|
pl.ion() |
|
pl.subplots_adjust(0, 0, 1, 1) |
|
|
|
nr, nc = args.layout |
|
|
|
while True: |
|
for j in range(nr): |
|
for i in range(nc): |
|
pl.subplot(nr, nc, i + j * nc + 1) |
|
if i == j == 0: |
|
img2 = img |
|
else: |
|
img2 = trfs(img.copy()) |
|
if isinstance(img2, dict): |
|
img2 = img2["img"] |
|
pl.imshow(img2) |
|
pl.xlabel("%d x %d" % img2.size) |
|
pl.xticks(()) |
|
pl.yticks(()) |
|
pdb.set_trace() |
|
|