import cv2
import numpy as np
import albumentations as albu
from albumentations.augmentations.geometric.resize import LongestMaxSize


def round_pixel_dim(dimension: float) -> int:
    """Rounds a pixel dimension, resolving exact .5 ties to the nearest even integer."""
    if abs(round(dimension) - dimension) == 0.5:
        return int(2.0 * round(dimension / 2.0))
    return int(round(dimension))
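# Example (illustrative): round_pixel_dim(2.5) -> 2 and round_pixel_dim(3.5) -> 4,
# i.e. exact halves resolve to the nearest even integer instead of always rounding up.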
def resize_with_padding(image, target_size, stride=32, interpolation=cv2.INTER_LINEAR):
    """Resizes the image so its longest side fits target_size, then rounds each side up
    to the nearest multiple of stride (the aspect ratio may change slightly)."""
    height, width = image.shape[:2]
    max_dimension = max(height, width)
    if ((height % stride == 0) and (width % stride == 0) and
            (max_dimension <= target_size)):
        return image
    scale = target_size / float(max_dimension)
    new_dims = tuple(round_pixel_dim(dim * scale) for dim in (height, width))
    new_height, new_width = new_dims
    new_height = ((new_height // stride + 1) * stride
                  if new_height % stride != 0 else new_height)
    new_width = ((new_width // stride + 1) * stride
                 if new_width % stride != 0 else new_width)
    return cv2.resize(image, (new_width, new_height), interpolation=interpolation)
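# Example (illustrative): a 1000x750 image with target_size=1024 and stride=32 is scaled
# by 1.024 to 1024x768; both sides are already multiples of 32, so no rounding-up is needed.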
class PaddedResize(LongestMaxSize):
    """LongestMaxSize variant whose output sides are rounded up to stride multiples."""

    def apply(self, img: np.ndarray, max_size: int = 1024,
              interpolation: int = cv2.INTER_LINEAR, **params) -> np.ndarray:
        # LongestMaxSize passes the configured size to apply() as ``max_size``,
        # so the override must accept that name to pick it up.
        return resize_with_padding(img, target_size=max_size, interpolation=interpolation)
def get_training_augmentations(width=768, height=576):
    """Configures training-time augmentations."""
    target_size = max(width, height)
    transforms = [
        albu.HorizontalFlip(p=0.5),
        albu.ShiftScaleRotate(
            scale_limit=0.5, rotate_limit=90, shift_limit=0.1, p=0.5, border_mode=0),
        albu.PadIfNeeded(min_height=target_size, min_width=target_size, always_apply=True),
        albu.RandomCrop(height=target_size, width=target_size, always_apply=True),
        albu.GaussNoise(p=0.2),
        albu.Perspective(p=0.2),
        albu.OneOf([albu.CLAHE(p=1), albu.RandomGamma(p=1)], p=0.33),
        albu.OneOf([
            albu.Sharpen(p=1),
            albu.Blur(blur_limit=3, p=1),
            albu.MotionBlur(blur_limit=3, p=1)], p=0.33),
        albu.OneOf([
            albu.RandomBrightnessContrast(p=1),
            albu.HueSaturationValue(p=1)], p=0.33),
    ]
    return albu.Compose(transforms)
def get_validation_augmentations(width=1920, height=1440, fixed_size=True):
    """Configures validation/inference-time augmentations."""
    if fixed_size:
        transforms = [albu.Resize(height=height, width=width, always_apply=True)]
        return albu.Compose(transforms)
    target_size = max(width, height)
    transforms = [PaddedResize(max_size=target_size, always_apply=True)]
    return albu.Compose(transforms)
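# Example (illustrative): with fixed_size=False the defaults give max_size=1920, so a
# 3000x4000 input comes out as 1440x1920 -- longest side capped at 1920, both sides
# divisible by the stride of 32.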
def convert_to_tensor(x, **kwargs):
    """Converts image array to PyTorch tensor layout (CHW, float32)."""
    if x.ndim == 2:
        # give grayscale images an explicit channel axis before transposing
        x = np.expand_dims(x, axis=-1)
    return x.transpose(2, 0, 1).astype('float32')
def get_preprocessing_pipeline(preprocessing_fn):
    """Builds preprocessing pipeline including normalization and tensor conversion."""
    transforms = [
        albu.Lambda(image=preprocessing_fn),
        albu.Lambda(image=convert_to_tensor, mask=convert_to_tensor),
    ]
    return albu.Compose(transforms)
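# Illustrative usage sketch (not part of the original file): wires the training
# augmentations and the preprocessing pipeline together. ``normalize_image`` is a
# hypothetical stand-in for a model-specific preprocessing_fn.
if __name__ == "__main__":
    def normalize_image(x, **kwargs):
        # hypothetical normalizer: scale uint8 pixels to [0, 1]
        return x.astype("float32") / 255.0

    sample = np.random.randint(0, 256, size=(576, 768, 3), dtype=np.uint8)
    augmented = get_training_augmentations(width=768, height=576)(image=sample)["image"]
    prepared = get_preprocessing_pipeline(normalize_image)(image=augmented)["image"]
    print(prepared.shape)  # expected: (3, 768, 768) after the 768x768 pad-and-crop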