File size: 3,406 Bytes
8e5d8c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import cv2
import numpy as np
import albumentations as albu
from albumentations.augmentations.geometric.resize import LongestMaxSize

def round_pixel_dim(dimension: float) -> int:
    """Round a fractional pixel dimension to an int, sending exact .5 ties
    to the nearest even integer (matches OpenCV-style pixel rounding)."""
    nearest = round(dimension)
    if abs(nearest - dimension) != 0.5:
        return int(nearest)
    # Tie case: halve, round, and double to land on the even neighbour.
    return int(2.0 * round(dimension / 2.0))

def resize_with_padding(image, target_size, stride=32, interpolation=cv2.INTER_LINEAR):
    """Scale `image` so its longest side equals `target_size`, then round each
    output dimension up to the next multiple of `stride`.

    Returns the input untouched when both sides are already stride multiples
    and the longest side does not exceed `target_size`.
    """
    height, width = image.shape[:2]
    longest_side = max(height, width)

    stride_aligned = (height % stride == 0) and (width % stride == 0)
    if stride_aligned and longest_side <= target_size:
        return image

    ratio = target_size / float(longest_side)
    scaled = [round_pixel_dim(side * ratio) for side in (height, width)]

    # Bump any side that is not a stride multiple up to the next one.
    out_height, out_width = (
        side if side % stride == 0 else (side // stride + 1) * stride
        for side in scaled
    )
    return cv2.resize(image, (out_width, out_height), interpolation=interpolation)

class PaddedResize(LongestMaxSize):
    """`LongestMaxSize` variant that also rounds the resized dimensions up to
    a stride multiple via `resize_with_padding`."""

    def apply(self, img: np.ndarray, target_size: int = 1024,
              interpolation: int = cv2.INTER_LINEAR,
              max_size: int = None, **params) -> np.ndarray:
        # BUG FIX: albumentations' LongestMaxSize passes the configured size as
        # the `max_size` keyword. The old override only accepted `target_size`,
        # so the framework-supplied value fell into **params and the hard-coded
        # 1024 default was used for every call. Accept `max_size` (preferred
        # when present) while keeping `target_size` for direct callers.
        effective_size = max_size if max_size is not None else target_size
        return resize_with_padding(img, target_size=effective_size,
                                   interpolation=interpolation)
    
def get_training_augmentations(width=768, height=576):
    """Build the train-time augmentation pipeline: geometric jitter followed
    by noise/colour perturbations, cropped to a square of the longer side."""
    crop_size = width if width > height else height

    geometric = [
        albu.HorizontalFlip(p=0.5),
        albu.ShiftScaleRotate(
            scale_limit=0.5, rotate_limit=90, shift_limit=0.1, p=0.5, border_mode=0),
        albu.PadIfNeeded(min_height=crop_size, min_width=crop_size, always_apply=True),
        albu.RandomCrop(height=crop_size, width=crop_size, always_apply=True),
    ]
    photometric = [
        albu.GaussNoise(p=0.2),
        albu.Perspective(p=0.2),
        albu.OneOf([albu.CLAHE(p=1), albu.RandomGamma(p=1)], p=0.33),
        albu.OneOf(
            [albu.Sharpen(p=1),
             albu.Blur(blur_limit=3, p=1),
             albu.MotionBlur(blur_limit=3, p=1)],
            p=0.33),
        albu.OneOf(
            [albu.RandomBrightnessContrast(p=1),
             albu.HueSaturationValue(p=1)],
            p=0.33),
    ]
    return albu.Compose(geometric + photometric)

def get_validation_augmentations(width=1920, height=1440, fixed_size=True):
    """Build the validation/inference resize pipeline.

    With `fixed_size` the image is forced to exactly (height, width);
    otherwise the aspect ratio is kept and the longest side is capped.
    """
    if not fixed_size:
        longest_side = max(width, height)
        return albu.Compose([PaddedResize(max_size=longest_side, always_apply=True)])

    return albu.Compose([albu.Resize(height=height, width=width, always_apply=True)])

def convert_to_tensor(x, **kwargs):
    """Rearrange an HW or HWC image array into CHW float32 (PyTorch layout)."""
    # Promote grayscale HW input to HWC with a single channel.
    hwc = x[..., np.newaxis] if x.ndim == 2 else x
    return hwc.transpose(2, 0, 1).astype('float32')

def get_preprocessing_pipeline(preprocessing_fn):
    """Compose model-specific normalization with CHW tensor conversion."""
    return albu.Compose([
        albu.Lambda(image=preprocessing_fn),
        albu.Lambda(image=convert_to_tensor, mask=convert_to_tensor),
    ])