| """ |
| Augmentation Pipeline for Face Detection. |
| |
| Implements SCRFD's "Sample Redistribution" strategy plus production-grade |
| robustness augmentations for: |
| - Tiny faces (large-scale crops generate small face positives) |
| - Blur (Gaussian, motion blur) |
| - Compression artifacts (JPEG quality degradation) |
| - Low-light / poor illumination (brightness/gamma jitter) |
| - Occlusion (random erasing simulating partial occlusion) |
| |
| Training augmentation pipeline (from SCRFD + TinaFace papers): |
| 1. Random crop with scale [0.3, 2.0] (Sample Redistribution) |
| 2. Resize to target size (640×640) |
| 3. Photometric distortion (brightness, contrast, hue, saturation) |
| 4. Horizontal flip (p=0.5) |
| 5. Random blur / compression / lighting degradation |
6. Normalize (per-channel mean subtraction; default mean (104, 117, 123) is the Caffe-style BGR mean)
| """ |
|
|
| import numpy as np |
| import cv2 |
| from typing import Dict, Tuple, Optional |
|
|
|
|
class TrainAugmentation:
    """
    Full training augmentation with SCRFD Sample Redistribution.

    The key insight: using crop scales up to 2.0x generates more
    small-face positive anchors at stride 8 (72K -> 118K per paper).

    Pipeline (in order): random crop -> resize -> photometric distortion
    -> horizontal flip -> robustness degradations -> mean subtraction.

    Expected annotation format (inferred from the indexing below):
        boxes:     float array (N, 4) as [x1, y1, x2, y2] in pixels.
        landmarks: float array (N, 10) as five (x, y) pairs in pixels.
    """

    def __init__(self,
                 target_size: int = 640,
                 crop_scales: list = None,
                 mean: tuple = (104.0, 117.0, 123.0),
                 flip_prob: float = 0.5,
                 enable_robustness: bool = True):
        """
        Args:
            target_size: Output square side length in pixels.
            crop_scales: Candidate crop scales relative to min(H, W);
                defaults to SCRFD's redistribution range [0.3, 2.0].
            mean: Per-channel mean subtracted at the end. Default is the
                Caffe-style BGR mean; NOTE(review): `_photometric_distort`
                converts with COLOR_RGB2HSV, so confirm the channel order
                the data loader actually provides.
            flip_prob: Probability of a horizontal flip.
            enable_robustness: Apply blur/JPEG/low-light/occlusion/noise
                degradations via RobustnessAugmentation.
        """
        self.target_size = target_size
        self.crop_scales = crop_scales or [0.3, 0.45, 0.6, 0.8, 1.0,
                                           1.2, 1.4, 1.6, 1.8, 2.0]
        self.mean = np.array(mean, dtype=np.float32)
        self.flip_prob = flip_prob
        self.enable_robustness = enable_robustness
        self.robustness_aug = RobustnessAugmentation() if enable_robustness else None

    def __call__(self, image: np.ndarray, boxes: np.ndarray,
                 landmarks: np.ndarray) -> Dict:
        """Run the full pipeline; returns {'image', 'boxes', 'landmarks'}."""
        # 1. Sample-redistribution crop (drops faces that leave the crop).
        image, boxes, landmarks = self._random_crop(image, boxes, landmarks)

        # 2. Square resize to target_size (annotations rescaled to match).
        image, boxes, landmarks = self._resize(image, boxes, landmarks)

        # 3. Photometric jitter (brightness / contrast / hue / saturation).
        image = self._photometric_distort(image)

        # 4. Horizontal flip with left/right landmark reordering.
        if np.random.random() < self.flip_prob:
            image, boxes, landmarks = self._hflip(image, boxes, landmarks)

        # 5. Robustness degradations (blur, JPEG, low-light, occlusion, noise).
        if self.enable_robustness and self.robustness_aug:
            image = self.robustness_aug(image)

        # 6. Mean subtraction (no std division, Caffe-style).
        image = image.astype(np.float32) - self.mean

        return {'image': image, 'boxes': boxes, 'landmarks': landmarks}

    def _random_crop(self, image: np.ndarray, boxes: np.ndarray,
                     landmarks: np.ndarray) -> Tuple:
        """Random square crop with sample-redistribution scales.

        Returns (cropped_image, boxes, landmarks) in crop coordinates.
        If no face survives the crop, falls back to the UNCROPPED
        (possibly padded) image with the original annotations — padding
        only extends the bottom/right edges, so coordinates stay valid.
        """
        h, w = image.shape[:2]
        scale = np.random.choice(self.crop_scales)
        crop_size = max(int(min(h, w) * scale), 32)

        # Pad bottom/right so the crop window always fits.
        # BUGFIX: the original padded only when crop_size > max(h, w), so a
        # non-square image with crop_size between min and max side produced
        # a non-square "square" crop with mis-clipped boxes.
        if crop_size > h or crop_size > w:
            pad_h = max(crop_size - h, 0)
            pad_w = max(crop_size - w, 0)
            image = cv2.copyMakeBorder(image, 0, pad_h, 0, pad_w,
                                       cv2.BORDER_CONSTANT, value=(0, 0, 0))
            h, w = image.shape[:2]

        # BUGFIX: randint's high bound is exclusive; the original could
        # never place the crop at the rightmost/bottom position.
        max_x = w - crop_size
        max_y = h - crop_size
        x1 = np.random.randint(0, max_x + 1)
        y1 = np.random.randint(0, max_y + 1)
        x2 = x1 + crop_size
        y2 = y1 + crop_size

        cropped = image[y1:y2, x1:x2]

        # Translate boxes into crop coordinates, then clip to the crop.
        new_boxes = boxes.copy()
        new_boxes[:, [0, 2]] -= x1
        new_boxes[:, [1, 3]] -= y1
        new_boxes[:, 0:4] = np.clip(new_boxes[:, 0:4], 0, crop_size)

        # Keep faces that remain reasonably visible: > 2 px on each side
        # and at least 20% of their original area inside the crop.
        orig_areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        new_widths = new_boxes[:, 2] - new_boxes[:, 0]
        new_heights = new_boxes[:, 3] - new_boxes[:, 1]
        valid = (new_widths > 2) & (new_heights > 2) & \
                (new_widths * new_heights > 0.2 * orig_areas)

        if not valid.any():
            # BUGFIX: the original fallback returned a top-left square slice
            # together with the UNTRANSLATED boxes/landmarks, which could
            # leave annotations outside the returned image. Returning the
            # full image keeps image and annotations consistent.
            return image, boxes, landmarks

        new_boxes = new_boxes[valid]

        # Translate the surviving landmarks (five x/y pairs).
        # NOTE(review): missing-landmark sentinels (e.g. -1) are shifted
        # too — confirm how downstream code flags invalid landmarks.
        new_lmk = landmarks[valid].copy()
        new_lmk[:, 0::2] -= x1
        new_lmk[:, 1::2] -= y1

        return cropped, new_boxes, new_lmk

    def _resize(self, image: np.ndarray, boxes: np.ndarray,
                landmarks: np.ndarray) -> Tuple:
        """Resize to target_size x target_size and rescale annotations.

        BUGFIX: scales copies of boxes/landmarks — the original multiplied
        the caller's arrays in place, which corrupted the ground truth
        whenever `_random_crop` fell back to the original arrays.
        """
        h, w = image.shape[:2]
        scale_x = self.target_size / w
        scale_y = self.target_size / h

        image = cv2.resize(image, (self.target_size, self.target_size))

        boxes = boxes.astype(np.float32)       # astype copies
        boxes[:, 0::2] *= scale_x
        boxes[:, 1::2] *= scale_y

        landmarks = landmarks.astype(np.float32)
        landmarks[:, 0::2] *= scale_x
        landmarks[:, 1::2] *= scale_y

        return image, boxes, landmarks

    def _photometric_distort(self, image: np.ndarray) -> np.ndarray:
        """Random brightness, contrast, hue and saturation jitter.

        Each distortion fires independently with probability 0.5.
        Returns a float32 image clipped to [0, 255].
        """
        image = image.astype(np.float32)

        # Brightness: additive shift in [-32, 32].
        if np.random.random() < 0.5:
            image += np.random.uniform(-32, 32)

        # Contrast: multiplicative gain in [0.5, 1.5].
        if np.random.random() < 0.5:
            image *= np.random.uniform(0.5, 1.5)

        # Hue / saturation jitter in HSV space.
        # NOTE(review): COLOR_RGB2HSV assumes RGB input while the default
        # mean (104, 117, 123) is the BGR-order Caffe mean — confirm the
        # loader's channel order.
        if np.random.random() < 0.5:
            image_uint8 = np.clip(image, 0, 255).astype(np.uint8)
            hsv = cv2.cvtColor(image_uint8, cv2.COLOR_RGB2HSV).astype(np.float32)

            # BUGFIX: hue is circular — wrap with mod 180 (OpenCV 8-bit hue
            # range) instead of clipping, which piled shifted reds onto the
            # 0/180 boundary.
            hsv[:, :, 0] = (hsv[:, :, 0] + np.random.uniform(-18, 18)) % 180

            hsv[:, :, 1] *= np.random.uniform(0.5, 1.5)
            hsv[:, :, 1] = np.clip(hsv[:, :, 1], 0, 255)

            image = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2RGB).astype(np.float32)

        return np.clip(image, 0, 255)

    def _hflip(self, image: np.ndarray, boxes: np.ndarray,
               landmarks: np.ndarray) -> Tuple:
        """Horizontal flip with left/right landmark reordering."""
        w = image.shape[1]
        image = image[:, ::-1].copy()

        # Mirror boxes: x1' = w - x2, x2' = w - x1 (y unchanged).
        new_boxes = boxes.copy()
        new_boxes[:, 0] = w - boxes[:, 2]
        new_boxes[:, 2] = w - boxes[:, 0]

        # Mirror every landmark x coordinate.
        new_lmk = landmarks.copy()
        new_lmk[:, 0::2] = w - landmarks[:, 0::2]

        # Swap left/right points so semantics survive the mirror
        # (eye <-> eye, mouth corner <-> mouth corner); the nose
        # (indices 4, 5) is its own mirror image.
        # NOTE(review): missing-landmark sentinels (e.g. -1) get mirrored
        # into positive garbage before this check — confirm sentinel use.
        if new_lmk.shape[0] > 0 and np.any(new_lmk > 0):
            new_lmk[:, [0, 1, 2, 3]] = new_lmk[:, [2, 3, 0, 1]]
            new_lmk[:, [6, 7, 8, 9]] = new_lmk[:, [8, 9, 6, 7]]

        return image, new_boxes, new_lmk
|
|
|
|
class ValAugmentation:
    """Validation preprocessing: letterbox resize + mean subtraction.

    The aspect ratio is preserved: the image is scaled so its longer side
    equals target_size, then padded bottom/right with black to a square.
    Bottom/right padding leaves annotation coordinates valid after the
    uniform rescale.
    """

    def __init__(self, target_size: int = 640,
                 mean: tuple = (104.0, 117.0, 123.0)):
        """
        Args:
            target_size: Output square side length in pixels.
            mean: Per-channel mean subtracted from the image (Caffe-style
                BGR mean by default).
        """
        self.target_size = target_size
        self.mean = np.array(mean, dtype=np.float32)

    def __call__(self, image: np.ndarray, boxes: np.ndarray,
                 landmarks: np.ndarray) -> Dict:
        """Letterbox the image and rescale annotations.

        Returns a dict with keys 'image' (float32, mean-subtracted),
        'boxes' (N, 4) and 'landmarks' (N, 10), both in output pixels.
        """
        h, w = image.shape[:2]

        # Uniform scale so the longer side fits target_size exactly.
        scale = self.target_size / max(h, w)
        new_h, new_w = int(h * scale), int(w * scale)
        image = cv2.resize(image, (new_w, new_h))

        # Pad bottom/right to a square; coordinates are unaffected.
        pad_h = self.target_size - new_h
        pad_w = self.target_size - new_w
        image = cv2.copyMakeBorder(image, 0, pad_h, 0, pad_w,
                                   cv2.BORDER_CONSTANT, value=(0, 0, 0))

        # BUGFIX: rescale copies — the original multiplied the caller's
        # boxes/landmarks in place, silently corrupting the ground truth
        # on every pass over the dataset.
        boxes = boxes.astype(np.float32) * scale
        landmarks = landmarks.astype(np.float32) * scale

        image = image.astype(np.float32) - self.mean

        return {'image': image, 'boxes': boxes, 'landmarks': landmarks}
|
|
|
|
class RobustnessAugmentation:
    """
    Production-grade robustness augmentations targeting known failure modes.

    Applied with probability during training to make the detector robust to:
    1. Gaussian blur (sigma = 0.5-3.0) -- camera defocus, motion blur
    2. JPEG compression (Q = 20-80) -- streaming/compression artifacts
    3. Low-light gamma (gamma = 1.5-3.0) -- dark environments
    4. Random occlusion (Cutout) -- partial face occlusion
    5. Gaussian noise -- sensor noise, low-light grain

    Input: HxWx3 image with values in [0, 255]. Output: float32 image,
    same shape. Each degradation fires independently, so several may be
    applied to the same image. The occlusion step mutates the input
    array in place.
    """

    def __init__(self,
                 blur_prob: float = 0.2,
                 jpeg_prob: float = 0.2,
                 lowlight_prob: float = 0.15,
                 occlusion_prob: float = 0.1,
                 noise_prob: float = 0.15):
        # Independent per-degradation application probabilities.
        self.blur_prob = blur_prob
        self.jpeg_prob = jpeg_prob
        self.lowlight_prob = lowlight_prob
        self.occlusion_prob = occlusion_prob
        self.noise_prob = noise_prob

    def __call__(self, image: np.ndarray) -> np.ndarray:
        # --- Gaussian blur (defocus / motion) ---
        if np.random.random() < self.blur_prob:
            sigma = np.random.uniform(0.5, 3.0)
            ksize = int(sigma * 6) | 1  # odd kernel spanning ~±3 sigma
            image = cv2.GaussianBlur(image, (ksize, ksize), sigma)

        # --- JPEG compression artifacts ---
        if np.random.random() < self.jpeg_prob:
            # BUGFIX: randint's high bound is exclusive; use 81 so the
            # documented quality range 20-80 is fully reachable.
            quality = int(np.random.randint(20, 81))
            encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
            # BUGFIX: clip before the uint8 cast — float values outside
            # [0, 255] would otherwise wrap around during conversion.
            src = np.clip(image, 0, 255).astype(np.uint8)
            _, buf = cv2.imencode('.jpg', src, encode_param)
            image = cv2.imdecode(buf, cv2.IMREAD_COLOR).astype(np.float32)

        # --- Low-light simulation: gamma > 1 darkens mid-tones ---
        if np.random.random() < self.lowlight_prob:
            gamma = np.random.uniform(1.5, 3.0)
            image = np.clip(image, 0, 255)
            image = (image / 255.0) ** gamma * 255.0

        # --- Random occlusion (Cutout-style filled rectangle) ---
        if np.random.random() < self.occlusion_prob:
            h, w = image.shape[:2]
            # BUGFIX: guard tiny images — the original raised ValueError
            # (randint low >= high) whenever h or w fell below ~8 px.
            if h >= 8 and w >= 8:
                rh = np.random.randint(max(h // 10, 1), max(h // 4, 2))
                rw = np.random.randint(max(w // 10, 1), max(w // 4, 2))
                ry = np.random.randint(0, h - rh)
                rx = np.random.randint(0, w - rw)
                # BUGFIX: upper bound 256 so pure white (255) is reachable.
                image[ry:ry + rh, rx:rx + rw] = np.random.randint(0, 256, 3)

        # --- Additive Gaussian sensor noise ---
        if np.random.random() < self.noise_prob:
            sigma = np.random.uniform(5, 25)
            noise = np.random.randn(*image.shape) * sigma
            image = np.clip(image + noise, 0, 255)

        return image.astype(np.float32)
|
|