| """
|
| Dataset-aware preprocessing for document forgery detection
|
| Implements Critical Fix #1: Dataset-Aware Preprocessing
|
| """
|
|
|
| import cv2
|
| import numpy as np
|
| from typing import Tuple, Optional
|
| import pywt
|
| from scipy import ndimage
|
|
|
|
|
class DocumentPreprocessor:
    """Dataset-aware document preprocessing.

    Calling an instance runs, in order: colour conversion to RGB, optional
    deskew, resize to a square of ``image_size`` pixels, scaling by 1/255 and
    optional noise-gated denoising.  Deskew and denoising are switched off
    per dataset through the configuration object.
    """

    # Fallbacks used when a setting is missing or no config is supplied.
    DEFAULT_IMAGE_SIZE = 384
    DEFAULT_NOISE_THRESHOLD = 15.0
    # Rotations at or below this magnitude (degrees) are not applied.
    MIN_DESKEW_ANGLE = 0.5

    def __init__(self, config, dataset_name: str):
        """
        Initialize preprocessor

        Args:
            config: Configuration object exposing ``get(key, default)``,
                ``should_skip_deskew(name)`` and ``should_skip_denoising(name)``.
                May be ``None``, in which case the built-in defaults are used
                and neither deskew nor denoising is skipped.
            dataset_name: Name of dataset (for dataset-aware processing)
        """
        self.config = config
        self.dataset_name = dataset_name

        if config is None:
            # preprocess_image() defaults config to None; without this guard
            # the attribute lookups below would raise AttributeError.
            self.image_size = self.DEFAULT_IMAGE_SIZE
            self.noise_threshold = self.DEFAULT_NOISE_THRESHOLD
            self.skip_deskew = False
            self.skip_denoising = False
            return

        self.image_size = config.get('data.image_size', self.DEFAULT_IMAGE_SIZE)
        self.noise_threshold = config.get('preprocessing.noise_threshold',
                                          self.DEFAULT_NOISE_THRESHOLD)

        self.skip_deskew = config.should_skip_deskew(dataset_name)
        self.skip_denoising = config.should_skip_denoising(dataset_name)

    def __call__(self, image: np.ndarray, mask: Optional[np.ndarray] = None) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Apply preprocessing pipeline

        Args:
            image: Input image of shape (H, W), (H, W, 1), (H, W, 3) or
                (H, W, 4).  3- and 4-channel inputs are converted with
                BGR->RGB / BGRA->RGB, i.e. they are treated as OpenCV-ordered.
            mask: Optional ground truth mask (H, W)

        Returns:
            Preprocessed image (float32, scaled by 1/255) and the mask,
            both resized to ``image_size`` x ``image_size``.  The mask is
            ``None`` when none was given.
        """
        # Bring every supported layout to 3-channel RGB.
        if image.ndim == 2 or image.shape[2] == 1:
            # (H, W, 1) is single-channel too; left unconverted it would break
            # the RGB-only steps further down.
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        elif image.shape[2] == 4:
            image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
        elif image.shape[2] == 3:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if not self.skip_deskew:
            image, mask = self._deskew(image, mask)

        image, mask = self._resize(image, mask)

        image = self._normalize(image)

        # Denoise only when the estimate exceeds the configured threshold, so
        # clean images are left untouched.
        if not self.skip_denoising:
            noise_level = self._estimate_noise(image)
            if noise_level > self.noise_threshold:
                image = self._denoise(image)

        return image, mask

    @staticmethod
    def _skew_angle_from_lines(lines: Optional[np.ndarray]) -> float:
        """
        Estimate the page skew from Hough line parameters

        Args:
            lines: Output of ``cv2.HoughLines`` (rows of ``(rho, theta)``,
                theta in radians), or ``None`` when no line was found

        Returns:
            Median skew in degrees within [-45, 45); 0.0 when there are no lines
        """
        if lines is None or len(lines) == 0:
            return 0.0

        thetas = np.asarray(lines, dtype=np.float64).reshape(-1, 2)[:, 1]
        angles = np.degrees(thetas) - 90.0

        # theta is the direction of the line normal, so a near-vertical line
        # yields roughly +/-90 here.  Folding modulo 90 makes horizontal and
        # vertical lines report the same small skew instead of letting
        # vertical rules drag the median towards a quarter turn.
        folded = (angles + 45.0) % 90.0 - 45.0

        return float(np.median(folded))

    @staticmethod
    def _to_uint8(image: np.ndarray) -> np.ndarray:
        """
        Convert a [0, 1]-scaled image back to 8-bit

        Args:
            image: Input image (normalized)

        Returns:
            uint8 image; values are rounded to nearest and clipped to [0, 255]
        """
        # A bare astype(np.uint8) truncates (254.999 -> 254) and wraps on
        # out-of-range values; round and clip first.
        return np.clip(np.rint(image * 255.0), 0, 255).astype(np.uint8)

    def _deskew(self, image: np.ndarray, mask: Optional[np.ndarray] = None) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Deskew document image

        Args:
            image: Input image (RGB)
            mask: Optional mask

        Returns:
            Deskewed image and mask; both are returned unchanged when no
            lines are detected or the estimated skew is at most
            ``MIN_DESKEW_ANGLE`` degrees
        """
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        edges = cv2.Canny(gray, 50, 150, apertureSize=3)

        # 1 px / 1 degree accumulator resolution, 200 votes minimum.
        lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)

        angle = self._skew_angle_from_lines(lines)
        if abs(angle) <= self.MIN_DESKEW_ANGLE:
            return image, mask

        h, w = image.shape[:2]
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)

        # Output keeps the original (w, h); replicated borders fill the
        # corners exposed by the rotation.
        image = cv2.warpAffine(image, M, (w, h),
                               flags=cv2.INTER_CUBIC,
                               borderMode=cv2.BORDER_REPLICATE)

        if mask is not None:
            # Nearest-neighbour keeps mask labels discrete; exposed corners
            # are filled with 0.
            mask = cv2.warpAffine(mask, M, (w, h),
                                  flags=cv2.INTER_NEAREST,
                                  borderMode=cv2.BORDER_CONSTANT,
                                  borderValue=0)

        return image, mask

    def _resize(self, image: np.ndarray, mask: Optional[np.ndarray] = None) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Resize image and mask to target size

        The target is a square of ``image_size`` pixels; aspect ratio is not
        preserved.

        Args:
            image: Input image
            mask: Optional mask

        Returns:
            Resized image and mask
        """
        target_size = (self.image_size, self.image_size)

        image = cv2.resize(image, target_size, interpolation=cv2.INTER_CUBIC)

        if mask is not None:
            # Nearest-neighbour avoids inventing intermediate label values.
            mask = cv2.resize(mask, target_size, interpolation=cv2.INTER_NEAREST)

        return image, mask

    def _normalize(self, image: np.ndarray) -> np.ndarray:
        """
        Normalize pixel values to [0, 1]

        The image is cast to float32 and divided by 255, so the [0, 1] range
        holds for 8-bit input.

        Args:
            image: Input image

        Returns:
            Normalized image
        """
        return image.astype(np.float32) / 255.0

    def _estimate_noise(self, image: np.ndarray) -> float:
        """
        Estimate noise level using Laplacian variance and wavelet-based estimation

        Args:
            image: Input image (normalized)

        Returns:
            Estimated noise level: the mean of the Laplacian variance and the
            wavelet sigma, both computed on the 0-255 grayscale image
        """
        gray = self._to_uint8(image)
        if gray.ndim == 3:
            gray = cv2.cvtColor(gray, cv2.COLOR_RGB2GRAY)

        laplacian_var = cv2.Laplacian(gray, cv2.CV_64F).var()

        # Median absolute deviation of the diagonal detail band, scaled by
        # 0.6745 to approximate a Gaussian standard deviation.
        _, (_, _, cD) = pywt.dwt2(gray, 'db1')
        sigma = np.median(np.abs(cD)) / 0.6745

        # NOTE(review): the two terms are on different scales (a variance and
        # a standard deviation) - confirm the threshold was calibrated
        # against this combined value.
        return float((laplacian_var + sigma) / 2.0)

    def _denoise(self, image: np.ndarray) -> np.ndarray:
        """
        Apply conditional denoising

        Runs a 3x3 median filter followed by a 3x3 Gaussian blur
        (sigma 0.8) on the 8-bit version of the image.

        Args:
            image: Input image (normalized)

        Returns:
            Denoised image, float32 scaled by 1/255
        """
        image_uint8 = self._to_uint8(image)

        median_filtered = cv2.medianBlur(image_uint8, 3)
        gaussian_filtered = cv2.GaussianBlur(median_filtered, (3, 3), 0.8)

        return gaussian_filtered.astype(np.float32) / 255.0
|
|
|
|
|
def preprocess_image(image: np.ndarray,
                     mask: Optional[np.ndarray] = None,
                     config = None,
                     dataset_name: str = 'default') -> Tuple[np.ndarray, Optional[np.ndarray]]:
    """
    One-shot helper: build a DocumentPreprocessor and run it once

    A new preprocessor is constructed on every call, so prefer creating a
    DocumentPreprocessor directly when processing many images with the same
    settings.

    Args:
        image: Input image
        mask: Optional mask
        config: Configuration object, handed to DocumentPreprocessor
            unchanged (including the default ``None``)
        dataset_name: Dataset name

    Returns:
        Preprocessed image and mask
    """
    return DocumentPreprocessor(config, dataset_name)(image, mask)
|
|
|