test2 / src /preprocessor.py
mlengineer01's picture
Update src/preprocessor.py
04898ba verified
"""
preprocessor.py (Optimized)
---------------------------
Image preprocessing for technical drawings.
Optimized for processing thin-line engineering documents on CPU.
"""
import cv2
import numpy as np
from typing import Tuple
class ImagePreprocessor:
    """Grayscale preprocessing pipeline for technical drawings.

    The pipeline converts the input to grayscale, optionally downsizes it so
    the longest side fits ``max_image_dim``, and then applies (each step
    optional) bilateral denoising, CLAHE contrast enhancement and Otsu
    binarization.
    """

    def __init__(
        self,
        denoise: bool = True,
        enhance_contrast: bool = True,
        binarize: bool = False,
        max_image_dim: int = 1600,  # default lowered slightly from 2000 to 1600 for the HF Space CPU
    ):
        """Store the pipeline switches.

        Args:
            denoise: Apply a bilateral filter in ``preprocess``.
            enhance_contrast: Apply CLAHE in ``preprocess``.
            binarize: Apply Otsu thresholding in ``preprocess``.
            max_image_dim: Upper bound (pixels) for the longest image side
                when resizing is requested.
        """
        self.denoise = denoise
        self.enhance_contrast = enhance_contrast
        self.binarize = binarize
        self.max_image_dim = max_image_dim

    def _to_grayscale(self, image: np.ndarray) -> np.ndarray:
        """Return a 2-D grayscale version of ``image``.

        2-D input is returned unchanged. 3-D input is treated as BGRA when it
        has four channels and as BGR otherwise; a single-channel ``(H, W, 1)``
        array simply has its trailing axis dropped.
        """
        if len(image.shape) != 3:
            return image

        channels = image.shape[2]
        if channels == 1:
            # cv2.cvtColor rejects 1-channel input for BGR2GRAY, and the data
            # is already grayscale, so just drop the channel axis.
            return image[:, :, 0]
        if channels == 4:  # BGRA / RGBA
            return cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    def _safe_resize(self, image: np.ndarray) -> Tuple[np.ndarray, float]:
        """Shrink ``image`` so its longest side is at most ``max_image_dim``.

        Returns:
            ``(image, scale)``; the input itself and ``1.0`` when it already
            fits, otherwise the resized image and the applied scale factor.
        """
        h, w = image.shape[:2]
        longest_side = max(h, w)
        if longest_side <= self.max_image_dim:
            return image, 1.0

        scale = self.max_image_dim / longest_side
        # Clamp to one pixel: with an extreme aspect ratio the truncated short
        # side would otherwise become 0 and cv2.resize would fail.
        new_w = max(1, int(w * scale))
        new_h = max(1, int(h * scale))
        # Always use INTER_AREA when shrinking drawings so thin pixel-wide
        # strokes are preserved instead of being broken up.
        resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)
        return resized, scale

    def preprocess(
        self, image: np.ndarray, resize: bool = True
    ) -> Tuple[np.ndarray, float]:
        """Run the configured pipeline on ``image``.

        Args:
            image: Input image (grayscale, BGR or BGRA).
            resize: When true, downsize so the longest side fits
                ``max_image_dim``.

        Returns:
            ``(processed, scale_factor)`` where ``scale_factor`` is ``1.0``
            if no resizing took place.
        """
        gray = self._to_grayscale(image)
        scale_factor = 1.0
        if resize:
            gray, scale_factor = self._safe_resize(gray)

        if self.denoise:
            # d (diameter) was reduced from 9 to 5 so the bilateral filter
            # runs roughly twice as fast on a single-core CPU.
            gray = cv2.bilateralFilter(gray, d=5, sigmaColor=50, sigmaSpace=50)
        if self.enhance_contrast:
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            gray = clahe.apply(gray)
        if self.binarize:
            _, gray = cv2.threshold(
                gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
            )
        return gray, scale_factor

    def preprocess_query(self, image: np.ndarray) -> Tuple[np.ndarray, float]:
        """Preprocess ``image`` without resizing (``resize=False``)."""
        return self.preprocess(image, resize=False)

    def preprocess_drawing(self, image: np.ndarray) -> Tuple[np.ndarray, float]:
        """Preprocess ``image`` with size capping (``resize=True``)."""
        return self.preprocess(image, resize=True)

    @staticmethod
    def rotate_image(image: np.ndarray, angle: float) -> np.ndarray:
        """Rotate ``image`` by ``angle`` degrees on an enlarged white canvas.

        The angle is negated before being passed to
        ``cv2.getRotationMatrix2D``. The output canvas is grown to the
        bounding box of the rotated image, and uncovered areas are filled
        with white.
        """
        h, w = image.shape[:2]
        cx, cy = w // 2, h // 2
        M = cv2.getRotationMatrix2D((cx, cy), -angle, 1.0)

        # Bounding box of the rotated rectangle.
        cos_a = abs(M[0, 0])
        sin_a = abs(M[0, 1])
        new_w = int(h * sin_a + w * cos_a)
        new_h = int(h * cos_a + w * sin_a)

        # Shift so the original centre lands on the centre of the new canvas.
        M[0, 2] += new_w / 2 - cx
        M[1, 2] += new_h / 2 - cy

        rotated = cv2.warpAffine(
            image, M, (new_w, new_h),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            # White background for CAD technical drawings. A bare 255 becomes
            # Scalar(255, 0, 0, 0), which is blue on BGR input, so give every
            # channel 255 explicitly.
            borderValue=(255, 255, 255, 255),
        )
        return rotated