File size: 3,245 Bytes
07353b6
04898ba
 
07353b6
04898ba
07353b6
 
 
 
04898ba
07353b6
 
 
 
 
 
 
 
04898ba
07353b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04898ba
07353b6
 
 
 
 
 
 
 
 
 
 
 
04898ba
07353b6
04898ba
07353b6
 
 
 
 
 
04898ba
07353b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04898ba
07353b6
04898ba
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
"""
preprocessor.py (Optimized)
---------------------------
Image preprocessing for technical drawings.
Optimized for processing thin-line engineering documents on CPU.
"""

import cv2
import numpy as np
from typing import Tuple


class ImagePreprocessor:
    """Grayscale / resize / denoise / contrast pipeline for technical drawings.

    Each optional stage (bilateral denoising, CLAHE contrast enhancement,
    Otsu binarization) is toggled by a constructor flag. Images whose longest
    side exceeds ``max_image_dim`` are downscaled when resizing is requested.
    """

    def __init__(
        self,
        denoise: bool = True,
        enhance_contrast: bool = True,
        binarize: bool = False,
        max_image_dim: int = 1600,  # Default lowered from 2000 to 1600 for the HF Space CPU
    ):
        """Store the pipeline switches.

        Args:
            denoise: Apply a bilateral filter in ``preprocess``.
            enhance_contrast: Apply CLAHE in ``preprocess``.
            binarize: Apply Otsu thresholding in ``preprocess``.
            max_image_dim: Largest allowed side length (pixels) after resizing.

        Raises:
            ValueError: If ``max_image_dim`` is not positive.
        """
        if max_image_dim <= 0:
            raise ValueError(
                f"max_image_dim must be a positive integer, got {max_image_dim!r}"
            )
        self.denoise = denoise
        self.enhance_contrast = enhance_contrast
        self.binarize = binarize
        self.max_image_dim = max_image_dim

    def _to_grayscale(self, image: np.ndarray) -> np.ndarray:
        """Return a 2-D single-channel version of ``image``.

        2-D input is returned unchanged. ``(H, W, 1)`` input has its singleton
        channel axis dropped. 4-channel input is converted with
        ``COLOR_BGRA2GRAY`` and any other 3-D input with ``COLOR_BGR2GRAY``.
        """
        if image.ndim != 3:
            return image

        channels = image.shape[2]
        if channels == 1:
            # cvtColor rejects single-channel input for BGR2GRAY; the data is
            # already grayscale, so just drop the trailing axis.
            return np.ascontiguousarray(image[:, :, 0])
        if channels == 4:
            # NOTE(review): assumes OpenCV channel order (BGRA) - confirm
            # against callers that load images with other libraries.
            return cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    def _safe_resize(self, image: np.ndarray) -> Tuple[np.ndarray, float]:
        """Downscale ``image`` so its longest side is at most ``max_image_dim``.

        Returns:
            ``(image, scale)`` where ``scale`` is the factor applied to both
            axes (``1.0`` when the image already fits and is returned as-is).
        """
        h, w = image.shape[:2]
        max_dim = max(h, w)
        if max_dim <= self.max_image_dim:
            return image, 1.0

        scale = self.max_image_dim / max_dim
        # Clamp to 1 pixel: on extreme aspect ratios the short side would
        # otherwise truncate to 0 and make cv2.resize fail.
        new_w = max(1, int(w * scale))
        new_h = max(1, int(h * scale))
        # Force INTER_AREA when shrinking drawings so thin pixel strokes are
        # preserved instead of being broken up.
        resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)
        return resized, scale

    def preprocess(
        self, image: np.ndarray, resize: bool = True
    ) -> Tuple[np.ndarray, float]:
        """Run the configured pipeline on ``image``.

        Order of operations: grayscale conversion, optional downscale,
        optional bilateral denoise, optional CLAHE, optional Otsu threshold.

        Args:
            image: Input image array (2-D grayscale or 3-D multi-channel).
            resize: When true, downscale via ``_safe_resize`` first.

        Returns:
            ``(processed_image, scale_factor)``; ``scale_factor`` is ``1.0``
            when no resizing took place.
        """
        gray = self._to_grayscale(image)

        scale_factor = 1.0
        if resize:
            gray, scale_factor = self._safe_resize(gray)

        # Diameter d reduced from 9 to 5 so the bilateral filter runs roughly
        # twice as fast on a single-core CPU.
        if self.denoise:
            gray = cv2.bilateralFilter(gray, d=5, sigmaColor=50, sigmaSpace=50)

        if self.enhance_contrast:
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            gray = clahe.apply(gray)

        if self.binarize:
            # Threshold value 0 is ignored: THRESH_OTSU picks it automatically.
            _, gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        return gray, scale_factor

    def preprocess_query(self, image: np.ndarray) -> Tuple[np.ndarray, float]:
        """Preprocess ``image`` without resizing (scale factor is always 1.0)."""
        return self.preprocess(image, resize=False)

    def preprocess_drawing(self, image: np.ndarray) -> Tuple[np.ndarray, float]:
        """Preprocess ``image`` with downscaling to ``max_image_dim`` enabled."""
        return self.preprocess(image, resize=True)

    @staticmethod
    def rotate_image(
        image: np.ndarray, angle: float, border_value: int = 255
    ) -> np.ndarray:
        """Rotate ``image`` about its center, expanding the canvas to fit.

        Args:
            image: Grayscale or multi-channel image array.
            angle: Rotation in degrees. It is negated before being handed to
                ``cv2.getRotationMatrix2D`` (where positive means
                counter-clockwise), so positive values rotate clockwise.
            border_value: Intensity written to every channel of the area
                exposed by the rotation. Defaults to 255 (white background
                for CAD technical drawings).

        Returns:
            The rotated image on an enlarged canvas.
        """
        h, w = image.shape[:2]
        cx, cy = w // 2, h // 2
        M = cv2.getRotationMatrix2D((cx, cy), -angle, 1.0)

        # Bounding box of the rotated rectangle.
        cos_a = abs(M[0, 0])
        sin_a = abs(M[0, 1])
        new_w = int(h * sin_a + w * cos_a)
        new_h = int(h * cos_a + w * sin_a)

        # Shift the transform so the original center lands on the new center.
        M[0, 2] += new_w / 2 - cx
        M[1, 2] += new_h / 2 - cy

        rotated = cv2.warpAffine(
            image, M, (new_w, new_h),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            # A bare scalar only fills the first channel (255, 0, 0, 0), which
            # paints color images blue; fill all four components instead.
            borderValue=(border_value,) * 4,
        )
        return rotated