File size: 2,775 Bytes
8eb0b3e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import cv2
import os

def preprocess(img):
    """Binarize an image with Otsu's thresholding.

    Color input is reduced to a single channel first, because
    ``cv2.threshold`` combined with ``cv2.THRESH_OTSU`` only accepts
    single-channel images.

    Args:
        img: Image array. A 2-D array is used as is; a 3-D array with
            3 channels is converted as BGR and one with 4 channels as BGRA.

    Returns:
        The thresholded image produced by ``cv2.threshold``: pixels above
        the automatically chosen Otsu threshold are set to 255, all others
        to 0.
    """
    gray_img = img
    if len(img.shape) == 3:
        channels = img.shape[2]
        if channels == 3:
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        elif channels == 4:
            # Images carrying an alpha channel would otherwise reach
            # cv2.threshold as multi-channel data and be rejected.
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)

    # The explicit threshold (0) is a placeholder: with THRESH_OTSU set,
    # OpenCV computes the threshold itself and uses that instead.
    _, thresh_otsu = cv2.threshold(
        gray_img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return thresh_otsu

# def preprocess(img):
#     """Applies adaptive Gaussian thresholding to the input image, then denoises and inverts it."""

#     import numpy as np
#     # Ensure input is grayscale if it's not already
#     # Apply adaptive Gaussian thresholding
#     ## invert image
    
#     block_size = 11  # Size of pixel neighborhood used for threshold calculation
#     C = 10  # Constant subtracted from mean
#     thresh_adaptive = cv2.adaptiveThreshold(img, 
#                                         maxValue=255,
#                                         adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
#                                         thresholdType=cv2.THRESH_BINARY,
#                                         blockSize=block_size,
#                                         C=C)
    
#     kernel = np.ones((3,3), np.uint8)
#     thresh_cleaned = cv2.morphologyEx(thresh_adaptive, cv2.MORPH_OPEN, kernel)
#     # or
#     thresh_cleaned = cv2.medianBlur(thresh_adaptive, 3)
#     gray_img = cv2.bitwise_not(thresh_cleaned)

#     return gray_img

def load_and_preprocess_image(image_path: str, config: dict):
    """Load an image, upscale it if it is small, and binarize an inverted copy.

    Args:
        image_path: Path of the image file to read.
        config: Settings dictionary. Its optional ``'image_processing'``
            entry may define ``'min_dimension_threshold'`` (default 800)
            and ``'upscale_factor'`` (default 2). ``None`` is treated as an
            empty configuration.

    Returns:
        A tuple ``(preprocessed_img, img_color_resized, img_gray_resized)``:
        the result of ``preprocess`` applied to the inverted grayscale
        image, the (possibly upscaled) color image, and the (possibly
        upscaled) grayscale image.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
        ValueError: If the file cannot be read as an image, or if upscaling
            is required and ``upscale_factor`` is not positive.
    """
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found at: {image_path}")

    img_color = cv2.imread(image_path)
    if img_color is None:
        # cv2.imread signals an unreadable file by returning None
        # rather than raising.
        raise ValueError(f"Could not read image file: {image_path}")

    img_gray = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY)

    # --- Upscaling Heuristic ---
    # Tolerate a missing config or an explicit None for the section.
    cfg_proc = (config or {}).get('image_processing') or {}
    min_dimension_threshold = cfg_proc.get('min_dimension_threshold', 800)
    upscale_factor = cfg_proc.get('upscale_factor', 2)

    h, w = img_gray.shape
    img_color_resized = img_color
    img_gray_resized = img_gray

    if h < min_dimension_threshold or w < min_dimension_threshold:
        if upscale_factor <= 0:
            raise ValueError(
                f"upscale_factor must be positive, got {upscale_factor}"
            )
        print(f"Image dimensions ({w}x{h}) below threshold ({min_dimension_threshold}px). Upscaling by {upscale_factor}x.")
        # cv2.resize requires integer target dimensions, so a fractional
        # factor (e.g. 1.5) must be rounded before it is passed as dsize.
        new_w = max(1, int(round(w * upscale_factor)))
        new_h = max(1, int(round(h * upscale_factor)))
        img_gray_resized = cv2.resize(img_gray, (new_w, new_h), interpolation=cv2.INTER_LANCZOS4)
        img_color_resized = cv2.resize(img_color, (new_w, new_h), interpolation=cv2.INTER_LANCZOS4)

    # --- Initial Preprocessing (Inversion + Thresholding) ---
    img_inverted = cv2.bitwise_not(img_gray_resized)
    preprocessed_img = preprocess(img_inverted)

    return preprocessed_img, img_color_resized, img_gray_resized