"""Detect oriented bounding boxes with a YOLO OBB model, OCR each detected
region with EasyOCR, and annotate the image with the recognized text."""

import sys
from typing import List, Tuple

import cv2
import easyocr
import numpy as np
from ultralytics import YOLO

# Initialize EasyOCR reader (you can set gpu=True if you have CUDA)
reader = easyocr.Reader(['en'], gpu=False)


def preprocess_cropped_region(cropped_bgr: np.ndarray) -> np.ndarray:
    # 1) Convert to grayscale
    gray = cv2.cvtColor(cropped_bgr, cv2.COLOR_BGR2GRAY)

    # 2) Upscale by 2×
    h, w = gray.shape
    gray_up = cv2.resize(gray, (w * 2, h * 2), interpolation=cv2.INTER_LINEAR)

    # 3) Apply Otsu's threshold → binary
    _, thresh = cv2.threshold(
        gray_up, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    return thresh  # single‐channel (0/255) image


def draw_obb(image: np.ndarray, obb) -> Tuple[np.ndarray, List[str]]:
    """
    - Draws each OBB polygon on `image`
    - Crops the region inside each OBB, preprocesses it, and runs EasyOCR
    - Writes the extracted text back onto `image` just above the box
    - Returns (modified_image, list_of_extracted_texts)
    """
    # Keep an unannotated copy so the drawn polygons don't leak into the OCR crops
    clean = image.copy()

    # Corner points; recent Ultralytics versions return shape (N, 4, 2)
    boxes = obb.xyxyxyxy.cpu().numpy()
    extracted_texts = []

    for i, box in enumerate(boxes):
        # Normalize to a (4, 2) integer array of corner points
        pts = box.reshape(4, 2).astype(np.int32)

        # Draw the bounding polygon (green)
        cv2.polylines(image, [pts], isClosed=True, color=(0, 255, 0), thickness=2)

        # Compute axis‐aligned crop coordinates
        x_min, y_min = np.min(pts, axis=0)
        x_max, y_max = np.max(pts, axis=0)

        # Ensure coordinates are within image
        x_min = max(0, x_min)
        y_min = max(0, y_min)
        x_max = min(image.shape[1] - 1, x_max)
        y_max = min(image.shape[0] - 1, y_max)

        # Crop from the unannotated copy so the green outline isn't fed to OCR
        cropped_region = clean[y_min:y_max, x_min:x_max]

        # Only proceed if crop is non-empty
        if cropped_region.size == 0:
            continue

        # Preprocess the cropped region before OCR
        preprocessed = preprocess_cropped_region(cropped_region)

        # (Optional) If you want to visualize how the preprocessed patch looks:
        # cv2.imshow(f"Preprocessed Crop {i}", preprocessed)
        # cv2.waitKey(0)

        # Run EasyOCR on the single‐channel (binarized) image
        ocr_results = reader.readtext(preprocessed)

        # Concatenate all recognized text fragments
        detected_text = " ".join([entry[1] for entry in ocr_results]).strip()
        extracted_texts.append(detected_text)

        # Put the extracted text above the bounding box (yellow text)
        cv2.putText(
            image,
            detected_text,
            (x_min, y_min - 10 if y_min - 10 > 10 else y_min + 20),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 255, 255),
            2,
            lineType=cv2.LINE_AA,
        )

    return image, extracted_texts


def main(model_path: str, image_path: str):
    # Load the YOLO OBB model for detection
    model = YOLO(model_path)

    # Read the input image
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not read image at {image_path}", file=sys.stderr)
        sys.exit(1)

    # Run inference using the YOLO OBB model
    results = model(image)

    all_extracted_texts = []

    # Iterate over each detection result
    for r in results:
        if r.obb is not None:
            image, extracted_texts = draw_obb(image, r.obb)
            all_extracted_texts.extend(extracted_texts)

            # Print class info & OCR results to console
            for class_id in r.obb.cls.cpu().numpy():
                class_name = r.names[int(class_id)]
                print(f"Detected class ID: {int(class_id)}, Class name: {class_name}")

            for idx, text in enumerate(extracted_texts):
                print(f"OCR Extracted Text {idx + 1}: {text}")

    return image, all_extracted_texts


if __name__ == "__main__":
    # Replace these with your actual paths
    yolo_weights = "Models/Remaining_tests_model.pt"
    test_image   = "test_images/HV_PD/11.png"

    output_image, texts = main(yolo_weights, test_image)
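
    # Not in the original script: a minimal sketch for persisting the annotated
    # result. The output path "annotated_output.png" is a hypothetical choice.
    cv2.imwrite("annotated_output.png", output_image)
    print(f"Extracted text from {len(texts)} region(s)")
    # To inspect interactively instead:
    # cv2.imshow("Detections + OCR", output_image)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()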