"""Detect oriented bounding boxes with a YOLO OBB model, OCR each detected
region with EasyOCR, and annotate the image with the recognized text."""

import sys
from typing import List, Tuple

import cv2
import easyocr
import numpy as np
from ultralytics import YOLO

# Initialize EasyOCR reader (you can set gpu=True if you have CUDA)
reader = easyocr.Reader(['en'], gpu=False)


def preprocess_cropped_region(cropped_bgr: np.ndarray) -> np.ndarray:
    # 1) Convert to grayscale
    gray = cv2.cvtColor(cropped_bgr, cv2.COLOR_BGR2GRAY)

    # 2) Upscale by 2×
    h, w = gray.shape
    gray_up = cv2.resize(gray, (w * 2, h * 2), interpolation=cv2.INTER_LINEAR)

    # 3) Apply Otsu's threshold → binary
    _, thresh = cv2.threshold(
        gray_up, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    return thresh  # single‐channel (0/255) image


def draw_obb(image: np.ndarray, obb) -> Tuple[np.ndarray, List[str]]:
    """
    - Draws each OBB polygon on `image`
    - Crops the region inside each OBB, preprocesses it, and runs EasyOCR
    - Writes the extracted text back onto `image` just above the box
    - Returns (modified_image, list_of_extracted_texts)
    """
    # Keep an unannotated copy so the drawn polygons don't leak into the OCR crops
    clean = image.copy()

    # Corner points; recent Ultralytics versions return shape (N, 4, 2)
    boxes = obb.xyxyxyxy.cpu().numpy()
    extracted_texts = []

    for i, box in enumerate(boxes):
        # Normalize to a (4, 2) integer array of corner points
        pts = box.reshape(4, 2).astype(np.int32)

        # Draw the bounding polygon (green)
        cv2.polylines(image, [pts], isClosed=True, color=(0, 255, 0), thickness=2)

        # Compute axis‐aligned crop coordinates
        x_min, y_min = np.min(pts, axis=0)
        x_max, y_max = np.max(pts, axis=0)

        # Ensure coordinates are within image
        x_min = max(0, x_min)
        y_min = max(0, y_min)
        x_max = min(image.shape[1] - 1, x_max)
        y_max = min(image.shape[0] - 1, y_max)

        # Crop from the unannotated copy so the green outline isn't fed to OCR
        cropped_region = clean[y_min:y_max, x_min:x_max]

        # Only proceed if crop is non-empty
        if cropped_region.size == 0:
            continue

        # Preprocess the cropped region before OCR
        preprocessed = preprocess_cropped_region(cropped_region)

        # (Optional) If you want to visualize how the preprocessed patch looks:
        # cv2.imshow(f"Preprocessed Crop {i}", preprocessed)
        # cv2.waitKey(0)

        # Run EasyOCR on the single‐channel (binarized) image
        ocr_results = reader.readtext(preprocessed)

        # Concatenate all recognized text fragments
        detected_text = " ".join([entry[1] for entry in ocr_results]).strip()
        extracted_texts.append(detected_text)

        # Put the extracted text above the bounding box (yellow text)
        cv2.putText(
            image,
            detected_text,
            (x_min, y_min - 10 if y_min - 10 > 10 else y_min + 20),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 255, 255),
            2,
            lineType=cv2.LINE_AA,
        )

    return image, extracted_texts


def main(model_path: str, image_path: str):
    # Load the YOLO OBB model for detection
    model = YOLO(model_path)

    # Read the input image
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not read image at {image_path}", file=sys.stderr)
        sys.exit(1)

    # Run inference using the YOLO OBB model
    results = model(image)

    all_extracted_texts = []

    # Iterate over each detection result
    for r in results:
        if r.obb is not None:
            image, extracted_texts = draw_obb(image, r.obb)
            all_extracted_texts.extend(extracted_texts)

            # Print class info & OCR results to console
            for class_id in r.obb.cls.cpu().numpy():
                class_name = r.names[int(class_id)]
                print(f"Detected class ID: {int(class_id)}, Class name: {class_name}")

            for idx, text in enumerate(extracted_texts):
                print(f"OCR Extracted Text {idx + 1}: {text}")

    return image, all_extracted_texts


if __name__ == "__main__":
    # Replace these with your actual paths
    yolo_weights = "Models/Remaining_tests_model.pt"
    test_image   = "test_images/HV_PD/11.png"

    output_image, texts = main(yolo_weights, test_image)
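
    # Not in the original script: a minimal sketch for persisting the annotated
    # result. The output path "annotated_output.png" is a hypothetical choice.
    cv2.imwrite("annotated_output.png", output_image)
    print(f"Extracted text from {len(texts)} region(s)")
    # To inspect interactively instead:
    # cv2.imshow("Detections + OCR", output_image)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()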