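"""
Detect oriented bounding boxes with a YOLO OBB model, crop each detection,
preprocess the crop (grayscale, 2x upscale, Otsu threshold), run EasyOCR on the
result, and draw both the boxes and the recognized text back onto the image.
"""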
import sys
import cv2
import numpy as np
import easyocr
from ultralytics import YOLO
# Initialize EasyOCR reader (you can set gpu=True if you have CUDA)
reader = easyocr.Reader(['en'], gpu=False)
def preprocess_cropped_region(cropped_bgr: np.ndarray) -> np.ndarray:
# 1) Convert to grayscale
gray = cv2.cvtColor(cropped_bgr, cv2.COLOR_BGR2GRAY)
# 2) Upscale by 2×
h, w = gray.shape
gray_up = cv2.resize(gray, (w * 2, h * 2), interpolation=cv2.INTER_LINEAR)
    # 3) Apply Otsu's threshold → binary
_, thresh = cv2.threshold(
gray_up, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
)
return thresh # single‐channel (0/255) image
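# Optional alternative preprocessor (not used by default): a minimal sketch using
# adaptive thresholding, which can handle uneven lighting better than Otsu.
# The helper name and the blockSize/C values (31, 10) are illustrative choices;
# swap it in for preprocess_cropped_region() inside draw_obb() only if needed.
def preprocess_cropped_region_adaptive(cropped_bgr: np.ndarray) -> np.ndarray:
    gray = cv2.cvtColor(cropped_bgr, cv2.COLOR_BGR2GRAY)
    h, w = gray.shape
    gray_up = cv2.resize(gray, (w * 2, h * 2), interpolation=cv2.INTER_LINEAR)
    # blockSize must be odd; tune blockSize and C per dataset
    return cv2.adaptiveThreshold(
        gray_up, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 10
    )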
def draw_obb(image: np.ndarray, obb) -> tuple[np.ndarray, list]:
"""
- Draws each OBB polygon on `image`
- Crops the region inside the OBB, preprocesses it, runs EasyOCR
- Writes the extracted text back onto `image` just above the box
- Returns (modified_image, list_of_extracted_texts)
"""
    boxes = obb.xyxyxyxy.cpu().numpy()  # 4 corner points per detected box
    clean = image.copy()  # crop from an unannotated copy so drawn boxes/text don't bleed into the OCR input
    extracted_texts = []
for i, box in enumerate(boxes):
# Reshape into 4 points: [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
pts = box.reshape(4, 2).astype(np.int32)
# Draw the bounding polygon (green)
cv2.polylines(image, [pts], isClosed=True, color=(0, 255, 0), thickness=2)
# Compute axis‐aligned crop coordinates
x_min, y_min = np.min(pts, axis=0)
x_max, y_max = np.max(pts, axis=0)
# Ensure coordinates are within image
x_min = max(0, x_min)
y_min = max(0, y_min)
x_max = min(image.shape[1] - 1, x_max)
y_max = min(image.shape[0] - 1, y_max)
        cropped_region = clean[y_min:y_max, x_min:x_max]
# Only proceed if crop is non-empty
if cropped_region.size == 0:
continue
# Preprocess the cropped region before OCR
preprocessed = preprocess_cropped_region(cropped_region)
# (Optional) If you want to visualize how the preprocessed patch looks:
# cv2.imshow(f"Preprocessed Crop {i}", preprocessed)
# cv2.waitKey(0)
# Run EasyOCR on the single‐channel (binarized) image
ocr_results = reader.readtext(preprocessed)
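        # If the targets are numeric-only (an assumption about your data), an
        # allowlist can cut down misreads, e.g.:
        #   ocr_results = reader.readtext(preprocessed, allowlist="0123456789.-")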
# Concatenate all recognized text fragments
detected_text = " ".join([entry[1] for entry in ocr_results]).strip()
extracted_texts.append(detected_text)
# Put the extracted text above the bounding box (yellow text)
cv2.putText(
image,
detected_text,
(x_min, y_min - 10 if y_min - 10 > 10 else y_min + 20),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 255, 255),
2,
lineType=cv2.LINE_AA,
)
return image, extracted_texts
def main(model_path_3: str, image_path: str):
# Load the YOLO OBB model for detection
model_3 = YOLO(model_path_3)
# Read the input image
image = cv2.imread(image_path)
if image is None:
print("Error: Could not read image at", image_path)
sys.exit(1)
# Run inference using the YOLO OBB model
results = model_3(image)
all_extracted_texts = []
# Iterate over each detection result
for r in results:
if r.obb is not None:
image, extracted_texts = draw_obb(image, r.obb)
all_extracted_texts.extend(extracted_texts)
# Print class info & OCR results to console
            for class_id in r.obb.cls.cpu().numpy():
                class_name = r.names[int(class_id)]
                print(f"Detected class ID: {int(class_id)}, Class name: {class_name}")
for idx, text in enumerate(extracted_texts):
print(f"OCR Extracted Text {idx + 1}: {text}")
return image, all_extracted_texts
if __name__ == "__main__":
# Replace these with your actual paths
yolo_weights = "Models/Remaining_tests_model.pt"
test_image = "test_images/HV_PD/11.png"
output_image, texts = main(yolo_weights, test_image)
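    # Optional: save the annotated image and echo the OCR output
    # (the output filename below is an assumption; adjust as needed)
    cv2.imwrite("annotated_output.png", output_image)
    print("All extracted texts:", texts)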