# Spaces: Runtime error
import sys | |
import cv2 | |
import numpy as np | |
import easyocr | |
from ultralytics import YOLO | |
# Module-wide EasyOCR reader for English text, constructed once at import
# time and running on the CPU. Switch to gpu=True when CUDA is available.
reader = easyocr.Reader(lang_list=["en"], gpu=False)
def preprocess_cropped_region(cropped_bgr: np.ndarray, scale: int = 2) -> np.ndarray:
    """
    Turn a cropped image patch into an upscaled, binarised image for OCR.

    Steps: reduce to grayscale, enlarge by `scale` with bilinear
    interpolation, then binarise with Otsu's automatically chosen threshold.

    Args:
        cropped_bgr: Patch to prepare. A 3-channel array is treated as BGR and
            a 4-channel array as BGRA; a single-channel array (2-D, or 3-D
            with one channel) is used as grayscale directly.
            NOTE(review): Otsu thresholding is applied as-is, so the patch is
            presumably 8-bit - confirm against callers.
        scale: Integer enlargement factor applied to width and height.
            Defaults to 2.

    Returns:
        Single-channel image, `scale` times larger in each dimension, produced
        by `cv2.threshold` with a maximum value of 255.

    Raises:
        ValueError: If `scale` is smaller than 1 or the array is not a
            1-, 3- or 4-channel image.
    """
    if scale < 1:
        raise ValueError(f"scale must be >= 1, got {scale}")

    # 1) Reduce to a single grayscale channel, whatever the input layout
    if cropped_bgr.ndim == 2:
        gray = cropped_bgr
    elif cropped_bgr.ndim == 3 and cropped_bgr.shape[2] == 1:
        # Drop the singleton channel axis; OpenCV wants contiguous memory
        gray = np.ascontiguousarray(cropped_bgr[:, :, 0])
    elif cropped_bgr.ndim == 3 and cropped_bgr.shape[2] == 3:
        gray = cv2.cvtColor(cropped_bgr, cv2.COLOR_BGR2GRAY)
    elif cropped_bgr.ndim == 3 and cropped_bgr.shape[2] == 4:
        gray = cv2.cvtColor(cropped_bgr, cv2.COLOR_BGRA2GRAY)
    else:
        raise ValueError(
            f"expected a 1-, 3- or 4-channel image, got shape {cropped_bgr.shape}"
        )

    # 2) Upscale so small glyphs have more pixels for the recogniser
    h, w = gray.shape
    gray_up = cv2.resize(
        gray, (w * scale, h * scale), interpolation=cv2.INTER_LINEAR
    )

    # 3) Otsu's threshold -> binary; the explicit threshold (0) is ignored
    #    because THRESH_OTSU picks it from the histogram
    _, thresh = cv2.threshold(
        gray_up, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return thresh
def draw_obb(image: np.ndarray, obb) -> "tuple[np.ndarray, list]":
    """
    Annotate `image` with every oriented bounding box in `obb` and OCR each one.

    For each box this:
    - draws the OBB polygon on `image` (green),
    - crops the axis-aligned rectangle enclosing the polygon from an untouched
      copy of the image, preprocesses it with `preprocess_cropped_region` and
      runs the module-level EasyOCR `reader` on it,
    - writes the recognised text next to the box on `image` (yellow).

    Args:
        image: Image to annotate; it is modified in place.
        obb: Object exposing `xyxyxyxy`, a tensor-like holding four (x, y)
            corner points per box (supports `.cpu().numpy()`), e.g. the `obb`
            attribute of an Ultralytics result.

    Returns:
        `(image, extracted_texts)`, where `extracted_texts` holds one string
        per box whose crop was non-empty, in detection order. Boxes lying
        entirely outside the image are drawn but contribute no text.
    """
    # OCR must read the original pixels, not the polygons and labels that this
    # function paints onto `image` as it goes, so crop from a snapshot.
    pristine = image.copy()
    img_h, img_w = image.shape[:2]

    boxes = obb.xyxyxyxy.cpu().numpy()  # eight coordinates per box
    extracted_texts = []
    for box in boxes:
        # Reshape into 4 points: [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
        pts = box.reshape(4, 2).astype(np.int32)
        cv2.polylines(image, [pts], isClosed=True, color=(0, 255, 0), thickness=2)

        # Axis-aligned extent as plain ints: OpenCV drawing calls can reject
        # NumPy scalar types in coordinate tuples.
        x_lo, y_lo = (int(v) for v in pts.min(axis=0))
        x_hi, y_hi = (int(v) for v in pts.max(axis=0))

        # Clamp to the image. The upper bounds are exclusive slice ends, so
        # they are clamped to the image size rather than size - 1.
        x0, y0 = max(0, x_lo), max(0, y_lo)
        x1, y1 = min(img_w, x_hi), min(img_h, y_hi)

        # Explicit emptiness test: a negative upper bound must not be allowed
        # to wrap around as a negative slice index.
        if x1 <= x0 or y1 <= y0:
            continue

        cropped_region = pristine[y0:y1, x0:x1]
        preprocessed = preprocess_cropped_region(cropped_region)

        # Each EasyOCR result is (bbox, text, confidence); keep only the text
        ocr_results = reader.readtext(preprocessed)
        detected_text = " ".join(entry[1] for entry in ocr_results).strip()
        extracted_texts.append(detected_text)

        # Label above the box, or just inside it when too close to the top edge
        text_y = y0 - 10 if y0 - 10 > 10 else y0 + 20
        cv2.putText(
            image,
            detected_text,
            (x0, text_y),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 255, 255),
            2,
            lineType=cv2.LINE_AA,
        )
    return image, extracted_texts
def main(model_path_3: str, image_path: str):
    """
    Detect oriented boxes in an image file, OCR each one and report the results.

    Loads the YOLO OBB weights at `model_path_3`, reads the image at
    `image_path`, runs detection, and hands every result that carries OBB
    data to `draw_obb`. Detected classes and OCR strings are printed to the
    console.

    Returns:
        `(image, all_extracted_texts)`: the annotated image and the OCR
        strings gathered across all results.

    Exits the process with status 1 if the image cannot be read.
    """
    detector = YOLO(model_path_3)

    image = cv2.imread(image_path)
    if image is None:
        print("Error: Could not read image at", image_path)
        sys.exit(1)

    results = detector(image)

    all_extracted_texts = []
    for r in results:
        # Results without oriented boxes have nothing to draw or read
        if r.obb is None:
            continue

        image, extracted_texts = draw_obb(image, r.obb)
        all_extracted_texts += extracted_texts

        # Console report: first the detected classes, then the OCR strings
        class_ids = r.obb.cls.cpu().numpy()
        for class_id in class_ids:
            class_name = r.names[int(class_id)]
            print(f"Detected class ID: {class_id}, Class name: {class_name}")

        for idx, text in enumerate(extracted_texts):
            print(f"OCR Extracted Text {idx + 1}: {text}")

    return image, all_extracted_texts
if __name__ == "__main__":
    # Usage: python <script> [yolo_weights.pt] [image_path]
    # Both arguments are optional; the defaults below are used when omitted.
    cli_args = sys.argv[1:]
    yolo_weights = cli_args[0] if len(cli_args) > 0 else "Models/Remaining_tests_model.pt"
    test_image = cli_args[1] if len(cli_args) > 1 else "test_images/HV_PD/11.png"

    # main() prints the detections and OCR text; the annotated image and the
    # collected strings are returned for any further use.
    output_image, texts = main(yolo_weights, test_image)