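"""
Detect oriented bounding boxes with a YOLO OBB model, crop each detection,
preprocess the crop (grayscale, 2x upscale, Otsu threshold), run EasyOCR on the
result, and draw both the boxes and the recognized text back onto the image.
"""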
import sys
import cv2
import numpy as np
import easyocr
from ultralytics import YOLO
# Initialize EasyOCR reader (you can set gpu=True if you have CUDA)
reader = easyocr.Reader(['en'], gpu=False)
def preprocess_cropped_region(cropped_bgr: np.ndarray) -> np.ndarray:
# 1) Convert to grayscale
gray = cv2.cvtColor(cropped_bgr, cv2.COLOR_BGR2GRAY)
# 2) Upscale by 2×
h, w = gray.shape
gray_up = cv2.resize(gray, (w * 2, h * 2), interpolation=cv2.INTER_LINEAR)
    # 3) Apply Otsu's threshold → binary
_, thresh = cv2.threshold(
gray_up, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
)
return thresh # single‐channel (0/255) image
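# Optional alternative preprocessor (not used by default): a minimal sketch using
# adaptive thresholding, which can handle uneven lighting better than Otsu.
# The helper name and the blockSize/C values (31, 10) are illustrative choices;
# swap it in for preprocess_cropped_region() inside draw_obb() only if needed.
def preprocess_cropped_region_adaptive(cropped_bgr: np.ndarray) -> np.ndarray:
    gray = cv2.cvtColor(cropped_bgr, cv2.COLOR_BGR2GRAY)
    h, w = gray.shape
    gray_up = cv2.resize(gray, (w * 2, h * 2), interpolation=cv2.INTER_LINEAR)
    # blockSize must be odd; tune blockSize and C per dataset
    return cv2.adaptiveThreshold(
        gray_up, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 10
    )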
def draw_obb(image: np.ndarray, obb) -> tuple[np.ndarray, list]:
"""
- Draws each OBB polygon on `image`
- Crops the region inside the OBB, preprocesses it, runs EasyOCR
- Writes the extracted text back onto `image` just above the box
- Returns (modified_image, list_of_extracted_texts)
"""
    boxes = obb.xyxyxyxy.cpu().numpy()  # 4 corner points per detected box
    clean = image.copy()  # crop from an unannotated copy so drawn boxes/text don't bleed into the OCR input
    extracted_texts = []
for i, box in enumerate(boxes):
# Reshape into 4 points: [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
pts = box.reshape(4, 2).astype(np.int32)
# Draw the bounding polygon (green)
cv2.polylines(image, [pts], isClosed=True, color=(0, 255, 0), thickness=2)
# Compute axis‐aligned crop coordinates
x_min, y_min = np.min(pts, axis=0)
x_max, y_max = np.max(pts, axis=0)
# Ensure coordinates are within image
x_min = max(0, x_min)
y_min = max(0, y_min)
x_max = min(image.shape[1] - 1, x_max)
y_max = min(image.shape[0] - 1, y_max)
        cropped_region = clean[y_min:y_max, x_min:x_max]
# Only proceed if crop is non-empty
if cropped_region.size == 0:
continue
# Preprocess the cropped region before OCR
preprocessed = preprocess_cropped_region(cropped_region)
# (Optional) If you want to visualize how the preprocessed patch looks:
# cv2.imshow(f"Preprocessed Crop {i}", preprocessed)
# cv2.waitKey(0)
# Run EasyOCR on the single‐channel (binarized) image
ocr_results = reader.readtext(preprocessed)
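        # If the targets are numeric-only (an assumption about your data), an
        # allowlist can cut down misreads, e.g.:
        #   ocr_results = reader.readtext(preprocessed, allowlist="0123456789.-")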
# Concatenate all recognized text fragments
detected_text = " ".join([entry[1] for entry in ocr_results]).strip()
extracted_texts.append(detected_text)
# Put the extracted text above the bounding box (yellow text)
cv2.putText(
image,
detected_text,
(x_min, y_min - 10 if y_min - 10 > 10 else y_min + 20),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 255, 255),
2,
lineType=cv2.LINE_AA,
)
return image, extracted_texts
def main(model_path_3: str, image_path: str):
# Load the YOLO OBB model for detection
model_3 = YOLO(model_path_3)
# Read the input image
image = cv2.imread(image_path)
if image is None:
print("Error: Could not read image at", image_path)
sys.exit(1)
# Run inference using the YOLO OBB model
results = model_3(image)
all_extracted_texts = []
# Iterate over each detection result
for r in results:
if r.obb is not None:
image, extracted_texts = draw_obb(image, r.obb)
all_extracted_texts.extend(extracted_texts)
# Print class info & OCR results to console
            for class_id in r.obb.cls.cpu().numpy():
                class_name = r.names[int(class_id)]
                print(f"Detected class ID: {int(class_id)}, Class name: {class_name}")
for idx, text in enumerate(extracted_texts):
print(f"OCR Extracted Text {idx + 1}: {text}")
return image, all_extracted_texts
if __name__ == "__main__":
# Replace these with your actual paths
yolo_weights = "Models/Remaining_tests_model.pt"
test_image = "test_images/HV_PD/11.png"
output_image, texts = main(yolo_weights, test_image)
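    # Optional: save the annotated image and echo the OCR output
    # (the output filename below is an assumption; adjust as needed)
    cv2.imwrite("annotated_output.png", output_image)
    print("All extracted texts:", texts)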