import cv2 import numpy as np import onnxruntime as ort import pytesseract from PIL import Image import gradio as gr import torchvision from huggingface_hub import hf_hub_download app_title = "License Plate Object Detection" #model = ["ml-debi/yolov8_license_plate_detection"] model_path = "./best.onnx" examples = [["./examples/tesla.jpg"], ["./examples/mazda.jpg"], ["./examples/mercedes.jpg"]] def build_tesseract_options(psm=7): # tell Tesseract to only OCR alphanumeric characters alphanumeric = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" options = "-c tessedit_char_whitelist={}".format(alphanumeric) # set the PSM mode options += " --psm {}".format(psm) # return the built options string return options # Cropped image processing def auto_canny(image, sigma=0.33): # compute the median of the single channel pixel intensities v = np.median(image) # apply automatic Canny edge detection using the computed median lower = int(max(0, (1.0 - sigma) * v)) upper = int(min(255, (1.0 + sigma) * v)) edged = cv2.Canny(image, lower, upper) # return the edged image return edged def ocr_image_process(img, sigma, block_size, constant): # If the input is a numpy array, convert it to a PIL Image if isinstance(img, np.ndarray): img = Image.fromarray(img) # Convert the PIL Image back to a numpy array if necessary if isinstance(img, Image.Image): img = np.array(img) gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY) thresh_inv = cv2.adaptiveThreshold( gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, int(block_size) if block_size is not None else 41, int(constant) if constant is not None else 1) edges = auto_canny(thresh_inv, float(sigma) if sigma is not None else 0.33) ctrs, _ = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0]) img_area = img.shape[0]*img.shape[1] # Create a blank white image mask = np.ones(img.shape, dtype="uint8") * 255 for i, ctr in enumerate(sorted_ctrs): x, y, w, h = cv2.boundingRect(ctr) roi_area = w*h roi_ratio = roi_area/img_area if((roi_ratio >= 0.015) and (roi_ratio < 0.09)): if ((h>1.2*w) and (3*w>=h)): # Draw filled rectangle (mask) on the mask image cv2.rectangle(mask, (x, y), (x+w, y+h), (0,0,0), -1) # Bitwise-or input image and mask to get result img = cv2.bitwise_or(img, mask) # Convert the image to grayscale (if it isn't already) img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) return img def get_detections(image_path, size, ort_session): """ Function to get detections from the model. """ # Check if image_path is a string (indicating a file path) if isinstance(image_path, str): # Check if the image is a PNG if image_path.lower().endswith('.png'): # Open the image file img = Image.open(image_path) # Convert the image to RGB (removes the alpha channel) rgb_img = img.convert('RGB') # Create a new file name by replacing .png with .jpg jpg_image_path = os.path.splitext(image_path)[0] + '.jpg' # Save the RGB image as a JPG rgb_img.save(jpg_image_path) # Update image_path to point to the new JPG image image_path = jpg_image_path image = Image.open(image_path) # Check if image_path is a NumPy array elif isinstance(image_path, np.ndarray): image = Image.fromarray(image_path) else: raise ValueError( "image_path must be a file path (str) or a NumPy array.") scale_x = image.width / size scale_y = image.height / size resized_image = image.resize((size, size)) transform = torchvision.transforms.ToTensor() input_tensor = transform(resized_image).unsqueeze(0) outputs = ort_session.run(None, {'images': input_tensor.numpy()}) return image, outputs, scale_x, scale_y def non_maximum_supression(outputs, min_confidence): """ Function to apply non-maximum suppression. """ if min_confidence is None: min_confidence = 0.5 boxes = outputs[0][0] confidences = boxes[4] max_confidence_index = np.argmax(confidences) if confidences[max_confidence_index] > min_confidence: return boxes[:, max_confidence_index] else: return None def drawings(image, boxes, scale_x, scale_y, sigma, block_size, constant, ocr): """ Function to draw bounding boxes and apply OCR. """ x, y, w, h, c = boxes x_min, y_min = (x - w / 2) * scale_x, (y - h / 2) * scale_y x_max, y_max = (x + w / 2) * scale_x, (y + h / 2) * scale_y license_plate_image = image.crop((x_min, y_min, x_max, y_max)) processed_cropped_image = ocr_image_process(license_plate_image, sigma, block_size, constant) if ocr == "easyocr": import easyocr reader = easyocr.Reader(['en']) result = reader.readtext(processed_cropped_image) try: license_plate_text = str.upper(result[0][1]) except IndexError: license_plate_text = "No result found" print(license_plate_text) else: options = build_tesseract_options(7) license_plate_text = pytesseract.image_to_string( processed_cropped_image, config=options) print(license_plate_text) # Calculate the font scale based on image size font_scale = 0.001 * max(image.size) image = cv2.rectangle(np.array(image), (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 0, 255), 3) #cv2.putText(image, f'License Plate: {license_plate_text}', (int(x_min), int(y_max)), cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), 2) cv2.putText(image, f'Confidence: {c:.2f}', (int(x_min), int(y_min)), cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), 1) return image, license_plate_image, processed_cropped_image, license_plate_text def yolo_predictions(image_path, size, sigma, block_size, constant, min_confidence, ort_session, ocr): """ Function to get YOLO predictions. """ image, outputs, scale_x, scale_y = get_detections( image_path, size, ort_session) boxes = non_maximum_supression(outputs, min_confidence) result_img, license_plate_image, processed_cropped_image, license_plate_text = drawings( image, boxes, scale_x, scale_y, sigma, block_size, constant, ocr) return result_img, license_plate_image, processed_cropped_image, license_plate_text def predict(image, ocr, sigma, block_size, constant, min_confidence): size = 640 ort_session = ort.InferenceSession(model_path) result_img, _, processed_cropped_image, license_plate_text = yolo_predictions( image, size, sigma, block_size, constant, min_confidence, ort_session, ocr) return result_img, processed_cropped_image, license_plate_text # Add output license plate text, and add examples and description iface = gr.Interface( fn=predict, inputs=[ "image", gr.Dropdown(choices=['pytesseract', 'easyocr'], value="pytesseract", label='OCR Method'), gr.Slider(minimum=0, maximum=1, step=0.01, value=0.33, label='Sigma for Auto Canny'), gr.Number(value=41, label='Block Size for Adaptive Threshold'), gr.Number(value=1, label='Constant for Adaptive Threshold'), gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label='Minimum Confidence for NMS') ], outputs=[ gr.Image(label="Predicted image"), gr.Image(label="Processed license plate image"), gr.Textbox(label="Predicted license plate number") ], examples=examples ) iface.launch()