# Source: Hugging Face Space "ml-debi/yolo-license-plate-detection"
# Space status at capture time: Runtime error
# File size: 7,544 bytes

import cv2
import numpy as np
import onnxruntime as ort
import pytesseract
from PIL import Image
import gradio as gr
import torchvision
from huggingface_hub import hf_hub_download


# Human-readable title string for this app.
app_title = "License Plate Object Detection"
# NOTE(review): looks like a leftover Hub repo id for the model; kept for reference.
#model = ["ml-debi/yolov8_license_plate_detection"]
# Location of the ONNX model file that predict() loads.
model_path = "./best.onnx"

def build_tesseract_options(psm=7):
    """Assemble the Tesseract configuration string used for plate OCR.

    The returned options restrict recognition to upper-case ASCII letters
    and digits, and select the page segmentation mode given by ``psm``.
    """
    allowed_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    return f"-c tessedit_char_whitelist={allowed_chars} --psm {psm}"

# Cropped image processing
def auto_canny(image, sigma=0.33):
    """Run Canny edge detection with thresholds derived from the image median.

    The lower and upper hysteresis thresholds sit a fraction ``sigma`` below
    and above the median pixel intensity, clamped to the range [0, 255].
    Returns the edge map produced by ``cv2.Canny``.
    """
    median_intensity = np.median(image)
    low_threshold = int(max(0, (1.0 - sigma) * median_intensity))
    high_threshold = int(min(255, (1.0 + sigma) * median_intensity))
    return cv2.Canny(image, low_threshold, high_threshold)



def ocr_image_process(img, sigma, block_size, constant):
    """Whiten everything in a plate crop except character-shaped regions.

    The crop is adaptively thresholded, edges are extracted with
    ``auto_canny`` and external contours are located. Contours whose bounding
    box looks like a character (by relative area and aspect ratio) are kept;
    every other pixel is forced to white before the result is converted to
    grayscale.

    Args:
        img: Colour image as a PIL image or a NumPy array.
        sigma: Sigma forwarded to ``auto_canny``.
        block_size: Neighbourhood size for ``cv2.adaptiveThreshold``. OpenCV
            only accepts odd values greater than 1, so the value is coerced
            to the nearest valid size (minimum 3, even values bumped by one).
        constant: Constant subtracted from the local mean by the threshold.

    Returns:
        A single-channel NumPy image ready to be handed to an OCR engine.
    """
    # Work on a NumPy copy regardless of whether a PIL image or array came in.
    img = np.array(img)

    # cv2.adaptiveThreshold rejects even or too-small block sizes; the value
    # comes straight from a free-form UI number field, so sanitise it here.
    block = int(block_size)
    if block < 3:
        block = 3
    elif block % 2 == 0:
        block += 1

    # NOTE(review): crops passed in by drawings() originate from PIL images,
    # which are presumably RGB rather than BGR; the conversion code is kept
    # unchanged so existing threshold tuning is not disturbed - confirm.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh_inv = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV,
        block, int(constant))
    edges = auto_canny(thresh_inv, sigma)
    contours, _ = cv2.findContours(
        edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    img_area = img.shape[0] * img.shape[1]
    # Start from an all-white mask; character boxes are punched out in black.
    mask = np.ones(img.shape, dtype="uint8") * 255

    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        area_ratio = (w * h) / img_area
        is_char_sized = 0.015 <= area_ratio < 0.09
        # Characters are taller than wide, but not more than three times.
        is_char_shaped = 1.2 * w < h <= 3 * w
        if is_char_sized and is_char_shaped:
            cv2.rectangle(mask, (x, y), (x + w, y + h), (0, 0, 0), -1)

    # OR-ing with the mask keeps pixels inside the black boxes and turns
    # everything else white.
    img = cv2.bitwise_or(img, mask)
    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)


def get_detections(image_path, size, ort_session):
    """Load an image, resize it to the model input size and run the model.

    Args:
        image_path: Either a file path (``str``) or an image as a NumPy array.
        size: Side length, in pixels, of the square model input.
        ort_session: Session object whose ``run`` method executes the model;
            the input tensor is fed under the name ``'images'``.

    Returns:
        A tuple ``(image, outputs, scale_x, scale_y)`` where ``image`` is the
        RGB PIL image at its original resolution, ``outputs`` is the raw
        result of ``ort_session.run`` and the scale factors map coordinates
        in the resized input back to the original image.

    Raises:
        ValueError: If ``image_path`` is neither a string nor a NumPy array.
    """
    if isinstance(image_path, str):
        # Convert in memory instead of re-saving PNGs as JPEG next to the
        # input: no stray files, no lossy re-encode, and the file handle is
        # closed as soon as the pixel data has been read.
        with Image.open(image_path) as source:
            image = source.convert('RGB')
    elif isinstance(image_path, np.ndarray):
        # Normalise to three channels so alpha or grayscale arrays also work.
        image = Image.fromarray(image_path).convert('RGB')
    else:
        raise ValueError(
            "image_path must be a file path (str) or a NumPy array.")

    scale_x = image.width / size
    scale_y = image.height / size
    resized_image = image.resize((size, size))
    transform = torchvision.transforms.ToTensor()
    # Add the leading batch dimension before handing the tensor to the model.
    input_tensor = transform(resized_image).unsqueeze(0)
    outputs = ort_session.run(None, {'images': input_tensor.numpy()})
    return image, outputs, scale_x, scale_y


def non_maximum_supression(outputs, min_confidence):
    """
    Pick the single highest-scoring candidate from the model output.

    Row 4 of ``outputs[0][0]`` is read as the per-candidate confidence. The
    column with the largest confidence is returned when that confidence is
    strictly greater than ``min_confidence``; otherwise ``None`` is returned.
    No overlap-based suppression is performed.
    """
    predictions = outputs[0][0]
    scores = predictions[4]
    best_index = np.argmax(scores)
    if not scores[best_index] > min_confidence:
        return None
    return predictions[:, best_index]


# Cache of EasyOCR readers keyed by language tuple; building a reader loads
# its models, so it is done once instead of on every request.
_EASYOCR_READERS = {}


def _get_easyocr_reader(languages=('en',)):
    """Return a cached EasyOCR reader for ``languages``, creating it on first use."""
    reader = _EASYOCR_READERS.get(languages)
    if reader is None:
        # Imported lazily: only needed when the EasyOCR backend is selected.
        import easyocr
        reader = easyocr.Reader(list(languages))
        _EASYOCR_READERS[languages] = reader
    return reader


def drawings(image, boxes, scale_x, scale_y, sigma, block_size, constant, ocr):
    """
    Crop the detected plate, OCR it and draw the detection on the image.

    Args:
        image: PIL image at original resolution.
        boxes: Five values ``(x, y, w, h, c)``: box centre, box size and
            confidence, expressed in the resized model-input coordinates.
        scale_x: Horizontal factor mapping model coordinates to ``image``.
        scale_y: Vertical factor mapping model coordinates to ``image``.
        sigma: Forwarded to ``ocr_image_process``.
        block_size: Forwarded to ``ocr_image_process``.
        constant: Forwarded to ``ocr_image_process``.
        ocr: ``"easyocr"`` selects EasyOCR; any other value uses Tesseract.

    Returns:
        A tuple ``(annotated_image, license_plate_image,
        processed_cropped_image, license_plate_text)``: the NumPy image with
        the box and confidence drawn, the raw PIL crop, the preprocessed crop
        and the recognised text.
    """
    x, y, w, h, c = boxes
    # Convert centre/size to corner coordinates in the original image.
    x_min, y_min = (x - w / 2) * scale_x, (y - h / 2) * scale_y
    x_max, y_max = (x + w / 2) * scale_x, (y + h / 2) * scale_y
    license_plate_image = image.crop((x_min, y_min, x_max, y_max))
    processed_cropped_image = ocr_image_process(
        license_plate_image, sigma, block_size, constant)

    if ocr == "easyocr":
        result = _get_easyocr_reader().readtext(processed_cropped_image)
        try:
            # Only the text of the first reported region is used.
            license_plate_text = str.upper(result[0][1])
        except IndexError:
            license_plate_text = "No result found"
    else:
        options = build_tesseract_options(7)
        # Strip the trailing newline/whitespace Tesseract appends to its output.
        license_plate_text = pytesseract.image_to_string(
            processed_cropped_image,
            config=options).strip()
    print(license_plate_text)

    # Scale the label font with the image so it stays legible.
    font_scale = 0.001 * max(image.size)

    top_left = (int(x_min), int(y_min))
    bottom_right = (int(x_max), int(y_max))
    annotated = cv2.rectangle(
        np.array(image), top_left, bottom_right, (0, 0, 255), 3)
    cv2.putText(annotated, f'Confidence: {c:.2f}', top_left,
                cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), 1)

    return annotated, license_plate_image, processed_cropped_image, license_plate_text


def yolo_predictions(image_path, size, sigma, block_size, constant, min_confidence, ort_session, ocr):
    """
    Run detection on an image and, if a plate is found, OCR and annotate it.

    Args:
        image_path: File path or NumPy array, as accepted by ``get_detections``.
        size: Side length of the square model input.
        sigma: Forwarded to ``drawings``.
        block_size: Forwarded to ``drawings``.
        constant: Forwarded to ``drawings``.
        min_confidence: Confidence a detection must exceed to be used.
        ort_session: Session object used to run the model.
        ocr: OCR backend selector forwarded to ``drawings``.

    Returns:
        A tuple ``(result_img, license_plate_image, processed_cropped_image,
        license_plate_text)``. When no detection exceeds ``min_confidence``
        the unannotated image is returned as a NumPy array, both crops are
        ``None`` and the text reports that nothing was detected.
    """
    image, outputs, scale_x, scale_y = get_detections(
        image_path, size, ort_session)
    boxes = non_maximum_supression(outputs, min_confidence)
    if boxes is None:
        # Nothing cleared the threshold: drawings() cannot unpack None, so
        # report the miss instead of crashing.
        return np.array(image), None, None, "No license plate detected"
    return drawings(
        image, boxes, scale_x, scale_y, sigma, block_size, constant, ocr)


# ONNX Runtime sessions keyed by model path; loading the model is expensive,
# so each path is loaded once and reused across requests.
_ORT_SESSIONS = {}


def _get_ort_session(path):
    """Return a cached inference session for ``path``, creating it on first use."""
    session = _ORT_SESSIONS.get(path)
    if session is None:
        session = ort.InferenceSession(path)
        _ORT_SESSIONS[path] = session
    return session


def predict(image, ocr, sigma, block_size, constant, min_confidence):
    """Run plate detection and OCR on one image.

    Args:
        image: Input image, passed through to ``yolo_predictions``.
        ocr: OCR backend selector (``'pytesseract'`` or ``'easyocr'``).
        sigma: Sigma for the automatic Canny step.
        block_size: Block size for the adaptive threshold.
        constant: Constant for the adaptive threshold.
        min_confidence: Confidence a detection must exceed to be used.

    Returns:
        A tuple ``(result_img, processed_cropped_image, license_plate_text)``.
    """
    size = 640  # side length of the square model input
    ort_session = _get_ort_session(model_path)

    result_img, _, processed_cropped_image, license_plate_text = yolo_predictions(
        image, size, sigma, block_size, constant, min_confidence, ort_session, ocr)

    return result_img, processed_cropped_image, license_plate_text


# Gradio UI: one image input plus the OCR/preprocessing controls, producing
# the annotated image, the preprocessed plate crop and the recognised text.
# TODO: add example images and a description to the interface.
iface = gr.Interface(
    fn=predict,
    # Show the app title that was defined at the top of the file.
    title=app_title,
    inputs=[
        "image",
        gr.Dropdown(choices=['pytesseract', 'easyocr'], value="pytesseract", label='OCR Method'),
        gr.Slider(minimum=0, maximum=1, step=0.01, value=0.33, label='Sigma for Auto Canny'),
        gr.Number(value=41, label='Block Size for Adaptive Threshold'),
        gr.Number(value=1, label='Constant for Adaptive Threshold'),
        gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label='Minimum Confidence for NMS'),
    ],
    outputs=[
        gr.Image(label="Predicted image"),
        gr.Image(label="Processed license plate image"),
        gr.Textbox(label="Predicted license plate number"),
    ],
)
iface.launch()