ml-debi's picture
add app
1d241a3
raw
history blame
No virus
7.58 kB
import cv2
import numpy as np
import onnxruntime as ort
import pytesseract
from PIL import Image
import gradio as gr
import torchvision
from huggingface_hub import hf_hub_download
# Human-readable title for the app. It is not referenced anywhere else in the
# visible part of this file.
app_title = "License Plate Object Detection"
#model = ["ml-debi/yolov8_license_plate_detection"]
def build_tesseract_options(psm=7):
    """Return the Tesseract config string used for plate OCR.

    The string whitelists upper-case ASCII letters and digits, then selects
    the page segmentation mode given by ``psm``.
    """
    allowed_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    return f"-c tessedit_char_whitelist={allowed_chars} --psm {psm}"
# Cropped image processing
def auto_canny(image, sigma=0.33):
    """Run Canny edge detection with thresholds derived from the image median.

    The two thresholds are the median pixel value scaled by ``1 - sigma`` and
    ``1 + sigma``, clamped to the range [0, 255] and truncated to integers.

    Returns:
        The edge map produced by ``cv2.Canny``.
    """
    median_intensity = np.median(image)
    low_threshold = int(max(0, (1.0 - sigma) * median_intensity))
    high_threshold = int(min(255, (1.0 + sigma) * median_intensity))
    return cv2.Canny(image, low_threshold, high_threshold)
def ocr_image_process(img, sigma, block_size, constant):
    """Blank out everything in a plate crop except character-shaped regions.

    The image is adaptively thresholded, edge-detected and contoured. Every
    contour whose bounding box has a character-like relative area and aspect
    ratio is kept; the rest of the image is turned white. The result is
    returned as a single-channel grayscale array.

    Args:
        img: Plate crop as a PIL image or a NumPy array with colour channels.
        sigma: Threshold spread forwarded to ``auto_canny``.
        block_size: Neighbourhood size for ``cv2.adaptiveThreshold``. It is
            truncated to an int and coerced to an odd value of at least 3.
        constant: Constant subtracted by ``cv2.adaptiveThreshold`` (truncated
            to an int).

    Returns:
        Grayscale ``numpy.ndarray`` with non-character areas set to white.
    """
    # The OpenCV calls below need an array; arrays are used as they are.
    if isinstance(img, Image.Image):
        img = np.array(img)

    # NOTE(review): COLOR_BGR2GRAY assumes BGR channel order, while arrays
    # built from RGB PIL images are RGB - confirm which order is intended.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # adaptiveThreshold rejects even block sizes and sizes below 3, and this
    # value arrives from a free-form numeric input, so snap it to a legal one.
    block_size = max(3, int(block_size) | 1)
    thresh_inv = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV,
        block_size, int(constant))

    edges = auto_canny(thresh_inv, sigma)
    contours, _ = cv2.findContours(
        edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    img_area = img.shape[0] * img.shape[1]

    # White canvas; character boxes are punched out in black below.
    mask = np.ones(img.shape, dtype="uint8") * 255

    # Drawing order does not affect a filled mask, so no sorting is needed.
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        roi_ratio = (w * h) / img_area
        has_char_area = 0.015 <= roi_ratio < 0.09
        # Characters are taller than wide, but not more than 3x as tall.
        has_char_shape = (h > 1.2 * w) and (3 * w >= h)
        if has_char_area and has_char_shape:
            cv2.rectangle(mask, (x, y), (x + w, y + h), (0, 0, 0), -1)

    # OR-ing with the mask keeps the original pixels inside the black boxes
    # and forces everything else to white. It must run once, after all boxes
    # are drawn, otherwise later boxes would already have been whitened.
    img = cv2.bitwise_or(img, mask)

    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
def get_detections(image_path, size, ort_session):
    """Load an image, resize it to the model input size and run the detector.

    Args:
        image_path: Path to an image file (str) or an image as a NumPy array.
        size: Side length, in pixels, of the square image fed to the model.
        ort_session: Object exposing ``run(output_names, input_feed)``; it is
            called with the input tensor under the name ``'images'``.

    Returns:
        Tuple ``(image, outputs, scale_x, scale_y)``: the RGB PIL image at its
        original resolution, whatever ``ort_session.run`` returned, and the
        factors that map coordinates in the resized image back to the
        original image along x and y.

    Raises:
        ValueError: If ``image_path`` is neither a str nor a NumPy array.
    """
    if isinstance(image_path, str):
        # convert() forces the pixel data to load, so the file can be closed
        # as soon as the context manager exits.
        with Image.open(image_path) as opened:
            image = opened.convert('RGB')
    elif isinstance(image_path, np.ndarray):
        image = Image.fromarray(image_path).convert('RGB')
    else:
        raise ValueError(
            "image_path must be a file path (str) or a NumPy array.")
    # Every input is normalised to 3-channel RGB in memory. This drops any
    # alpha channel (e.g. from PNGs) without writing a converted copy to disk.

    scale_x = image.width / size
    scale_y = image.height / size

    resized_image = image.resize((size, size))
    transform = torchvision.transforms.ToTensor()
    # unsqueeze(0) adds the leading batch dimension.
    input_tensor = transform(resized_image).unsqueeze(0)
    outputs = ort_session.run(None, {'images': input_tensor.numpy()})
    return image, outputs, scale_x, scale_y
def non_maximum_supression(outputs, min_confidence):
    """Select the single most confident detection, if it is confident enough.

    ``outputs[0][0]`` is read as a 2-D array whose row 4 holds one confidence
    per candidate and whose columns are the candidates.

    Returns:
        The column of the highest-confidence candidate when that confidence
        is strictly greater than ``min_confidence``; otherwise ``None``.
    """
    candidates = outputs[0][0]
    scores = candidates[4]
    best_index = np.argmax(scores)
    best_score = scores[best_index]
    return candidates[:, best_index] if best_score > min_confidence else None
_EASYOCR_READER_CACHE = {}


def _get_easyocr_reader():
    """Return a shared English ``easyocr.Reader``, creating it on first use."""
    if "en" not in _EASYOCR_READER_CACHE:
        # Imported lazily so easyocr is only needed when that backend is used.
        import easyocr
        # Building a Reader is expensive, so it is done once and reused
        # instead of once per processed image.
        _EASYOCR_READER_CACHE["en"] = easyocr.Reader(['en'])
    return _EASYOCR_READER_CACHE["en"]


def drawings(image, boxes, scale_x, scale_y, sigma, block_size, constant, ocr):
    """Crop the detected plate, OCR it and draw the detection on the image.

    Args:
        image: PIL image at its original resolution.
        boxes: Five values ``(x, y, w, h, c)``: box centre, box size and
            confidence, in the coordinates of the resized model input.
        scale_x: Factor that maps x coordinates back to ``image``.
        scale_y: Factor that maps y coordinates back to ``image``.
        sigma: Forwarded to ``ocr_image_process``.
        block_size: Forwarded to ``ocr_image_process``.
        constant: Forwarded to ``ocr_image_process``.
        ocr: ``"easyocr"`` selects EasyOCR; any other value uses pytesseract.

    Returns:
        Tuple ``(annotated_image, license_plate_image,
        processed_cropped_image, license_plate_text)``. The annotated image
        is a NumPy array; the text is ``"No result found"`` when the OCR
        backend returns nothing.
    """
    x, y, w, h, c = boxes
    # Centre/size -> corner coordinates, rescaled to the original image.
    x_min, y_min = (x - w / 2) * scale_x, (y - h / 2) * scale_y
    x_max, y_max = (x + w / 2) * scale_x, (y + h / 2) * scale_y

    license_plate_image = image.crop((x_min, y_min, x_max, y_max))
    processed_cropped_image = ocr_image_process(
        license_plate_image, sigma, block_size, constant)

    if ocr == "easyocr":
        result = _get_easyocr_reader().readtext(processed_cropped_image)
        # Each entry's second item is the recognised text; use the first hit.
        license_plate_text = result[0][1].upper() if result else ""
    else:
        # Tesseract output ends with newline/form-feed characters; strip them
        # so callers receive just the plate text.
        license_plate_text = pytesseract.image_to_string(
            processed_cropped_image,
            config=build_tesseract_options(7)).strip()

    # Same placeholder for both backends when nothing was recognised.
    if not license_plate_text:
        license_plate_text = "No result found"
    print(license_plate_text)

    # Scale the label font with the image so it stays legible.
    font_scale = 0.001 * max(image.size)
    top_left = (int(x_min), int(y_min))
    bottom_right = (int(x_max), int(y_max))
    image = cv2.rectangle(np.array(image), top_left, bottom_right, (0, 0, 255), 3)
    cv2.putText(image, f'Confidence: {c:.2f}', top_left,
                cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), 1)
    return image, license_plate_image, processed_cropped_image, license_plate_text
def yolo_predictions(image_path, size, sigma, block_size, constant, min_confidence, ort_session, ocr):
    """Run detection on an image and, if a plate is found, OCR and annotate it.

    Args:
        image_path: File path or NumPy array, forwarded to ``get_detections``.
        size: Side length of the square model input.
        sigma: Forwarded to ``drawings``.
        block_size: Forwarded to ``drawings``.
        constant: Forwarded to ``drawings``.
        min_confidence: Threshold the best detection must exceed.
        ort_session: Inference session forwarded to ``get_detections``.
        ocr: OCR backend name forwarded to ``drawings``.

    Returns:
        Tuple ``(result_img, license_plate_image, processed_cropped_image,
        license_plate_text)``. When no detection exceeds ``min_confidence``,
        ``result_img`` is the un-annotated input as a NumPy array, both plate
        images are ``None`` and the text is ``"No license plate detected"``.
    """
    image, outputs, scale_x, scale_y = get_detections(
        image_path, size, ort_session)
    boxes = non_maximum_supression(outputs, min_confidence)
    if boxes is None:
        # non_maximum_supression returns None when nothing clears the
        # threshold; drawings() cannot unpack that, so stop here.
        return np.array(image), None, None, "No license plate detected"
    return drawings(
        image, boxes, scale_x, scale_y, sigma, block_size, constant, ocr)
def predict(image, ocr, sigma, block_size, constant, min_confidence):
    """Callback for the UI: detect a plate in ``image`` and read its text.

    A new ONNX Runtime session is created on every call and the image is
    processed at a fixed 640-pixel model input size.

    Returns:
        Tuple ``(result_img, processed_cropped_image, license_plate_text)``;
        the raw plate crop returned by ``yolo_predictions`` is dropped.
    """
    # NOTE(review): this string looks like a Hub repository id rather than a
    # local ONNX file, and hf_hub_download is imported at the top of the file
    # but never called - confirm how the model file is meant to be resolved.
    session = ort.InferenceSession("ml-debi/yolov8_license_plate_detection")
    annotated, _plate_crop, processed_plate, plate_text = yolo_predictions(
        image, 640, sigma, block_size, constant, min_confidence, session, ocr)
    return annotated, processed_plate, plate_text
# Add output license plate text, and add examples and description
# Input widgets, listed in the positional order of predict()'s parameters.
_input_components = [
    "image",
    gr.Dropdown(choices=['pytesseract', 'easyocr'], value="pytesseract", label='OCR Method'),
    gr.Slider(minimum=0, maximum=1, step=0.01, value=0.33, label='Sigma for Auto Canny'),
    gr.Number(value=41, label='Block Size for Adaptive Threshold'),
    gr.Number(value=1, label='Constant for Adaptive Threshold'),
    gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label='Minimum Confidence for NMS'),
]

# Output widgets, listed in the order of predict()'s return tuple.
_output_components = [
    gr.Image(label="Predicted image"),
    gr.Image(label="Processed license plate image"),
    gr.Textbox(label="Predicted license plate number"),
]

iface = gr.Interface(
    fn=predict,
    inputs=_input_components,
    outputs=_output_components,
)
iface.launch()