Spaces:
Runtime error
Runtime error
File size: 7,544 Bytes
1d241a3 33f07f1 1d241a3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 |
import cv2
import numpy as np
import onnxruntime as ort
import pytesseract
from PIL import Image
import gradio as gr
import torchvision
from huggingface_hub import hf_hub_download
# Human-readable title for the app (not referenced elsewhere in the visible code).
app_title = "License Plate Object Detection"
# Disabled alternative model identifier; presumably a Hugging Face Hub repo id -- TODO confirm.
#model = ["ml-debi/yolov8_license_plate_detection"]
# Path to the ONNX weights that predict() loads, relative to the working directory.
model_path = "./best.onnx"
def build_tesseract_options(psm=7):
    """Build the Tesseract configuration string used for plate OCR.

    The whitelist restricts recognition to upper-case ASCII letters and
    digits; ``psm`` is inserted verbatim after the ``--psm`` flag.

    Args:
        psm: Page segmentation mode value to pass to Tesseract.

    Returns:
        A single option string of the form
        ``"-c tessedit_char_whitelist=<chars> --psm <psm>"``.
    """
    allowed_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    whitelist_flag = f"-c tessedit_char_whitelist={allowed_chars}"
    psm_flag = f"--psm {psm}"
    return " ".join((whitelist_flag, psm_flag))
# Cropped image processing
def auto_canny(image, sigma=0.33):
    """Run Canny edge detection with thresholds derived from the image median.

    Args:
        image: Image array handed to ``np.median`` and ``cv2.Canny``.
        sigma: Fractional spread around the median used for the two
            hysteresis thresholds.

    Returns:
        The edge map produced by ``cv2.Canny``.
    """
    median = np.median(image)
    # Thresholds sit sigma below/above the median, limited to the 0-255 range.
    low_threshold = int(max(0, (1.0 - sigma) * median))
    high_threshold = int(min(255, (1.0 + sigma) * median))
    return cv2.Canny(image, low_threshold, high_threshold)
def ocr_image_process(img, sigma, block_size, constant):
    """Blank out everything except character-shaped regions of a plate crop.

    The image is converted to grayscale, adaptively thresholded and edge
    detected. Each external contour whose bounding box looks like a single
    character (by relative area and aspect ratio) is kept; every other pixel
    is forced to white.

    Args:
        img: Plate crop as a PIL image or NumPy array. Colour input is
            treated as RGB-ordered, which is what the caller in this file
            passes (a PIL crop).
        sigma: Spread parameter forwarded to ``auto_canny``.
        block_size: Neighbourhood size for ``cv2.adaptiveThreshold``. OpenCV
            requires an odd value greater than 1, so the value is raised to
            the nearest valid size instead of letting OpenCV raise.
        constant: Constant subtracted from the local mean by the threshold.

    Returns:
        A 2-D grayscale array in which only the retained character boxes
        keep their original intensities; all other pixels are 255.
    """
    # np.array accepts both PIL images and ndarrays and always yields a copy,
    # so no round trip through PIL is needed.
    img = np.array(img)

    # Coerce the user-supplied block size to an odd value >= 3.
    block_size = max(3, int(block_size))
    if block_size % 2 == 0:
        block_size += 1

    # PIL/Gradio images are RGB ordered, so use the RGB conversion code.
    # Already-grayscale (2-D) input is used as is.
    gray = img if img.ndim == 2 else cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    thresh_inv = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV,
        block_size, int(constant))
    edges = auto_canny(thresh_inv, sigma)
    contours, _ = cv2.findContours(
        edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    img_area = gray.shape[0] * gray.shape[1]
    # White mask; character boxes are painted black so that the OR below
    # lets the underlying pixels through only inside those boxes.
    mask = np.full(gray.shape, 255, dtype="uint8")
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        area_ratio = (w * h) / img_area
        if not 0.015 <= area_ratio < 0.09:
            continue
        # Keep boxes that are taller than wide, but at most 3x as tall.
        if not 1.2 * w < h <= 3 * w:
            continue
        cv2.rectangle(mask, (x, y), (x + w, y + h), (0, 0, 0), -1)

    return cv2.bitwise_or(gray, mask)
def get_detections(image_path, size, ort_session):
    """Load an image, run the ONNX detector on it and return the raw outputs.

    Args:
        image_path: Either a file path (``str``) or an image as a NumPy
            array.
        size: Side length, in pixels, of the square input fed to the model.
        ort_session: Object exposing ``run(output_names, feeds)``; called
            with a single input named ``'images'``.

    Returns:
        A tuple ``(image, outputs, scale_x, scale_y)`` where ``image`` is the
        RGB PIL image at its original resolution, ``outputs`` is whatever
        ``ort_session.run`` returned, and the scale factors map coordinates
        in the resized input back to the original image.

    Raises:
        ValueError: If ``image_path`` is neither a ``str`` nor a NumPy array.
    """
    if isinstance(image_path, str):
        image = Image.open(image_path)
    elif isinstance(image_path, np.ndarray):
        image = Image.fromarray(image_path)
    else:
        raise ValueError(
            "image_path must be a file path (str) or a NumPy array.")

    # Drop alpha / expand grayscale in memory so the tensor below always has
    # three channels. This replaces the earlier PNG -> JPG re-save, which
    # relied on an unimported `os` module and wrote a lossy copy to disk.
    if image.mode != "RGB":
        image = image.convert("RGB")

    scale_x = image.width / size
    scale_y = image.height / size
    resized_image = image.resize((size, size))

    # ToTensor yields a channel-first float tensor; unsqueeze adds the batch
    # dimension before handing the data to ONNX Runtime as a NumPy array.
    transform = torchvision.transforms.ToTensor()
    input_tensor = transform(resized_image).unsqueeze(0)
    outputs = ort_session.run(None, {'images': input_tensor.numpy()})
    return image, outputs, scale_x, scale_y
def non_maximum_supression(outputs, min_confidence):
    """
    Select the single highest-confidence candidate from the model output.

    ``outputs[0][0]`` is indexed as a 2-D array whose row 4 holds the
    confidence of each candidate (one candidate per column).

    Returns the full column of the best candidate when its confidence is
    strictly greater than ``min_confidence``; otherwise ``None``.
    """
    candidates = outputs[0][0]
    scores = candidates[4]
    best = np.argmax(scores)
    if not scores[best] > min_confidence:
        return None
    return candidates[:, best]
def drawings(image, boxes, scale_x, scale_y, sigma, block_size, constant, ocr):
    """Crop the detected plate, OCR it and draw the detection on the image.

    Args:
        image: PIL image at original resolution.
        boxes: Five values ``(x, y, w, h, c)``: box centre, box size (both in
            resized-input pixels) and confidence.
        scale_x: Horizontal factor mapping resized-input pixels to ``image``.
        scale_y: Vertical factor mapping resized-input pixels to ``image``.
        sigma: Forwarded to ``ocr_image_process``.
        block_size: Forwarded to ``ocr_image_process``.
        constant: Forwarded to ``ocr_image_process``.
        ocr: ``"easyocr"`` selects EasyOCR; any other value uses Tesseract.

    Returns:
        ``(annotated, plate_crop, processed_crop, text)``: the image as a
        NumPy array with the box and confidence drawn on it, the PIL crop of
        the plate, the pre-processed crop given to the OCR engine, and the
        recognised text (``"No result found"`` when the engine returns
        nothing).
    """
    x, y, w, h, c = (float(v) for v in boxes)

    # Centre/size -> corner coordinates in the original image, clamped to the
    # image so the crop never contains padding from outside the picture.
    x_min = max(0.0, (x - w / 2) * scale_x)
    y_min = max(0.0, (y - h / 2) * scale_y)
    x_max = min(float(image.width), (x + w / 2) * scale_x)
    y_max = min(float(image.height), (y + h / 2) * scale_y)

    license_plate_image = image.crop((x_min, y_min, x_max, y_max))
    processed_cropped_image = ocr_image_process(
        license_plate_image, sigma, block_size, constant)

    if ocr == "easyocr":
        # Imported lazily so the dependency is only needed when selected.
        import easyocr
        reader = easyocr.Reader(['en'])
        result = reader.readtext(processed_cropped_image)
        # Only the text of the first detected region is used.
        license_plate_text = result[0][1].upper() if result else ""
    else:
        license_plate_text = pytesseract.image_to_string(
            processed_cropped_image,
            config=build_tesseract_options(7))

    # Tesseract appends newline/form-feed characters; strip them and report
    # an empty result the same way for both engines.
    license_plate_text = license_plate_text.strip() or "No result found"
    print(license_plate_text)

    # Scale the label with the image so it stays legible on large inputs.
    font_scale = 0.001 * max(image.size)
    top_left = (int(x_min), int(y_min))
    bottom_right = (int(x_max), int(y_max))
    annotated = cv2.rectangle(
        np.array(image), top_left, bottom_right, (0, 0, 255), 3)
    cv2.putText(annotated, f'Confidence: {c:.2f}', top_left,
                cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), 1)
    return annotated, license_plate_image, processed_cropped_image, license_plate_text
def yolo_predictions(image_path, size, sigma, block_size, constant, min_confidence, ort_session, ocr):
    """Run detection, pick the best box and OCR the plate.

    Args:
        image_path: File path or NumPy image, forwarded to ``get_detections``.
        size: Side length of the square model input.
        sigma: Forwarded to ``drawings``.
        block_size: Forwarded to ``drawings``.
        constant: Forwarded to ``drawings``.
        min_confidence: Threshold the best candidate must exceed.
        ort_session: Inference session forwarded to ``get_detections``.
        ocr: OCR engine selector forwarded to ``drawings``.

    Returns:
        ``(result_img, license_plate_image, processed_cropped_image,
        license_plate_text)``. When no candidate exceeds ``min_confidence``
        the unannotated image is returned as a NumPy array, both crops are
        ``None`` and the text is ``"No license plate detected"``.
    """
    image, outputs, scale_x, scale_y = get_detections(
        image_path, size, ort_session)
    boxes = non_maximum_supression(outputs, min_confidence)

    # non_maximum_supression returns None below the threshold; unpacking that
    # in drawings() would raise TypeError, so short-circuit here.
    if boxes is None:
        return np.array(image), None, None, "No license plate detected"

    return drawings(
        image, boxes, scale_x, scale_y, sigma, block_size, constant, ocr)
# ONNX Runtime sessions keyed by model path, so the weights are read from
# disk once per process rather than once per request.
_ORT_SESSIONS = {}


def _get_ort_session(path):
    """Return the cached inference session for ``path``, creating it on first use."""
    session = _ORT_SESSIONS.get(path)
    if session is None:
        session = ort.InferenceSession(path)
        _ORT_SESSIONS[path] = session
    return session


def predict(image, ocr, sigma, block_size, constant, min_confidence):
    """Gradio callback: detect a license plate in ``image`` and read it.

    Args:
        image: Input image as delivered by the Gradio ``"image"`` component.
        ocr: OCR engine name chosen in the dropdown.
        sigma: Sigma for automatic Canny thresholding.
        block_size: Block size for adaptive thresholding.
        constant: Constant for adaptive thresholding.
        min_confidence: Minimum detection confidence.

    Returns:
        ``(result_img, processed_cropped_image, license_plate_text)`` in the
        order of the interface's output components.
    """
    size = 640  # side length of the square input fed to the model
    ort_session = _get_ort_session(model_path)
    result_img, _, processed_cropped_image, license_plate_text = yolo_predictions(
        image, size, sigma, block_size, constant, min_confidence, ort_session, ocr)
    return result_img, processed_cropped_image, license_plate_text
# Add output license plate text, and add examples and description
# Input widgets, listed in the positional order of predict()'s parameters.
input_components = [
    "image",
    gr.Dropdown(choices=['pytesseract', 'easyocr'], value="pytesseract", label='OCR Method'),
    gr.Slider(minimum=0, maximum=1, step=0.01, value=0.33, label='Sigma for Auto Canny'),
    gr.Number(value=41, label='Block Size for Adaptive Threshold'),
    gr.Number(value=1, label='Constant for Adaptive Threshold'),
    gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label='Minimum Confidence for NMS'),
]

# Output widgets, matching the three values returned by predict().
output_components = [
    gr.Image(label="Predicted image"),
    gr.Image(label="Processed license plate image"),
    gr.Textbox(label="Predicted license plate number"),
]

iface = gr.Interface(
    fn=predict,
    inputs=input_components,
    outputs=output_components,
)
iface.launch()
|