Spaces:

ml-debi
/

yolo-license-plate-detection

Runtime error

App Files Files Community

ml-debi committed on Dec 13, 2023

Commit

1d241a3

•

1 Parent(s): f5e2541

add app

Browse files

Files changed (3) hide show

app.py +198 -0
packages.txt +1 -0
requirements.txt +99 -0

app.py ADDED Viewed

	@@ -0,0 +1,198 @@

+import cv2
+import numpy as np
+import onnxruntime as ort
+import pytesseract
+from PIL import Image
+import gradio as gr
+import torchvision
+from huggingface_hub import hf_hub_download
+# Human-readable title of the demo.
+app_title = "License Plate Object Detection"
+# Disabled setting naming a Hugging Face repo id; kept commented out as in the original.
+#model = ["ml-debi/yolov8_license_plate_detection"]
+def build_tesseract_options(psm=7):
+    """
+    Build the option string handed to Tesseract for plate OCR.
+    The string whitelists upper-case letters and digits and appends
+    ``--psm <psm>``, where `psm` is inserted verbatim.
+    """
+    # only these characters may appear in the OCR result
+    allowed_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+    whitelist_flag = f"-c tessedit_char_whitelist={allowed_chars}"
+    psm_flag = f"--psm {psm}"
+    return " ".join((whitelist_flag, psm_flag))
+# Cropped image processing
+def auto_canny(image, sigma=0.33):
+    """
+    Run Canny edge detection with thresholds derived from the image median.
+    The lower/upper thresholds sit a fraction `sigma` below/above the median
+    pixel intensity and are clamped to the range [0, 255].
+    """
+    median_intensity = np.median(image)
+    # clamp both thresholds so they stay inside the 8-bit intensity range
+    low_threshold = int(max(0, (1.0 - sigma) * median_intensity))
+    high_threshold = int(min(255, (1.0 + sigma) * median_intensity))
+    return cv2.Canny(image, low_threshold, high_threshold)
+def ocr_image_process(img, sigma, block_size, constant):
+    """
+    Keep only character-shaped regions of a cropped plate image for OCR.
+    Args:
+        img: PIL image or numpy array of the cropped plate.
+        sigma: fraction passed to `auto_canny` for its thresholds.
+        block_size: neighbourhood size for the adaptive threshold; it is
+            truncated to an int and bumped to the next odd value >= 3.
+        constant: value subtracted from the local mean by the adaptive
+            threshold (truncated to an int).
+    Returns:
+        A single-channel uint8 image in which everything outside the
+        selected character boxes is white.
+    """
+    # Normalise every accepted input to a 3-channel RGB array so the colour
+    # conversions below always see the same layout (PIL yields RGB order).
+    if isinstance(img, np.ndarray):
+        img = Image.fromarray(img)
+    img = np.array(img.convert("RGB"))
+    # cv2.adaptiveThreshold rejects block sizes that are even or smaller than 3
+    block_size = max(3, int(block_size))
+    if block_size % 2 == 0:
+        block_size += 1
+    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+    thresh_inv = cv2.adaptiveThreshold(
+        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV,
+        block_size, int(constant))
+    edges = auto_canny(thresh_inv, sigma)
+    ctrs, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    img_area = img.shape[0] * img.shape[1]
+    # Start from an all-white mask; character boxes are painted black so the
+    # bitwise OR below leaves the original pixels visible only inside them.
+    mask = np.full(img.shape, 255, dtype="uint8")
+    for ctr in ctrs:
+        x, y, w, h = cv2.boundingRect(ctr)
+        roi_ratio = (w * h) / img_area
+        # a character box covers 1.5%-9% of the crop and is taller than wide
+        is_char_sized = 0.015 <= roi_ratio < 0.09
+        is_char_shaped = (h > 1.2 * w) and (3 * w >= h)
+        if is_char_sized and is_char_shaped:
+            cv2.rectangle(mask, (x, y), (x + w, y + h), (0, 0, 0), -1)
+    img = cv2.bitwise_or(img, mask)
+    return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+def get_detections(image_path, size, ort_session):
+    """
+    Load an image, resize it to the model input size and run the detector.
+    Args:
+        image_path: file path (str) or numpy array holding the image.
+        size: side length of the square image fed to the model.
+        ort_session: object whose `run` method executes the ONNX model; the
+            input tensor is passed under the name 'images'.
+    Returns:
+        Tuple `(image, outputs, scale_x, scale_y)`: the RGB PIL image at its
+        original size, the raw session outputs, and the factors that map
+        model-space coordinates back to the original image.
+    Raises:
+        ValueError: if `image_path` is neither a str nor a numpy array.
+    """
+    if isinstance(image_path, str):
+        # Convert in memory: this drops any alpha channel without writing a
+        # temporary .jpg next to the input, and the file handle is closed.
+        with Image.open(image_path) as opened:
+            image = opened.convert("RGB")
+    elif isinstance(image_path, np.ndarray):
+        # grayscale / RGBA arrays are normalised to three channels as well
+        image = Image.fromarray(image_path).convert("RGB")
+    else:
+        raise ValueError(
+            "image_path must be a file path (str) or a NumPy array.")
+    scale_x = image.width / size
+    scale_y = image.height / size
+    resized_image = image.resize((size, size))
+    transform = torchvision.transforms.ToTensor()
+    # add the leading batch dimension expected by the session
+    input_tensor = transform(resized_image).unsqueeze(0)
+    outputs = ort_session.run(None, {'images': input_tensor.numpy()})
+    return image, outputs, scale_x, scale_y
+def non_maximum_supression(outputs, min_confidence):
+    """
+    Pick the single highest-scoring detection from the model output.
+    Row 4 of `outputs[0][0]` is read as the per-candidate confidence; the
+    column with the largest value is returned when that value is strictly
+    greater than `min_confidence`, otherwise None is returned.
+    """
+    candidates = outputs[0][0]
+    scores = candidates[4]
+    best_idx = np.argmax(scores)
+    # written as a negated '>' so the comparison matches the original exactly
+    if not (scores[best_idx] > min_confidence):
+        return None
+    return candidates[:, best_idx]
+def drawings(image, boxes, scale_x, scale_y, sigma, block_size, constant, ocr):
+    """
+    Crop the detected plate, run OCR on it and draw the box on the image.
+    Args:
+        image: PIL image at its original size.
+        boxes: five values `(x, y, w, h, c)`: box centre, box size and
+            confidence, in model-input coordinates.
+        scale_x, scale_y: factors mapping model coordinates to `image`.
+        sigma, block_size, constant: forwarded to `ocr_image_process`.
+        ocr: "easyocr" selects easyocr; any other value uses pytesseract.
+    Returns:
+        Tuple `(annotated image as numpy array, cropped plate PIL image,
+        processed crop, recognised text)`.
+    """
+    x, y, w, h, c = boxes
+    # Convert centre/size to corners and clamp them to the image so the crop
+    # never includes padding from outside the picture.
+    x_min = max(0.0, (x - w / 2) * scale_x)
+    y_min = max(0.0, (y - h / 2) * scale_y)
+    x_max = min(float(image.width), (x + w / 2) * scale_x)
+    y_max = min(float(image.height), (y + h / 2) * scale_y)
+    license_plate_image = image.crop((x_min, y_min, x_max, y_max))
+    processed_cropped_image = ocr_image_process(license_plate_image, sigma, block_size, constant)
+    if ocr == "easyocr":
+        result = _get_easyocr_reader().readtext(processed_cropped_image)
+        # each result entry carries the recognised text at index 1
+        license_plate_text = result[0][1].upper() if result else "No result found"
+    else:
+        options = build_tesseract_options(7)
+        # strip the trailing whitespace Tesseract appends to its output
+        license_plate_text = pytesseract.image_to_string(
+            processed_cropped_image,
+            config=options).strip()
+    print(license_plate_text)
+    # Calculate the font scale based on image size
+    font_scale = 0.001 * max(image.size)
+    top_left = (int(x_min), int(y_min))
+    bottom_right = (int(x_max), int(y_max))
+    image = cv2.rectangle(np.array(image), top_left, bottom_right, (0, 0, 255), 3)
+    cv2.putText(image, f'Confidence: {c:.2f}', top_left, cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), 1)
+    return image, license_plate_image, processed_cropped_image, license_plate_text
+_EASYOCR_READER = None
+def _get_easyocr_reader():
+    """Return the shared English easyocr reader, creating it on first use."""
+    global _EASYOCR_READER
+    if _EASYOCR_READER is None:
+        # imported lazily so the dependency is only loaded when it is selected
+        import easyocr
+        _EASYOCR_READER = easyocr.Reader(['en'])
+    return _EASYOCR_READER
+def yolo_predictions(image_path, size, sigma, block_size, constant, min_confidence, ort_session, ocr):
+    """
+    Run detection on an image and, if a plate is found, OCR it.
+    Returns:
+        Tuple `(result image, cropped plate image, processed crop, text)`.
+        When no detection exceeds `min_confidence` the result image is the
+        unannotated input, both crops are None and the text says so.
+    """
+    image, outputs, scale_x, scale_y = get_detections(
+        image_path, size, ort_session)
+    boxes = non_maximum_supression(outputs, min_confidence)
+    if boxes is None:
+        # nothing to crop or draw; avoid unpacking None inside drawings()
+        return np.array(image), None, None, "No license plate detected"
+    return drawings(
+        image, boxes, scale_x, scale_y, sigma, block_size, constant, ocr)
+def predict(image, ocr, sigma, block_size, constant, min_confidence):
+    """
+    Gradio callback: detect a license plate in `image` and read its text.
+    Args:
+        image: input image supplied by the interface.
+        ocr: OCR backend name chosen in the dropdown.
+        sigma, block_size, constant: crop pre-processing parameters.
+        min_confidence: minimum detection confidence to accept.
+    Returns:
+        Tuple `(annotated image, processed plate crop, plate text)`.
+    Raises:
+        gr.Error: if no image was provided.
+    """
+    if image is None:
+        raise gr.Error("Please provide an image.")
+    size = 640
+    ort_session = _get_ort_session("ml-debi/yolov8_license_plate_detection")
+    result_img, _, processed_cropped_image, license_plate_text = yolo_predictions(
+            image, size, sigma, block_size, constant, min_confidence, ort_session, ocr)
+    return result_img, processed_cropped_image, license_plate_text
+_ORT_SESSIONS = {}
+def _get_ort_session(repo_id):
+    """Download the ONNX model from the Hub repo once and reuse its session."""
+    session = _ORT_SESSIONS.get(repo_id)
+    if session is None:
+        from huggingface_hub import list_repo_files
+        # InferenceSession needs a local file, not a repo id.
+        # NOTE(review): assumes the repo holds an exported .onnx file — confirm.
+        onnx_files = sorted(
+            name for name in list_repo_files(repo_id)
+            if name.lower().endswith(".onnx"))
+        if not onnx_files:
+            raise FileNotFoundError(
+                f"No .onnx model file found in Hub repo {repo_id!r}")
+        local_path = hf_hub_download(repo_id=repo_id, filename=onnx_files[0])
+        session = ort.InferenceSession(local_path)
+        _ORT_SESSIONS[repo_id] = session
+    return session
+# TODO: add examples and a description to the interface
+# Gradio UI: one image plus the OCR / pre-processing settings go in; the
+# annotated image, the processed plate crop and the recognised text come out.
+iface = gr.Interface(
+    fn=predict,
+    title=app_title,
+    inputs=[
+        "image",
+        gr.Dropdown(choices=['pytesseract', 'easyocr'], value="pytesseract", label='OCR Method'),
+        gr.Slider(minimum=0, maximum=1, step=0.01, value=0.33, label='Sigma for Auto Canny'),
+        # the adaptive threshold needs an odd integer block size of at least 3
+        gr.Number(value=41, precision=0, minimum=3, step=2, label='Block Size for Adaptive Threshold'),
+        gr.Number(value=1, precision=0, label='Constant for Adaptive Threshold'),
+        gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label='Minimum Confidence for NMS'),
+    ],
+    outputs=[
+        gr.Image(label="Predicted image"),
+        gr.Image(label="Processed license plate image"),
+        gr.Textbox(label="Predicted license plate number"),
+    ],
+)
+iface.launch()

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ tesseract-ocr

requirements.txt ADDED Viewed

	@@ -0,0 +1,99 @@

+aiofiles==23.2.1
+altair==5.2.0
+annotated-types==0.6.0
+anyio==3.7.1
+astroid==3.0.1
+attrs==23.1.0
+certifi==2023.11.17
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+coloredlogs==15.0.1
+contourpy==1.1.1
+cycler==0.12.1
+dill==0.3.7
+easyocr==1.7.1
+exceptiongroup==1.2.0
+fastapi==0.104.1
+ffmpy==0.3.1
+filelock==3.13.1
+flatbuffers==23.5.26
+fonttools==4.46.0
+fsspec==2023.12.1
+gradio==4.8.0
+gradio_client==0.7.1
+h11==0.14.0
+httpcore==1.0.2
+httpx==0.25.2
+huggingface-hub==0.19.4
+humanfriendly==10.0
+idna==3.6
+imageio==2.33.0
+importlib-resources==6.1.1
+isort==5.12.0
+Jinja2==3.1.2
+jsonschema==4.20.0
+jsonschema-specifications==2023.11.2
+kiwisolver==1.4.5
+lazy_loader==0.3
+markdown-it-py==3.0.0
+MarkupSafe==2.1.3
+matplotlib==3.7.4
+mccabe==0.7.0
+mdurl==0.1.2
+mpmath==1.3.0
+networkx==3.1
+ninja==1.11.1.1
+numpy==1.24.4
+onnxruntime==1.16.3
+opencv-python==4.8.1.78
+opencv-python-headless==4.8.1.78
+orjson==3.9.10
+packaging==23.2
+pandas==2.0.3
+Pillow==10.1.0
+pkgutil_resolve_name==1.3.10
+platformdirs==4.1.0
+protobuf==4.25.1
+pyclipper==1.3.0.post5
+pydantic==2.5.2
+pydantic_core==2.14.5
+pydub==0.25.1
+Pygments==2.17.2
+pylint==3.0.2
+pyparsing==3.1.1
+pyreadline3==3.4.1
+pytesseract==0.3.10
+python-bidi==0.4.2
+python-dateutil==2.8.2
+python-multipart==0.0.6
+pytz==2023.3.post1
+PyWavelets==1.4.1
+PyYAML==6.0.1
+referencing==0.32.0
+requests==2.31.0
+rich==13.7.0
+rpds-py==0.13.2
+scikit-image==0.21.0
+scipy==1.10.1
+semantic-version==2.10.0
+shapely==2.0.2
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.0
+starlette==0.27.0
+sympy==1.12
+tifffile==2023.7.10
+tomli==2.0.1
+tomlkit==0.12.0
+toolz==0.12.0
+torch==2.1.1
+torchvision==0.16.1
+tqdm==4.66.1
+typer==0.9.0
+typing_extensions==4.8.0
+tzdata==2023.3
+urllib3==2.1.0
+uvicorn==0.24.0.post1
+websockets==11.0.3
+zipp==3.17.0