File size: 7,544 Bytes
1d241a3
 
 
 
 
 
 
 
 
 
 
 
33f07f1
1d241a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
import cv2
import numpy as np
import onnxruntime as ort
import pytesseract
from PIL import Image
import gradio as gr
import torchvision
from huggingface_hub import hf_hub_download


# Human-readable name of the app (not referenced by the code visible in this file).
app_title = "License Plate Object Detection"
#model = ["ml-debi/yolov8_license_plate_detection"]
# Location of the ONNX detector weights; predict() opens an InferenceSession on it.
model_path = "./best.onnx"

def build_tesseract_options(psm=7):
    """Assemble the Tesseract config string used for plate OCR.

    The returned string whitelists upper-case letters and digits and selects
    the page segmentation mode given by ``psm``.
    """
    # Characters Tesseract is allowed to emit.
    allowed_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    return f"-c tessedit_char_whitelist={allowed_chars} --psm {psm}"

# Cropped image processing
def auto_canny(image, sigma=0.33):
    """Run Canny edge detection with thresholds derived from the image median.

    The lower and upper hysteresis thresholds sit a fraction ``sigma`` below
    and above the median pixel intensity, clamped to the range [0, 255].
    Returns the edge map produced by ``cv2.Canny``.
    """
    median_intensity = np.median(image)

    # Clamp so the thresholds never leave the 8-bit intensity range.
    low_threshold = int(max(0, (1.0 - sigma) * median_intensity))
    high_threshold = int(min(255, (1.0 + sigma) * median_intensity))

    return cv2.Canny(image, low_threshold, high_threshold)



def ocr_image_process(img, sigma, block_size, constant):
    """Blank out everything in a cropped plate except character-shaped regions.

    The image is adaptively thresholded, edges are extracted with
    ``auto_canny``, and the bounding box of every external contour is tested
    against area and aspect-ratio limits. Pixels inside accepted boxes keep
    their grayscale value; all other pixels become white (255).

    Args:
        img: PIL image or NumPy array. Colour input is treated as RGB, which
            is the channel order of the PIL crops passed in by ``drawings``.
            Two-dimensional (already grayscale) input is accepted as is.
        sigma: Fraction passed to ``auto_canny`` to place the Canny thresholds.
        block_size: Neighbourhood size for the adaptive threshold. It is
            truncated to an int and bumped to the nearest valid value (odd,
            at least 3) because ``cv2.adaptiveThreshold`` rejects anything else.
        constant: Value subtracted from the local mean by the adaptive
            threshold; truncated to an int.

    Returns:
        A single-channel uint8 NumPy array of the same height and width as
        the input.

    Raises:
        ValueError: If the image has no pixels.
    """
    # np.array accepts both PIL images and ndarrays, so no round-trip is needed.
    img = np.array(img)
    if img.ndim < 2 or img.size == 0:
        raise ValueError("ocr_image_process received an empty image")

    # cv2.adaptiveThreshold requires an odd block size greater than 1; the
    # value comes from a free-form numeric UI field, so normalise it here.
    block_size = max(3, int(block_size))
    if block_size % 2 == 0:
        block_size += 1

    # Inputs originate from PIL, i.e. RGB order (not OpenCV's BGR).
    gray = img if img.ndim == 2 else cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    thresh_inv = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV,
        block_size, int(constant))
    edges = auto_canny(thresh_inv, sigma)

    # [-2] selects the contour list on both the 2-value and 3-value
    # return conventions of cv2.findContours.
    contours = cv2.findContours(
        edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    img_area = img.shape[0] * img.shape[1]
    # White everywhere; accepted character boxes are punched out in black so
    # that the bitwise OR below lets the original pixels show through there.
    mask = np.full(gray.shape, 255, dtype="uint8")

    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        roi_ratio = (w * h) / img_area
        # Keep boxes covering 1.5%-9% of the crop ...
        if not 0.015 <= roi_ratio < 0.09:
            continue
        # ... that are taller than wide, but at most three times as tall.
        if h > 1.2 * w and 3 * w >= h:
            cv2.rectangle(mask, (x, y), (x + w, y + h), (0, 0, 0), -1)

    # OR-ing with the mask forces every pixel outside the boxes to 255.
    return cv2.bitwise_or(gray, mask)


def get_detections(image_path, size, ort_session):
    """Load an image, run the ONNX detector on it and return the raw outputs.

    Args:
        image_path: Either a file path (``str``) or a NumPy array holding the
            image. Whatever its original mode, the image is converted to RGB
            in memory; nothing is written to disk.
        size: Edge length of the square the image is resized to before
            inference.
        ort_session: Object exposing ``run(output_names, feeds)``; it is fed
            the tensor under the input name ``'images'``.

    Returns:
        Tuple ``(image, outputs, scale_x, scale_y)`` where ``image`` is the
        full-size RGB PIL image, ``outputs`` is what ``ort_session.run``
        returned, and the scale factors map coordinates in the resized image
        back to the original width and height.

    Raises:
        ValueError: If ``image_path`` is neither a ``str`` nor a NumPy array.
    """
    if isinstance(image_path, str):
        # convert() drops any alpha channel / palette and fully loads the
        # pixel data, so the file handle can be closed right away.
        with Image.open(image_path) as source:
            image = source.convert('RGB')
    elif isinstance(image_path, np.ndarray):
        # Same normalisation for arrays (e.g. RGBA or grayscale input).
        image = Image.fromarray(image_path).convert('RGB')
    else:
        raise ValueError(
            "image_path must be a file path (str) or a NumPy array.")

    scale_x = image.width / size
    scale_y = image.height / size
    resized_image = image.resize((size, size))
    transform = torchvision.transforms.ToTensor()
    # unsqueeze(0) adds the leading batch dimension.
    input_tensor = transform(resized_image).unsqueeze(0)
    outputs = ort_session.run(None, {'images': input_tensor.numpy()})
    return image, outputs, scale_x, scale_y


def non_maximum_supression(outputs, min_confidence):
    """Pick the single most confident detection, if it clears the threshold.

    Despite the name, no overlap-based suppression is performed: the column
    of ``outputs[0][0]`` whose row-4 value (the confidence) is largest is
    returned when that value is strictly greater than ``min_confidence``;
    otherwise ``None`` is returned.
    """
    candidates = outputs[0][0]
    scores = candidates[4]
    best = np.argmax(scores)
    if not scores[best] > min_confidence:
        return None
    return candidates[:, best]


# easyocr.Reader instances keyed by language tuple, so the OCR model is
# loaded once instead of on every call to drawings().
_EASYOCR_READERS = {}


def _get_easyocr_reader(languages=('en',)):
    """Return a cached ``easyocr.Reader`` for ``languages``, creating it once."""
    reader = _EASYOCR_READERS.get(languages)
    if reader is None:
        # Imported lazily so easyocr is only required when it is selected.
        import easyocr
        reader = _EASYOCR_READERS[languages] = easyocr.Reader(list(languages))
    return reader


def drawings(image, boxes, scale_x, scale_y, sigma, block_size, constant, ocr):
    """Crop the detected plate, OCR it and draw the detection on the image.

    Args:
        image: Full-size PIL image the detection refers to.
        boxes: Five values ``x, y, w, h, c``: box centre, box size and
            confidence, in the coordinates of the resized model input.
        scale_x: Factor mapping model-input x coordinates to ``image``.
        scale_y: Factor mapping model-input y coordinates to ``image``.
        sigma: Forwarded to ``ocr_image_process``.
        block_size: Forwarded to ``ocr_image_process``.
        constant: Forwarded to ``ocr_image_process``.
        ocr: ``"easyocr"`` selects EasyOCR; any other value uses Tesseract.

    Returns:
        Tuple ``(annotated, plate_crop, processed_crop, text)``: the image as
        a NumPy array with the box and confidence drawn on it, the PIL crop
        of the plate, the pre-processed crop given to the OCR engine, and the
        recognised text (``"No result found"`` when the engine returned
        nothing).
    """
    x, y, w, h, c = boxes
    # Convert centre/size to corner coordinates in the original image.
    x_min, y_min = (x - w / 2) * scale_x, (y - h / 2) * scale_y
    x_max, y_max = (x + w / 2) * scale_x, (y + h / 2) * scale_y
    license_plate_image = image.crop((x_min, y_min, x_max, y_max))
    processed_cropped_image = ocr_image_process(
        license_plate_image, sigma, block_size, constant)

    if ocr == "easyocr":
        result = _get_easyocr_reader().readtext(processed_cropped_image)
        # Only the first detection's text is used.
        license_plate_text = result[0][1].upper() if result else "No result found"
    else:
        options = build_tesseract_options(7)
        raw_text = pytesseract.image_to_string(
            processed_cropped_image,
            config=options)
        # Tesseract appends whitespace / a form feed; drop it, and report an
        # empty result the same way the easyocr branch does.
        license_plate_text = raw_text.strip() or "No result found"
    print(license_plate_text)

    # Calculate the font scale based on image size
    font_scale = 0.001 * max(image.size)

    image = cv2.rectangle(
        np.array(image),
        (int(x_min), int(y_min)), (int(x_max), int(y_max)),
        (0, 0, 255), 3)
    cv2.putText(
        image, f'Confidence: {c:.2f}', (int(x_min), int(y_min)),
        cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), 1)

    return image, license_plate_image, processed_cropped_image, license_plate_text


def yolo_predictions(image_path, size, sigma, block_size, constant, min_confidence, ort_session, ocr):
    """Detect a license plate in an image and OCR it.

    Runs ``get_detections``, keeps the most confident detection via
    ``non_maximum_supression`` and hands it to ``drawings``.

    Returns:
        Tuple ``(result_img, license_plate_image, processed_cropped_image,
        license_plate_text)``. When no detection exceeds ``min_confidence``
        the tuple is ``(unannotated image as a NumPy array, None, None,
        "No license plate detected")``.
    """
    image, outputs, scale_x, scale_y = get_detections(
        image_path, size, ort_session)
    boxes = non_maximum_supression(outputs, min_confidence)

    if boxes is None:
        # Nothing cleared the threshold; drawings() cannot unpack None, so
        # return the untouched image with placeholder results instead.
        return np.array(image), None, None, "No license plate detected"

    return drawings(
        image, boxes, scale_x, scale_y, sigma, block_size, constant, ocr)


# ONNX Runtime sessions keyed by model path, so the model file is loaded once
# instead of on every prediction request.
_ORT_SESSIONS = {}


def _get_ort_session(path):
    """Return a cached ``ort.InferenceSession`` for ``path``, creating it once."""
    session = _ORT_SESSIONS.get(path)
    if session is None:
        session = _ORT_SESSIONS[path] = ort.InferenceSession(path)
    return session


def predict(image, ocr, sigma, block_size, constant, min_confidence):
    """Gradio callback: detect and read the license plate in ``image``.

    Args:
        image: Input image as delivered by the Gradio image component.
        ocr: OCR engine name forwarded to ``yolo_predictions``.
        sigma: Auto-Canny sigma forwarded to ``yolo_predictions``.
        block_size: Adaptive-threshold block size forwarded to
            ``yolo_predictions``.
        constant: Adaptive-threshold constant forwarded to
            ``yolo_predictions``.
        min_confidence: Minimum detection confidence forwarded to
            ``yolo_predictions``.

    Returns:
        Tuple ``(result_img, processed_cropped_image, license_plate_text)``.
    """
    # Edge length of the square image fed to the detector.
    size = 640
    ort_session = _get_ort_session(model_path)

    result_img, _, processed_cropped_image, license_plate_text = yolo_predictions(
        image, size, sigma, block_size, constant, min_confidence, ort_session, ocr)

    return result_img, processed_cropped_image, license_plate_text


# Add output license plate text, and add examples and description
# Input widgets, listed in the positional order of predict()'s parameters.
_input_components = [
    "image",
    gr.Dropdown(choices=['pytesseract', 'easyocr'], value="pytesseract", label='OCR Method'),
    gr.Slider(minimum=0, maximum=1, step=0.01, value=0.33, label='Sigma for Auto Canny'),
    gr.Number(value=41, label='Block Size for Adaptive Threshold'),
    gr.Number(value=1, label='Constant for Adaptive Threshold'),
    gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label='Minimum Confidence for NMS'),
]

# Output widgets, matching the three values predict() returns.
_output_components = [
    gr.Image(label="Predicted image"),
    gr.Image(label="Processed license plate image"),
    gr.Textbox(label="Predicted license plate number"),
]

iface = gr.Interface(fn=predict, inputs=_input_components, outputs=_output_components)
iface.launch()