import os
import re
from pathlib import Path

import cv2  # type: ignore
import gradio as gr  # type: ignore
import numpy as np
from deep_sort_realtime.deepsort_tracker import DeepSort  # type: ignore
from internetarchive import download  # type: ignore
from paddleocr import PaddleOCR  # type: ignore
from tqdm import trange
from ultralytics import YOLO  # type: ignore

# Download the detection weights and the example media from the Internet Archive.
download("anpr_weights", files=["anpr.pt"], verbose=True)  # type: ignore
download(
    "anpr_examples_202208",
    files=["test_image_1.jpg", "test_image_2.jpg", "test_image_3.jpeg", "test_video_1.mp4"],  # type: ignore
    verbose=True,
)

paddle = PaddleOCR(lang="en", use_angle_cls=True, show_log=False)
model = YOLO(model="./anpr_weights/anpr.pt", task="detect")


def detect_plates(src):
    predictions = model.predict(src, verbose=False)
    results = []
    for prediction in predictions:
        for box in prediction.boxes:
            det_confidence = box.conf.item()
            # Discard detections below the confidence threshold.
            if det_confidence < 0.6:
                continue
            coords = [int(position) for position in (box.xyxy.view(1, 4)).tolist()[0]]
            results.append({"coords": coords, "det_conf": det_confidence})
    return results


def crop(img, coords):
    # Crop the bounding box region [y1:y2, x1:x2] out of the image.
    cropped = img[coords[1] : coords[3], coords[0] : coords[2]]
    return cropped


def preprocess_image(src):
    # Normalize, denoise, grayscale and binarize (Otsu) before OCR.
    normalize = cv2.normalize(
        src, np.zeros((src.shape[0], src.shape[1])), 0, 255, cv2.NORM_MINMAX
    )
    denoise = cv2.fastNlMeansDenoisingColored(
        normalize, h=10, hColor=10, templateWindowSize=7, searchWindowSize=15
    )
    grayscale = cv2.cvtColor(denoise, cv2.COLOR_BGR2GRAY)
    threshold = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    return threshold


def ocr_plate(src):
    # Preprocess the image for better OCR results
    preprocessed = preprocess_image(src)
    # OCR the preprocessed image
    results = paddle.ocr(preprocessed, det=False, cls=True)
    # Get the best OCR result
    plate_text, ocr_confidence = max(
        results,
        key=lambda ocr_prediction: max(
            ocr_prediction,
            key=lambda ocr_prediction_result: ocr_prediction_result[1],  # type: ignore
        ),
    )[0]
    # Filter out anything but uppercase letters, digits, hyphens and whitespace.
    # Also, strip hyphens and whitespace from the first and last positions.
    plate_text_filtered = re.sub(r"[^A-Z0-9- ]", "", plate_text).strip("- ")
    return {"plate": plate_text_filtered, "ocr_conf": ocr_confidence}


def ocr_plates(src, det_predictions):
    results = []
    for det_prediction in det_predictions:
        plate_region = crop(src, det_prediction["coords"])
        ocr_prediction = ocr_plate(plate_region)
        results.append(ocr_prediction)
    return results


def plot_box(img, coords, label=None, color=[0, 150, 255], line_thickness=3):
    # Plot the box on the image
    c1, c2 = (int(coords[0]), int(coords[1])), (int(coords[2]), int(coords[3]))
    cv2.rectangle(img, c1, c2, color, thickness=line_thickness, lineType=cv2.LINE_AA)
    # Plot the label on the image, if it exists
    if label:
        tf = max(line_thickness - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=line_thickness / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(
            img,
            label,
            (c1[0], c1[1] - 2),
            0,
            line_thickness / 3,
            [225, 255, 255],
            thickness=tf,
            lineType=cv2.LINE_AA,
        )


def get_plates(src):
    det_predictions = detect_plates(src)
    ocr_predictions = ocr_plates(src, det_predictions)
    for det_prediction, ocr_prediction in zip(det_predictions, ocr_predictions):
        plot_box(src, det_prediction["coords"], ocr_prediction["plate"])
    return src, det_predictions, ocr_predictions


def predict_image(src):
    detected_image, det_predictions, ocr_predictions = get_plates(src)
    return detected_image


def predict_image_api(src):
    detected_image, det_predictions, ocr_predictions = get_plates(src)
    return ocr_predictions[0]["plate"]


def pascal_voc_to_coco(x1y1x2y2):
    # Convert [x1, y1, x2, y2] to [x, y, width, height].
    x1, y1, x2, y2 = x1y1x2y2
    return [x1, y1, x2 - x1, y2 - y1]


def get_best_ocr(preds, rec_conf, ocr_res, track_id):
    for info in preds:
        # Check if it is the current track id
        if info["track_id"] == track_id:
            # Keep whichever OCR result has the higher confidence
            if info["ocr_conf"] < rec_conf:
                info["ocr_conf"] = rec_conf
                info["ocr_txt"] = ocr_res
            else:
                rec_conf = info["ocr_conf"]
                ocr_res = info["ocr_txt"]
            break
    return preds, rec_conf, ocr_res


def predict_video(src):
    output = f"{Path(src).stem}_detected{Path(src).suffix}"
    # Create a VideoCapture object
    video = cv2.VideoCapture(src)
    # The default frame resolutions are system dependent.
    # Convert the resolutions from float to integer.
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = video.get(cv2.CAP_PROP_FPS)
    frames_total = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    # Define the codec and create a VideoWriter object.
    temp = f"{Path(output).stem}_temp{Path(output).suffix}"
    export = cv2.VideoWriter(temp, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
    # Initialize the tracker
    tracker = DeepSort()
    # Initialize some helper variables.
    preds = []
    total_obj = 0
    for _ in trange(frames_total):
        ret, frame = video.read()
        if ret:
            # Run the plate detector on the frame
            det_predictions = detect_plates(frame)
            # Convert Pascal VOC detections to COCO
            bboxes = [
                pascal_voc_to_coco(det_prediction["coords"])
                for det_prediction in det_predictions
            ]
            if len(bboxes) > 0:
                # Store all the required info in a list.
                detections = [
                    (bbox, det_prediction["det_conf"], "number_plate")
                    for bbox, det_prediction in zip(bboxes, det_predictions)
                ]
                # Apply the tracker.
                # Tracker flow: Kalman filter -> target association (Hungarian algorithm) with an appearance descriptor.
                tracks = tracker.update_tracks(detections, frame=frame)
                # Check if tracks exist.
                for track in tracks:
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue
                    # Change the track bbox to top-left, bottom-right coordinates
                    bbox = [int(position) for position in list(track.to_tlbr())]
                    # Clamp negative coordinates to zero.
                    for j in range(len(bbox)):
                        if bbox[j] < 0:
                            bbox[j] = 0
                    # Crop the license plate and apply the OCR.
                    plate_region = crop(frame, bbox)
                    ocr_prediction = ocr_plate(plate_region)
                    plate_text, ocr_confidence = (
                        ocr_prediction["plate"],
                        ocr_prediction["ocr_conf"],
                    )
                    # Store the OCR output for the corresponding track id.
                    output_frame = {
                        "track_id": track.track_id,
                        "ocr_txt": plate_text,
                        "ocr_conf": ocr_confidence,
                    }
                    # Append the track id to the list only if it does not exist yet;
                    # otherwise, look up the current track and keep its highest-confidence OCR result.
                    if track.track_id not in {pred["track_id"] for pred in preds}:
                        total_obj += 1
                        preds.append(output_frame)
                    else:
                        preds, ocr_confidence, plate_text = get_best_ocr(
                            preds,
                            ocr_confidence,
                            plate_text,
                            track.track_id,
                        )
                    # Plot the prediction.
                    plot_box(
                        frame,
                        bbox,
                        f"{str(track.track_id)}. {plate_text}",
                        color=[255, 150, 0],
                    )
            # Write the frame into the output file
            export.write(frame)
        else:
            break
    # When everything is done, release the video capture and video writer objects.
    video.release()
    export.release()
    # Compress the video (two-pass x264) for smaller size and web compatibility.
    os.system(
        f"ffmpeg -y -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 1 -c:a aac -f mp4 /dev/null && "
        f"ffmpeg -y -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 2 -c:a aac -movflags faststart {output}"
    )
    os.system(f"rm -rf {temp} ffmpeg2pass-0.log ffmpeg2pass-0.log.mbtree")
    return output


with gr.Blocks() as demo:
    gr.Markdown("### Automatic Number Plate Recognition")
    gr.Markdown(
        "This AI was trained to detect and recognize number plates on vehicles."
    )
    with gr.Tabs():
        with gr.TabItem("Image"):
            with gr.Row():
                image_input = gr.Image()
                image_output = gr.Image()
            image_input.upload(
                predict_image,
                inputs=[image_input],
                outputs=[image_output],
            )
            with gr.Row(visible=False):
                # Hidden prediction API endpoint
                api_image_input = gr.Image()
                api_prediction_output = gr.Textbox()
                api_image_input.upload(
                    predict_image_api,
                    inputs=[api_image_input],
                    outputs=[api_prediction_output],
                    api_name="predict",
                )
            gr.Examples(
                [
                    ["./anpr_examples_202208/test_image_1.jpg"],
                    ["./anpr_examples_202208/test_image_2.jpg"],
                    ["./anpr_examples_202208/test_image_3.jpeg"],
                ],
                [image_input],
                [image_output],
                predict_image,
                cache_examples=True,
            )
        with gr.TabItem("Video"):
            with gr.Row():
                video_input = gr.Video(format="mp4")
                video_output = gr.Video(format="mp4")
            video_input.upload(
                predict_video, inputs=[video_input], outputs=[video_output]
            )
            gr.Examples(
                [["./anpr_examples_202208/test_video_1.mp4"]],
                [video_input],
                [video_output],
                predict_video,
                cache_examples=True,
            )
    gr.Markdown("[@itsyoboieltr](https://github.com/itsyoboieltr)")

demo.launch()
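
# ---------------------------------------------------------------------------
# Hypothetical client-side usage sketch (an assumption, not part of the app):
# once the demo above is running, the hidden "predict" endpoint registered via
# api_name="predict" can be called programmatically. The `gradio_client`
# package, host, and port below are assumptions and may need adjusting for
# your Gradio version.
#
#     from gradio_client import Client
#
#     client = Client("http://127.0.0.1:7860/")
#     plate = client.predict(
#         "./anpr_examples_202208/test_image_1.jpg", api_name="/predict"
#     )
#     print(plate)  # the plate text returned by predict_image_api
# ---------------------------------------------------------------------------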