# anpr/app.py
import os
import gradio as gr # type: ignore
from paddleocr import PaddleOCR # type: ignore
from ultralytics import YOLO # type: ignore
from pathlib import Path
from deep_sort_realtime.deepsort_tracker import DeepSort # type: ignore
import cv2 # type: ignore
import numpy as np
import re
from internetarchive import download # type: ignore
from tqdm import trange
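# Download the YOLO weights and example media from the Internet Archive
# (internetarchive saves each item into ./<identifier>/ by default).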
download("anpr_weights", files=["anpr.pt"], verbose=True) # type: ignore
download(
"anpr_examples_202208",
files=["test_image_1.jpg", "test_image_2.jpg", "test_image_3.jpeg", "test_video_1.mp4"], # type: ignore
verbose=True,
)
paddle = PaddleOCR(lang="en", use_angle_cls=True, show_log=False)
model = YOLO(model="./anpr_weights/anpr.pt", task="detect")
def detect_plates(src):
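    """Run the YOLO detector on `src` and return bounding boxes
    (Pascal VOC xyxy format) for detections with confidence above 0.6."""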
predictions = model.predict(src, verbose=False)
results = []
for prediction in predictions:
for box in prediction.boxes:
det_confidence = box.conf.item()
if det_confidence < 0.6:
continue
coords = [int(position) for position in (box.xyxy.view(1, 4)).tolist()[0]]
results.append({"coords": coords, "det_conf": det_confidence})
return results
def crop(img, coords):
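    """Crop `img` to the xyxy bounding box given by `coords`."""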
cropped = img[coords[1] : coords[3], coords[0] : coords[2]]
return cropped
def preprocess_image(src):
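    """Prepare a plate crop for OCR: min-max normalize, denoise,
    convert to grayscale and apply Otsu thresholding."""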
normalize = cv2.normalize(
src, np.zeros((src.shape[0], src.shape[1])), 0, 255, cv2.NORM_MINMAX
)
denoise = cv2.fastNlMeansDenoisingColored(
normalize, h=10, hColor=10, templateWindowSize=7, searchWindowSize=15
)
grayscale = cv2.cvtColor(denoise, cv2.COLOR_BGR2GRAY)
threshold = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
return threshold
def ocr_plate(src):
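    """OCR a cropped plate region and return the filtered plate text
    together with the OCR confidence."""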
# Preprocess the image for better OCR results
preprocessed = preprocess_image(src)
# OCR the preprocessed image
results = paddle.ocr(preprocessed, det=False, cls=True)
# Get the best OCR result
    plate_text, ocr_confidence = max(
        results,
        key=lambda image_result: max(
            ocr_result[1] for ocr_result in image_result  # compare by confidence
        ),
    )[0]
    # Filter out anything but uppercase letters, digits, hyphens and whitespace.
    # Also, strip hyphens and whitespace from the first and last positions
plate_text_filtered = re.sub(r"[^A-Z0-9- ]", "", plate_text).strip("- ")
return {"plate": plate_text_filtered, "ocr_conf": ocr_confidence}
def ocr_plates(src, det_predictions):
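    """Crop every detected plate out of `src` and OCR each one."""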
results = []
for det_prediction in det_predictions:
plate_region = crop(src, det_prediction["coords"])
ocr_prediction = ocr_plate(plate_region)
results.append(ocr_prediction)
return results
def plot_box(img, coords, label=None, color=[0, 150, 255], line_thickness=3):
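    """Draw a bounding box (and an optional label) on `img` in place."""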
# Plots box on image
c1, c2 = (int(coords[0]), int(coords[1])), (int(coords[2]), int(coords[3]))
cv2.rectangle(img, c1, c2, color, thickness=line_thickness, lineType=cv2.LINE_AA)
# Plots label on image, if exists
if label:
tf = max(line_thickness - 1, 1) # font thickness
t_size = cv2.getTextSize(label, 0, fontScale=line_thickness / 3, thickness=tf)[
0
]
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled
cv2.putText(
img,
label,
(c1[0], c1[1] - 2),
0,
line_thickness / 3,
[225, 255, 255],
thickness=tf,
lineType=cv2.LINE_AA,
)
def get_plates(src):
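    """Detect, OCR and annotate all plates in `src`; returns the annotated
    image together with the detection and OCR predictions."""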
det_predictions = detect_plates(src)
ocr_predictions = ocr_plates(src, det_predictions)
for det_prediction, ocr_prediction in zip(det_predictions, ocr_predictions):
plot_box(src, det_prediction["coords"], ocr_prediction["plate"])
return src, det_predictions, ocr_predictions
def predict_image(src):
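    """Gradio handler for the image tab: returns the annotated image."""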
    detected_image, _, _ = get_plates(src)
    return detected_image
def predict_image_api(src):
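    """Gradio API handler: returns the text of the first recognized plate."""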
    _, _, ocr_predictions = get_plates(src)
    # Return the first recognized plate, or an empty string if nothing was detected
    return ocr_predictions[0]["plate"] if ocr_predictions else ""
def pascal_voc_to_coco(x1y1x2y2):
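    """Convert a Pascal VOC [x1, y1, x2, y2] box to COCO [x, y, width, height]."""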
x1, y1, x2, y2 = x1y1x2y2
return [x1, y1, x2 - x1, y2 - y1]
def get_best_ocr(preds, rec_conf, ocr_res, track_id):
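    """Track the highest-confidence OCR reading seen so far for `track_id`,
    updating `preds` in place and returning the best text/confidence pair."""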
for info in preds:
        # Look for the entry belonging to the current track id
if info["track_id"] == track_id:
            # Keep whichever OCR reading has the higher confidence
if info["ocr_conf"] < rec_conf:
info["ocr_conf"] = rec_conf
info["ocr_txt"] = ocr_res
else:
rec_conf = info["ocr_conf"]
ocr_res = info["ocr_txt"]
break
return preds, rec_conf, ocr_res
def predict_video(src):
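    """Gradio handler for the video tab: runs detection, DeepSort tracking and
    OCR frame by frame, then re-encodes the annotated video with ffmpeg."""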
output = f"{Path(src).stem}_detected{Path(src).suffix}"
# Create a VideoCapture object
video = cv2.VideoCapture(src)
    # Get the frame resolution and fps of the source video.
    # The float values are converted to integers where needed.
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = video.get(cv2.CAP_PROP_FPS)
frames_total = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
# Define the codec and create VideoWriter object.
temp = f"{Path(output).stem}_temp{Path(output).suffix}"
export = cv2.VideoWriter(
temp, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
)
    # Initializing tracker
tracker = DeepSort()
# Initializing some helper variables.
preds = []
total_obj = 0
for i in trange(frames_total):
ret, frame = video.read()
        if ret:
# Run the ANPR algorithm
det_predictions = detect_plates(frame)
# Convert Pascal VOC detections to COCO
            bboxes = [
                pascal_voc_to_coco(det_prediction["coords"])
                for det_prediction in det_predictions
            ]
if len(bboxes) > 0:
# Storing all the required info in a list.
                detections = [
                    (bbox, det_prediction["det_conf"], "number_plate")
                    for bbox, det_prediction in zip(bboxes, det_predictions)
                ]
# Applying tracker.
                # The tracker flow: Kalman filter -> target association (using the Hungarian algorithm) and appearance descriptor.
tracks = tracker.update_tracks(detections, frame=frame)
# Checking if tracks exist.
for track in tracks:
if not track.is_confirmed() or track.time_since_update > 1:
continue
                    # Changing track bbox to top left, bottom right coordinates,
                    # clamping negative values to zero
                    bbox = [max(0, int(position)) for position in track.to_tlbr()]
# Cropping the license plate and applying the OCR.
plate_region = crop(frame, bbox)
ocr_prediction = ocr_plate(plate_region)
plate_text, ocr_confidence = (
ocr_prediction["plate"],
ocr_prediction["ocr_conf"],
)
# Storing the ocr output for corresponding track id.
output_frame = {
"track_id": track.track_id,
"ocr_txt": plate_text,
"ocr_conf": ocr_confidence,
}
                    # Append a new entry for unseen track ids; otherwise keep the
                    # highest-confidence OCR result for the existing track.
                    if track.track_id not in {pred["track_id"] for pred in preds}:
total_obj += 1
preds.append(output_frame)
else:
preds, ocr_confidence, plate_text = get_best_ocr(
preds,
ocr_confidence,
plate_text,
track.track_id,
)
# Plotting the prediction.
plot_box(
frame,
bbox,
f"{str(track.track_id)}. {plate_text}",
color=[255, 150, 0],
)
# Write the frame into the output file
export.write(frame)
else:
break
# When everything done, release the video capture and video write objects
video.release()
export.release()
# Compressing the video for smaller size and web compatibility.
os.system(
f"ffmpeg -y -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 1 -c:a aac -f mp4 /dev/null && ffmpeg -y -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 2 -c:a aac -movflags faststart {output}"
)
os.system(f"rm -rf {temp} ffmpeg2pass-0.log ffmpeg2pass-0.log.mbtree")
return output
with gr.Blocks() as demo:
gr.Markdown('### <h3 align="center">Automatic Number Plate Recognition</h3>')
gr.Markdown(
"This AI was trained to detect and recognize number plates on vehicles."
)
with gr.Tabs():
with gr.TabItem("Image"):
with gr.Row():
image_input = gr.Image()
image_output = gr.Image()
image_input.upload(
predict_image,
inputs=[image_input],
outputs=[image_output],
)
with gr.Row(visible=False): # Prediction API
api_image_input = gr.Image()
api_prediction_output = gr.Textbox()
api_image_input.upload(
predict_image_api,
inputs=[api_image_input],
outputs=[api_prediction_output],
api_name="predict",
)
gr.Examples(
[
["./anpr_examples_202208/test_image_1.jpg"],
["./anpr_examples_202208/test_image_2.jpg"],
["./anpr_examples_202208/test_image_3.jpeg"],
],
[image_input],
[image_output],
predict_image,
cache_examples=True,
)
with gr.TabItem("Video"):
with gr.Row():
video_input = gr.Video(format="mp4")
video_output = gr.Video(format="mp4")
video_input.upload(
predict_video, inputs=[video_input], outputs=[video_output]
)
gr.Examples(
[["./anpr_examples_202208/test_video_1.mp4"]],
[video_input],
[video_output],
predict_video,
cache_examples=True,
)
gr.Markdown("[@itsyoboieltr](https://github.com/itsyoboieltr)")
demo.launch()