from ultralytics import YOLO | |
import numpy as np | |
from transformers import TrOCRProcessor, VisionEncoderDecoderModel | |
# Detection models (Ultralytics YOLO), loaded once when this module is imported.
car_detection = YOLO("models/yolov8n.pt")  # used by detect_cars
lp_detection = YOLO("models/yolov8n_lp_det.pt")  # used by detect_lp

# TrOCR processor and encoder-decoder model used by detect_lp_text to read
# the text on a licence plate crop.
processor = TrOCRProcessor.from_pretrained('models/processor')
model = VisionEncoderDecoderModel.from_pretrained('models/model')

# Character-level detection / recognition models, currently disabled.
# char_dect = YOLO("models/yolov8n_lpchar_det.pt")
# char_rec = torch.load("models/charrec.pt", map_location="cpu")
# function to detect cars in the given image
def detect_cars(inputs):
    """Crop every detected car out of the given image(s).

    Runs the module-level ``car_detection`` model on ``inputs`` restricted to
    class id 2 with a 0.5 confidence threshold, then slices each predicted
    bounding box out of the original image the result belongs to.

    Args:
        inputs: Passed straight through as ``source`` to
            ``car_detection.predict``.

    Returns:
        A flat list of image crops, one per detected box, ordered by result
        and then by box. The list is empty when nothing is detected.
    """
    predictions = car_detection.predict(
        source=inputs, classes=[2], conf=0.5, verbose=False
    )
    crops = []
    # one prediction is produced per input image
    for prediction in predictions:
        frame = prediction.orig_img
        # each xyxy row is (x1, y1, x2, y2); coordinates are truncated to
        # ints so they can be used as slice bounds (rows = y, columns = x)
        crops.extend(
            frame[int(top):int(bottom), int(left):int(right)]
            for left, top, right, bottom in prediction.boxes.xyxy.tolist()
        )
    return crops
# function to detect licence plates in the given car images
def detect_lp(inputs):
    """Crop at most one licence plate out of each car image.

    Runs the module-level ``lp_detection`` model on ``inputs`` with a 0.5
    confidence threshold.

    Args:
        inputs: Car images, passed straight through as ``source`` to
            ``lp_detection.predict``.

    Returns:
        A list with exactly one entry per detector result: the crop of the
        first reported box, or a 100x100x3 black ``uint8`` image when the
        detector reported no box for that result.
    """
    detections = lp_detection.predict(source=inputs, conf=0.5, verbose=False)
    plates = []
    # one detection result is produced per input car image
    for detection in detections:
        plate_boxes = detection.boxes.xyxy.tolist()
        if not plate_boxes:
            # no plate found: insert a black placeholder so the output stays
            # aligned one-to-one with the inputs
            plates.append(np.zeros((100, 100, 3), np.uint8))
            continue
        # only one licence plate is wanted per car, so just the first box
        # is cropped and any further boxes are ignored
        left, top, right, bottom = plate_boxes[0]
        plates.append(
            detection.orig_img[int(top):int(bottom), int(left):int(right)]
        )
    return plates
# function to detect licence plate number in the given licence plate images
def detect_lp_text(inputs):
    """Read the text on each licence plate image.

    Each image is preprocessed with the module-level ``processor``, decoded
    with ``model.generate`` and turned back into a string with
    ``processor.batch_decode``.

    Args:
        inputs: Iterable of licence plate images accepted by ``processor``.

    Returns:
        A list with one string per input image: the decoded text with
        surrounding whitespace removed, or ``"not found"`` when the decoded
        text is empty or consists only of whitespace.
    """
    plate_numbers = []
    # the loop variable is deliberately not called ``input`` so the builtin
    # of that name is not shadowed
    for plate_image in inputs:
        pixel_values = processor(plate_image, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)
        decoded = processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]
        # a whitespace-only result carries no plate number, so it is treated
        # the same as an empty one
        text = decoded.strip()
        plate_numbers.append(text or "not found")
    return plate_numbers
def run(inputs):
    """Run the full pipeline: cars -> licence plates -> plate text.

    Only the first element of ``inputs`` is processed; the sequence form is
    kept so multiple inputs can be supported later.

    Args:
        inputs: Indexable collection whose first element is handed to
            ``detect_cars``.

    Returns:
        A 3-tuple ``(cars, lps, lp_text)``.

        * When at least one car is detected: the list of car crops, the list
          of licence plate crops returned by ``detect_lp`` and the list of
          strings returned by ``detect_lp_text``.
        * When no car is detected: two single-element lists, each holding a
          100x100x3 black ``uint8`` image, and the plain string
          ``"not found"``.

        NOTE(review): the third element is a ``str`` in the no-car case but
        a list of ``str`` otherwise; callers must handle both.
    """
    # for now only the first input is used
    first_input = inputs[0]

    car_crops = detect_cars(first_input)
    if car_crops:
        # detect_lp yields one plate image (or black placeholder) per car,
        # detect_lp_text one string per plate image
        plate_crops = detect_lp(car_crops)
        plate_texts = detect_lp_text(plate_crops)
        return car_crops, plate_crops, plate_texts

    # no car detected: return black placeholder images and a default text
    blank_car = np.zeros((100, 100, 3), np.uint8)
    blank_plate = np.zeros((100, 100, 3), np.uint8)
    return [blank_car], [blank_plate], "not found"