import numpy as np
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from ultralytics import YOLO

# Models are loaded once, at import time, and used by the functions in this module.
car_detection = YOLO("models/yolov8n.pt")
lp_detection = YOLO("models/yolov8n_lp_det.pt")
processor = TrOCRProcessor.from_pretrained('models/processor')
model = VisionEncoderDecoderModel.from_pretrained('models/model')
# char_dect = YOLO("models/yolov8n_lpchar_det.pt")
# char_rec = torch.load("models/charrec.pt", map_location="cpu")


# function to detect cars in the given image
def detect_cars(inputs):
    """Detect cars in ``inputs`` and return one cropped image per detection.

    Args:
        inputs: Source passed straight to ``car_detection.predict`` (an image
            or a collection of images).

    Returns:
        A flat list of crops taken from each result's ``orig_img``, one per
        detected bounding box. The list is empty when nothing is detected.
    """
    cars = []
    # running the car detection model with a 50% confidence threshold,
    # restricted to class index 2 (presumably "car" in the model's label set)
    car_results = car_detection.predict(
        source=inputs, classes=[2], conf=0.5, verbose=False
    )
    # one result per input image
    for car_result in car_results:
        # bounding boxes come as [x1, y1, x2, y2] rows
        for box in car_result.boxes.xyxy.tolist():
            x1, y1, x2, y2 = (int(coord) for coord in box)
            # cropping the car out of the original input image
            cars.append(car_result.orig_img[y1:y2, x1:x2])
    return cars


# function to detect licence plates in the given car images
def detect_lp(inputs):
    """Detect one licence plate per car image.

    Args:
        inputs: Car images passed straight to ``lp_detection.predict``.

    Returns:
        A list with exactly one entry per prediction result: the crop of the
        first detected licence plate, or a black 100x100x3 ``uint8`` image
        when no plate was detected in that car image.
    """
    lps = []
    # running the licence plate detection model with a 50% confidence threshold
    lp_results = lp_detection.predict(source=inputs, conf=0.5, verbose=False)
    # one result per input car image
    for lp_result in lp_results:
        lp_boxes = lp_result.boxes.xyxy.tolist()
        if not lp_boxes:
            # no licence plate detected: add a black placeholder image so the
            # output stays aligned with the input cars
            lps.append(np.zeros((100, 100, 3), np.uint8))
            continue
        # we only want one licence plate per car, so only the first box is used
        x1, y1, x2, y2 = (int(coord) for coord in lp_boxes[0])
        lps.append(lp_result.orig_img[y1:y2, x1:x2])
    return lps
# function to detect licence plate number in the given licence plate images
def detect_lp_text(inputs):
    """Read the text of each licence plate image.

    Args:
        inputs: Iterable of licence plate images; each one is passed to the
            module-level ``processor`` individually.

    Returns:
        A list of strings, one per input image. An empty decoding result is
        replaced with the default text ``"not found"``.
    """
    plate_number = []
    # iterating through each licence plate
    for lp_image in inputs:
        # NOTE(review): crops taken from a detector's ``orig_img`` may be in
        # BGR channel order while the OCR processor may expect RGB - confirm.
        pixel_values = processor(lp_image, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)
        generated_text = processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]
        # if no text is found in the licence plate, fall back to a default text
        plate_number.append(generated_text if generated_text else "not found")
    return plate_number


def run(inputs):
    """Run the full pipeline: cars -> licence plates -> plate text.

    Args:
        inputs: Sequence of inputs; only the first element is processed for
            now (multi-input handling is left for the future).

    Returns:
        A tuple ``(cars, lps, lp_text)``. When at least one car is detected,
        ``cars`` and ``lps`` are lists of images and ``lp_text`` is a list of
        strings. When no car is detected, ``cars`` and ``lps`` each hold a
        single black 100x100x3 image and ``lp_text`` is the string
        ``"not found"``.
    """
    # for future, to handle multiple inputs
    # currently using just one input
    image = inputs[0]

    # detecting cars, this function returns all detected car images
    cars = detect_cars(image)

    # if no car is detected black images are returned
    # NOTE(review): the text here is a plain string, whereas the normal path
    # returns a list of strings - kept as-is because callers may rely on it.
    if not cars:
        return (
            [np.zeros((100, 100, 3), np.uint8)],
            [np.zeros((100, 100, 3), np.uint8)],
            "not found",
        )

    # detecting licence plates from the car images
    # returns licence plate images; a black image stands in for a missing plate
    lps = detect_lp(cars)

    # detecting licence plate number from licence plate images
    # returns the plate text, or "not found" where none could be read
    lp_text = detect_lp_text(lps)
    return cars, lps, lp_text