"""Gradio demo: read a bus license plate and classify its solar-panel state.

Pipeline per uploaded image:
  1. A YOLOv5 model locates license plates.
  2. Tesseract OCR reads the text of every detected plate.
  3. CLIP zero-shot classification decides whether the bus carries solar
     panel grids and, if so, whether they look broken.
"""
import os

import gradio as gr
import pytesseract
import torch
import yolov5
from transformers import CLIPModel, CLIPProcessor

vit_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# load model
model = yolov5.load('keremberke/yolov5m-license-plate')

# set model parameters
model.conf = 0.5  # NMS confidence threshold
model.iou = 0.25  # NMS IoU threshold
model.agnostic = False  # NMS class-agnostic
model.multi_label = False  # NMS multiple labels per box
model.max_det = 1000  # maximum number of detections per image

# License numbers known to have a solar plant installed (empty by default).
installed_list = []


def license_plate_detect(img):
    """Run the plate detector on *img*.

    Returns:
        The first four columns (x1, y1, x2, y2) of every prediction row,
        or ``None`` when the detector finds nothing.
    """
    results = model(img, size=640)
    predictions = results.pred[0]
    if len(predictions) == 0:
        return None
    return predictions[:, :4]  # x1, y1, x2, y2


def read_license_number(img):
    """OCR every license plate detected in *img*.

    Args:
        img: image object exposing ``crop(box)`` (the app passes a PIL image).

    Returns:
        One OCR string per detected plate, stripped of surrounding
        whitespace. An empty list when no plate is detected, so callers can
        always take ``len()`` of / iterate over the result.
    """
    boxes = license_plate_detect(img)
    if boxes is None:
        return []
    return [
        pytesseract.image_to_string(img.crop(bbox.tolist())).strip()
        for bbox in boxes
    ]


def zero_shot_classification(image, labels):
    """Score *image* against each text prompt in *labels* with CLIP.

    Returns:
        The softmax over ``logits_per_image`` along dim 1, i.e. one
        probability per label for the image.
    """
    inputs = processor(text=labels, images=image, return_tensors="pt",
                       padding=True)
    # Inference only: no gradients are needed, so skip autograd tracking.
    with torch.no_grad():
        outputs = vit_model(**inputs)
    # image-text similarity score
    logits_per_image = outputs.logits_per_image
    # softmax turns the similarity scores into label probabilities
    return logits_per_image.softmax(dim=1)


def check_solarplant_installed_by_license(license_number_list):
    """Look up each license number in the module-level ``installed_list``.

    Returns:
        A list of booleans (one per entry of *license_number_list*), or
        ``None`` when ``installed_list`` is empty and nothing can be looked up.
    """
    if not installed_list:
        return None
    return [
        license_number in installed_list
        for license_number in license_number_list
    ]


def check_solarplant_installed_by_image(image, output_label=False):
    """Classify whether the bus in *image* carries solar panel grids.

    Args:
        image: image passed on to the CLIP processor.
        output_label: when true, return the winning prompt text instead of
            a boolean.

    Returns:
        The winning prompt string if *output_label* is true; otherwise
        ``True`` when the "with solar panel grids" prompt wins.
    """
    zero_shot_class_labels = ["bus with solar panel grids",
                              "bus without solar panel grids"]
    probs = zero_shot_classification(image, zero_shot_class_labels)
    idx = probs.argmax().item()
    if output_label:
        return zero_shot_class_labels[idx]
    return idx == 0


def check_solarplant_broken(image, output_label=False):
    """Classify whether the solar panels in *image* look broken.

    Args:
        image: image passed on to the CLIP processor.
        output_label: when true, return a short status word instead of a
            boolean.

    Returns:
        ``"broken"`` or ``"normal"`` if *output_label* is true; otherwise
        ``True`` when the "broken" prompt wins.
    """
    zero_shot_class_labels = ["white broken solar panel",
                              "normal black solar panel grids"]
    # Status word reported for each prompt, kept explicit so rewording a
    # prompt cannot silently change the reported status.
    status_names = ("broken", "normal")
    probs = zero_shot_classification(image, zero_shot_class_labels)
    idx = probs.argmax().item()
    if output_label:
        return status_names[idx]
    return idx == 0


def greet(img):
    """Gradio handler: build the text report for an uploaded image.

    Returns:
        A string with the plate text(s), the panel type label and the panel
        status, or the fallback message when no license plate is detected.
    """
    lns = read_license_number(img)
    if not lns:
        # The interface has a single text output, so return only the text.
        return "空地。。。"

    planttype = check_solarplant_installed_by_image(img, True)
    # "without" also contains "with", so test for the negative label to
    # tell the two prompts apart.
    has_panels = 'without' not in planttype
    status = check_solarplant_broken(img, True) if has_panels else 'normal'
    return ("車牌: " + '; '.join(lns) + "\n\n"
            + "類型: " + planttype + "\n\n"
            + "狀態:" + status)


iface = gr.Interface(fn=greet, inputs=gr.Image(type="pil"), outputs="text")
iface.launch()