import gradio as gr
import pytesseract  # requires the Tesseract OCR binary installed on the system
import yolov5

# CLIP model + processor for zero-shot image classification
from transformers import CLIPProcessor, CLIPModel
vit_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# load model
model = yolov5.load('keremberke/yolov5m-license-plate')

# set model parameters
model.conf = 0.5  # NMS confidence threshold
model.iou = 0.25  # NMS IoU threshold
model.agnostic = False  # NMS class-agnostic
model.multi_label = False  # NMS multiple labels per box
model.max_det = 1000  # maximum number of detections per image
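
# Detections can also be inspected outside the app; a minimal sketch using
# the yolov5 package's results API (the image path here is hypothetical):
#
#   results = model("parked_buses.jpg")
#   results.save(save_dir="results/")  # writes annotated images to results/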

def license_plate_detect(img):
    results = model(img, size=640)

    # parse results: each row of pred is (x1, y1, x2, y2, confidence, class)
    predictions = results.pred[0]
    if len(predictions):
        return predictions[:, :4]  # bounding boxes as x1, y1, x2, y2
    return None


def read_license_number(img):
    boxes = license_plate_detect(img)
    if boxes is None:
        return []
    # OCR the cropped plate region for each detected box
    return [pytesseract.image_to_string(img.crop(bbox.tolist()))
            for bbox in boxes]
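
# Hypothetical usage sketch: read_license_number(Image.open("bus.jpg"))
# returns one OCR string per detected plate; note that pytesseract keeps a
# trailing newline on each string unless you .strip() it.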


def zero_shot_classification(image, labels):
    inputs = processor(text=labels,
                       images=image,
                       return_tensors="pt",
                       padding=True)
    outputs = vit_model(**inputs)
    logits_per_image = outputs.logits_per_image  # image-text similarity scores
    return logits_per_image.softmax(dim=1)  # softmax over labels -> probabilities
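
# A minimal sketch of calling zero_shot_classification directly (the image
# path and labels here are hypothetical, not part of the app):
#
#   from PIL import Image
#   probs = zero_shot_classification(Image.open("bus.jpg"),
#                                    ["a photo of a bus", "a photo of a car"])
#   print(probs)  # tensor of shape (1, 2); each row sums to 1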

installed_list = []
def check_solarplant_installed_by_license(license_number_list):
    # look each recognized plate number up in the installed-bus registry
    return [license_number in installed_list
            for license_number in license_number_list]

def check_solarplant_installed_by_image(image, output_label=False):
    zero_shot_class_labels = ["bus with solar panel grids",
                              "bus without solar panel grids"]
    probs = zero_shot_classification(image, zero_shot_class_labels)
    if output_label:
        return zero_shot_class_labels[probs.argmax().item()]
    return probs.argmax().item() == 0  # index 0 means panels are present

def check_solarplant_broken(image, output_label=False):
    zero_shot_class_labels = ["white broken solar panel",
                              "normal black solar panel grids"]
    probs = zero_shot_classification(image, zero_shot_class_labels)
    idx = probs.argmax().item()
    if output_label:
        # word 1 of label 0 is "broken"; word 0 of label 1 is "normal"
        return zero_shot_class_labels[idx].split(" ")[1 - idx]
    return idx == 0


def greet(img):
    lns = read_license_number(img)
    if len(lns):
        planttype = check_solarplant_installed_by_image(img, True)
        # check 'without' explicitly: 'with' is a substring of 'without'
        return ("License plate: " + '; '.join(lns) + "\n\n"
                + "Type: " + planttype + "\n\n"
                + "Status: " + (check_solarplant_broken(img, True)
                                if 'without' not in planttype else 'normal'))
    return "Empty lot..."
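
# Hedged end-to-end smoke test, kept commented out so running this file
# still just launches the UI ("parked_bus.jpg" is a hypothetical path):
#
#   from PIL import Image
#   print(greet(Image.open("parked_bus.jpg")))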


iface = gr.Interface(fn=greet, inputs=gr.Image(type="pil"), outputs="text")
iface.launch()