File size: 4,268 Bytes
79f9dfb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aedefaa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a88ada
 
 
 
 
aedefaa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a88ada
 
aedefaa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import os

# Fetch the FastSAM checkpoint only when it is not already in the working
# directory. An unconditional wget re-downloads on every start and, by wget's
# default naming, saves the copy under a new name (FastSAM.pt.1, ...) while
# the app keeps loading './FastSAM.pt'.
if not os.path.exists('FastSAM.pt'):
    os.system('wget https://huggingface.co/spaces/An-619/FastSAM/resolve/main/weights/FastSAM.pt')

import yolov5

# License-plate detector. It is kept under its own name because `model` is
# also the name the FastSAM network is bound to later in this file; a
# call-time lookup of `model` would therefore pick up that later network
# instead of this detector.
plate_model = yolov5.load('keremberke/yolov5m-license-plate')

# set model parameters
plate_model.conf = 0.5  # NMS confidence threshold
plate_model.iou = 0.25  # NMS IoU threshold
plate_model.agnostic = False  # NMS class-agnostic
plate_model.multi_label = False  # NMS multiple labels per box
plate_model.max_det = 1000  # maximum number of detections per image


def license_plate_detect(img):
    """Detect license plates in *img* and return their bounding boxes.

    Args:
        img: Image in any form the yolov5 model wrapper accepts.

    Returns:
        The first four columns (x1, y1, x2, y2) of the predictions for the
        image, or ``None`` when the model reports no prediction entry.
    """
    # One pass with test-time augmentation. The previous code ran a plain
    # pass first and immediately overwrote its result with this one.
    results = plate_model(img, size=640, augment=True)

    if not results.pred:
        return None
    return results.pred[0][:, :4]  # x1, y1, x2, y2

from PIL import Image
# image = Image.open(img)
import pytesseract

def read_license_number(img):
    """Run OCR on every license plate detected in *img*.

    Args:
        img: Image to scan. It is handed to ``license_plate_detect`` and then
            cropped with ``img.crop``, so it must provide a PIL-style
            ``crop`` method.

    Returns:
        A list with one OCR string per detected plate, with surrounding
        whitespace stripped. The list is empty when nothing was detected.
    """
    boxes = license_plate_detect(img)
    # Check the length explicitly: truth-testing a tensor with several
    # elements raises instead of answering "is it empty?".
    if boxes is None or len(boxes) == 0:
        return []
    return [
        pytesseract.image_to_string(img.crop(bbox.tolist())).strip()
        for bbox in boxes
    ]

from transformers import CLIPProcessor, CLIPModel

# CLIP is kept under its own name because `model` is also the name the
# FastSAM network is bound to later in this file; a call-time lookup of
# `model` would therefore pick up that later network instead of CLIP.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")


def zero_shot_classification(image, labels):
    """Score *image* against each text label with CLIP.

    Args:
        image: Image (or images) accepted by the CLIP processor.
        labels: List of candidate text descriptions.

    Returns:
        The image-text similarity logits after a softmax over ``dim=1``,
        i.e. label probabilities for the image.
    """
    inputs = processor(text=labels,
                       images=image,
                       return_tensors="pt",
                       padding=True)
    outputs = clip_model(**inputs)
    # logits_per_image is the image-text similarity score; softmax turns it
    # into per-label probabilities.
    return outputs.logits_per_image.softmax(dim=1)

# License numbers to match against; presumably those already known to have a
# solar plant installed (starts empty here) -- TODO confirm how it is filled.
installed_list = []


def check_solarplant_installed_by_license(license_number_list):
    """Report, per license number, whether it appears in ``installed_list``.

    Args:
        license_number_list: Iterable of license numbers to look up.

    Returns:
        A list of booleans in the same order as the input. An empty
        ``installed_list`` yields all ``False`` rather than ``None``, so the
        return type no longer depends on the registry's contents.
    """
    return [license_number in installed_list
            for license_number in license_number_list]

def check_solarplant_installed_by_image(image, output_label=False):
    """Classify whether the bus in *image* carries solar panel grids.

    Args:
        image: Image passed on to ``zero_shot_classification``.
        output_label: When true, return the winning text label instead of a
            boolean.

    Returns:
        The best-scoring label string if ``output_label`` is set; otherwise
        ``True`` when the "with solar panel grids" label (index 0) wins.
    """
    candidates = ["bus with solar panel grids",
                  "bus without solar panel grids"]
    best = zero_shot_classification(image, candidates).argmax().item()
    return candidates[best] if output_label else best == 0

def check_solarplant_broken(image):
    """Describe the condition of the solar panel seen in *image*.

    Args:
        image: Image passed on to ``zero_shot_classification``.

    Returns:
        The best-scoring label string: either ``"white broken solar panel"``
        or ``"normal black solar panel grids"``.
    """
    zero_shot_class_labels = ["white broken solar panel",
                              "normal black solar panel grids"]
    probs = zero_shot_classification(image, zero_shot_class_labels)
    # Return the whole label. Indexing the chosen label again (as the code
    # did before) yields a single character of it.
    return zero_shot_class_labels[probs.argmax().item()]

from fastsam import FastSAM, FastSAMPrompt

model = FastSAM('./FastSAM.pt')
DEVICE = 'cpu'


def segment_solar_panel(img):
    """Segment the solar panel grids in *img* and return the rendered result.

    Args:
        img: Image providing a PIL-style ``convert`` method.

    Returns:
        A PIL image holding the annotation plot that FastSAM wrote to
        ``./bus.jpg``, loaded fully into memory.
    """
    output_path = './bus.jpg'
    img = img.convert("RGB")

    everything_results = model(img, device=DEVICE, retina_masks=True,
                               imgsz=1024, conf=0.4, iou=0.9)
    prompt_process = FastSAMPrompt(img, everything_results, device=DEVICE)

    # Keep the text prompt. The earlier code also ran the everything, box
    # and point prompts and ended up plotting the point prompt, whose fixed
    # coordinates (620, 360) have no relation to the uploaded image.
    ann = prompt_process.text_prompt(text='solar panel grids')

    prompt_process.plot(annotations=ann, output_path=output_path)

    # Image.open is lazy; copy the pixels out so the result does not depend
    # on the file, which is overwritten by the next call.
    with Image.open(output_path) as rendered:
        return rendered.copy()


import gradio as gr

def greet(img):
    """Gradio callback: analyse *img* and build the image/text response.

    Args:
        img: The uploaded image, forwarded unchanged to the detection,
            segmentation and classification helpers.

    Returns:
        A ``(image, text)`` tuple. When at least one license plate was read,
        the image is the segmentation plot and the text lists the plate
        strings, the solar-panel type label and the panel status. Otherwise
        the input image is returned with a placeholder message.
    """
    lns = read_license_number(img)
    # Truthiness covers both an empty list and None; len(None) would raise.
    if not lns:
        return (img, "空地。。。")

    seg = segment_solar_panel(img)
    report = ("車牌: " + '; '.join(lns) + "\n\n"
              + "類型: " + check_solarplant_installed_by_image(img, True) + "\n\n"
              + "狀態:" + check_solarplant_broken(img))
    return (seg, report)

# Ask Gradio for a PIL image: the "image" shortcut delivers a numpy array by
# default, but the pipeline calls PIL methods on the input (for example
# `img.convert("RGB")` in segment_solar_panel).
iface = gr.Interface(fn=greet,
                     inputs=gr.Image(type="pil"),
                     outputs=["image", "text"])
iface.launch()