import os
import torch
from PIL import Image
# import yolov5

# # load model
# model = yolov5.load('keremberke/yolov5m-license-plate')

# # set model parameters
# model.conf = 0.5  # NMS confidence threshold
# model.iou = 0.25  # NMS IoU threshold
# model.agnostic = False  # NMS class-agnostic
# model.multi_label = False  # NMS multiple labels per box
# model.max_det = 1000  # maximum number of detections per image

# # detect license plates in an image
# def license_plate_detect(img):
#     # plain inference (alternative):
#     # results = model(img, size=640)

#     # inference with test-time augmentation
#     results = model(img, augment=True)

#     # parse results
#     if len(results.pred):
#         predictions = results.pred[0]
#         boxes = predictions[:, :4]  # x1, y1, x2, y2
#         scores = predictions[:, 4]
#         categories = predictions[:, 5]
#         return boxes

# from PIL import Image
# # image = Image.open(img)
# import pytesseract

# def read_license_number(img):
#     boxes = license_plate_detect(img)
#     if boxes is not None and len(boxes):
#         return [pytesseract.image_to_string(
#                     img.crop(bbox.tolist()))
#                 for bbox in boxes]
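
# # Usage sketch for the disabled OCR pipeline ("plate.jpg" is a hypothetical file):
# #   img = Image.open("plate.jpg")
# #   print(read_license_number(img))  # list of OCR'd strings, one per detected plate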

from transformers import CLIPProcessor, CLIPModel

vit_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def zero_shot_classification(image, labels):
    inputs = processor(text=labels,
                       images=image,
                       return_tensors="pt",
                       padding=True)
    with torch.no_grad():  # inference only, so skip gradient tracking
        outputs = vit_model(**inputs)
    logits_per_image = outputs.logits_per_image  # image-text similarity scores
    return logits_per_image.softmax(dim=1)  # softmax gives per-label probabilities
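
# Usage sketch (illustrative; "bus.jpg" is a hypothetical local file):
#   img = Image.open("bus.jpg")
#   probs = zero_shot_classification(img, ["a bus", "a truck"])  # shape (1, 2)
#   best = probs.argmax(dim=1).item()  # index of the best-matching label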

# installed_list = []
# # image = Image.open(requests.get(url, stream=True).raw)
# def check_solarplant_installed_by_license(license_number_list):
#     if len(installed_list):
#         return [license_number in installed_list
#                 for license_number in license_number_list]

def check_solarplant_installed_by_image(image, output_label=False):
    # Zero-shot check: does the bus in the image carry solar panel grids?
    zero_shot_class_labels = ["bus with solar panel grids",
                              "bus without solar panel grids"]
    probs = zero_shot_classification(image, zero_shot_class_labels)
    if output_label:
        return zero_shot_class_labels[probs.argmax().item()]
    return probs.argmax().item() == 0  # index 0 means "with solar panel grids"
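
# Usage sketch: check_solarplant_installed_by_image(Image.open("bus.jpg"))
# returns True when the "with solar panel grids" label wins; pass
# output_label=True to get the winning label string instead.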

# def check_solarplant_broken(image):
#     zero_shot_class_labels = ["white broken solar panel",
#                               "normal black solar panel grids"]
#     probs = zero_shot_classification(image, zero_shot_class_labels)
#     idx = probs.argmax().item()
#     return zero_shot_class_labels[idx].split(" ")[1-idx]

from fastsam import FastSAM, FastSAMPrompt

# Download the FastSAM weights once; skip if already present.
if not os.path.exists('./FastSAM.pt'):
    os.system('wget https://huggingface.co/spaces/An-619/FastSAM/resolve/main/weights/FastSAM.pt')

model = FastSAM('./FastSAM.pt')
DEVICE = 'cpu'
def segment_solar_panel(img):
    # Equivalent CLI call:
    # os.system('python Inference.py --model_path FastSAM.pt --img_path bus.jpg  --text_prompt "solar panel grids"')
    img = img.convert("RGB")

    everything_results = model(img, device=DEVICE, retina_masks=True, imgsz=1024, conf=0.4, iou=0.9,)
    prompt_process = FastSAMPrompt(img, everything_results, device=DEVICE)

    # Alternative prompt modes (unused; kept for reference):
    # everything prompt
    # ann = prompt_process.everything_prompt()
    # box prompt, bbox as [[x1, y1, x2, y2]]
    # ann = prompt_process.box_prompt(bbox=[[200, 200, 300, 300]])
    # point prompt: points [[x1, y1], ...]; pointlabel 0: background, 1: foreground
    # ann = prompt_process.point_prompt(points=[[620, 360]], pointlabel=[1])

    # text prompt: select the masks matching the description
    ann = prompt_process.text_prompt(text='solar panel grids')

    prompt_process.plot(annotations=ann, output_path='./bus.jpg')
    return Image.open('./bus.jpg')
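
# Usage sketch (note: segment_solar_panel writes the annotated plot to ./bus.jpg
# before returning it; "input.jpg" below is a hypothetical file):
#   seg = segment_solar_panel(Image.open("input.jpg"))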


import gradio as gr

def greet(img):
    if check_solarplant_installed_by_image(img):
        seg = segment_solar_panel(img)
        return (seg, 'Attempting to segment the solar panel region')
        # return (seg,
        #         "License plate: " + '; '.join(lns) + "\n\n" \
        #         + "Type: " + check_solarplant_installed_by_image(img, True) + "\n\n" \
        #         + "Status: " + check_solarplant_broken(img))
    return (img, "No solar panel region to segment")

iface = gr.Interface(fn=greet, inputs="image", outputs=["image", "text"])
iface.launch()