npc0 committed on
Commit
cd63339
1 Parent(s): 22122da
Files changed (1) hide show
  1. app.py +120 -0
app.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import urllib.request

# Location of the FastSAM checkpoint and where it is stored locally.
FASTSAM_WEIGHTS_URL = 'https://huggingface.co/spaces/An-619/FastSAM/resolve/main/weights/FastSAM.pt'
FASTSAM_WEIGHTS_PATH = 'FastSAM.pt'

# Download the checkpoint only when it is not already present. The file is
# written under a temporary name and renamed afterwards so that an interrupted
# download is never mistaken for a complete checkpoint on the next start.
if not os.path.exists(FASTSAM_WEIGHTS_PATH):
    _partial_path = FASTSAM_WEIGHTS_PATH + '.part'
    urllib.request.urlretrieve(FASTSAM_WEIGHTS_URL, _partial_path)
    os.replace(_partial_path, FASTSAM_WEIGHTS_PATH)
3
+
4
+ import yolov5
5
+
6
# Keep the detector under a dedicated name so that other module-level
# assignments to `model` cannot replace it before the function below runs.
plate_detector = yolov5.load('keremberke/yolov5m-license-plate')

# Detector parameters.
plate_detector.conf = 0.5  # NMS confidence threshold
plate_detector.iou = 0.25  # NMS IoU threshold
plate_detector.agnostic = False  # NMS class-agnostic
plate_detector.multi_label = False  # NMS multiple labels per box
plate_detector.max_det = 1000  # maximum number of detections per image


def license_plate_detect(img):
    """Run the license-plate detector on *img* and return the detected boxes.

    Inference is run once, with test-time augmentation enabled.

    Args:
        img: Image input forwarded unchanged to the yolov5 model.

    Returns:
        The first four columns (x1, y1, x2, y2) of the predictions for the
        first image, or ``None`` when the model returns no prediction entries.
    """
    results = plate_detector(img, size=640, augment=True)
    if not results.pred:
        return None
    # Columns 4 and 5 hold the score and category; only the boxes are needed.
    return results.pred[0][:, :4]
31
+
32
+ from PIL import Image
33
+ # image = Image.open(img)
34
+ import pytesseract
35
+
36
def read_license_number(img):
    """Read the text of every license plate detected in *img*.

    Args:
        img: A PIL image, or array data accepted by ``Image.fromarray``
            (assumed to be what the UI passes in; confirm against the caller).

    Returns:
        A list with one OCR string per detected plate; an empty list when no
        plate is detected.
    """
    boxes = license_plate_detect(img)
    # Test the box count explicitly: the truth value of a multi-element
    # tensor is ambiguous and raises instead of answering "any boxes?".
    if boxes is None or len(boxes) == 0:
        return []

    # Cropping needs a PIL image; convert anything else first.
    image = img if isinstance(img, Image.Image) else Image.fromarray(img)
    return [
        pytesseract.image_to_string(image.crop(tuple(bbox.tolist())))
        for bbox in boxes
    ]
42
+
43
+ from transformers import CLIPProcessor, CLIPModel
44
# Keep the CLIP model under a dedicated name so that other module-level
# assignments to `model` cannot replace it before the function below runs.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")


def zero_shot_classification(image, labels):
    """Score *image* against each text label with CLIP.

    Args:
        image: Image input forwarded to the CLIP processor.
        labels: Sequence of candidate label strings.

    Returns:
        The softmax over ``dim=1`` of the image-text similarity logits, i.e.
        the label probabilities for the image.
    """
    inputs = processor(
        text=labels,
        images=image,
        return_tensors="pt",
        padding=True,
    )
    outputs = clip_model(**inputs)
    # logits_per_image is the image-text similarity score; softmax turns it
    # into probabilities over the labels.
    return outputs.logits_per_image.softmax(dim=1)
55
+
56
# License numbers recorded as having a solar installation; empty by default.
installed_list = []


def check_solarplant_installed_by_license(license_number_list):
    """Look up each license number in ``installed_list``.

    Args:
        license_number_list: Iterable of license numbers to check.

    Returns:
        ``None`` when ``installed_list`` is empty; otherwise a list of
        booleans, one per input, telling whether that license number appears
        in ``installed_list``.
    """
    if not installed_list:
        return None
    return [
        license_number in installed_list
        for license_number in license_number_list
    ]
62
+
63
def check_solarplant_installed_by_image(image, output_label=False):
    """Classify whether the bus in *image* carries solar panel grids.

    Args:
        image: Image passed on to ``zero_shot_classification``.
        output_label: When true, return the winning label text instead of a
            boolean.

    Returns:
        The best-scoring label string if *output_label* is true; otherwise
        ``True`` when the "with solar panel grids" label scores highest.
    """
    candidate_labels = ["bus with solar panel grids",
                        "bus without solar panel grids"]
    scores = zero_shot_classification(image, candidate_labels)
    best = scores.argmax().item()
    return candidate_labels[best] if output_label else (best == 0)
70
+
71
def check_solarplant_broken(image):
    """Classify the solar panel in *image* as broken or normal.

    Args:
        image: Image passed on to ``zero_shot_classification``.

    Returns:
        The label string with the highest probability: either
        "white broken solar panel" or "normal black solar panel grids".
    """
    zero_shot_class_labels = ["white broken solar panel",
                              "normal black solar panel grids"]
    probs = zero_shot_classification(image, zero_shot_class_labels)
    idx = probs.argmax().item()
    # Return the whole label; indexing the label string again would yield a
    # single character of it.
    return zero_shot_class_labels[idx]
77
+
78
+ from fastsam import FastSAM, FastSAMPrompt
79
+
80
# Keep the segmentation model under a dedicated name so that it does not
# overwrite (or get overwritten by) any other module-level `model`.
fastsam_model = FastSAM('./FastSAM.pt')
DEVICE = 'cpu'


def segment_solar_panel(img):
    """Segment *img* with FastSAM and return the rendered annotation image.

    The annotated result is written to ``./bus.jpg`` and then loaded back.

    Args:
        img: A PIL image, or array data accepted by ``Image.fromarray``
            (assumed to be what the UI passes in; confirm against the caller).

    Returns:
        A PIL image holding the plotted segmentation.
    """
    # `.convert` exists only on PIL images; convert anything else first.
    if not isinstance(img, Image.Image):
        img = Image.fromarray(img)
    img = img.convert("RGB")

    everything_results = fastsam_model(
        img, device=DEVICE, retina_masks=True, imgsz=1024, conf=0.4, iou=0.9,
    )
    prompt_process = FastSAMPrompt(img, everything_results, device=DEVICE)

    # Point prompt: points are [[x, y], ...]; pointlabel 0 = background,
    # 1 = foreground. Only this prompt's annotation is plotted.
    # NOTE(review): the coordinate is fixed and independent of the input
    # image; a text prompt such as 'solar panel grids' may be what was
    # intended here. Confirm before changing.
    ann = prompt_process.point_prompt(points=[[620, 360]], pointlabel=[1])

    prompt_process.plot(annotations=ann, output_path='./bus.jpg',)
    # Copy the pixels out so the file handle is closed before returning.
    with Image.open('./bus.jpg') as rendered:
        return rendered.copy()
105
+
106
+
107
+ import gradio as gr
108
+
109
def greet(img):
    """Gradio handler: analyse *img* and build the (image, text) response.

    Args:
        img: The image supplied by the interface.

    Returns:
        A tuple ``(image, text)``. When at least one license number is read,
        the image is the segmentation output and the text lists the plate
        numbers, the installation type and the panel state. Otherwise the
        input image is returned with a placeholder message.
    """
    lns = read_license_number(img)
    # Truthiness covers both "no result" (None) and an empty list.
    if not lns:
        return (img, "空地。。。")

    seg = segment_solar_panel(img)
    plates = '; '.join(lns)
    kind = check_solarplant_installed_by_image(img, True)
    state = check_solarplant_broken(img)
    report = ("車牌: " + plates + "\n\n"
              + "類型: " + kind + "\n\n"
              + "狀態:" + state)
    return (seg, report)
118
+
119
# Build the web UI: one image input, handled by `greet`, producing an image
# and a text output; then start serving it.
iface = gr.Interface(
    fn=greet,
    inputs="image",
    outputs=["image", "text"],
)
iface.launch()