Aastha committed
Commit
5dbb854
1 Parent(s): 9ef2c21

initial commit

app.py ADDED
@@ -0,0 +1,99 @@
+ from typing import Any
+
+ import cv2
+ import numpy as np
+ import gradio as gr
+ from PIL import Image
+
+ from yolov7 import YOLOv7
+
+
+ class Inference:
+     def setup_models(self, model_path, labels_path, engine_path):
+         yolo = YOLOv7(
+             model_path,
+             labels_path,
+             engine_path
+         )
+         return yolo
+
+     def __init__(self, model_path, labels_path, engine_path):
+         self.model = self.setup_models(
+             model_path,
+             labels_path,
+             engine_path
+         )
+
+     def __call__(self, frame: np.ndarray, conf_threshold: float, nms_threshold: float, *args: Any, **kwds: Any) -> Any:
+         boxes, scores, class_ids = self.model(frame, conf_threshold, nms_threshold)
+         return boxes, scores, class_ids
+
+
+ infer1 = Inference(
+     "models/firesmoke.onnx",
+     "models/labels.txt",
+     "firesmoke.trt"
+ )
+
+ infer2 = Inference(
+     "models/firesmoke-henry.onnx",
+     "models/labels.txt",
+     "firesmoke-henry.trt"
+ )
+
+
+ def draw_boxes(img, boxes, scores, class_ids):
+     # Draw a red box and a "label:score" caption for each detection.
+     for box, score, class_id in zip(boxes, scores, class_ids):
+         x1, y1, x2, y2 = (int(v) for v in box)
+         cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
+         cv2.rectangle(img, (x1, y1 - 20), (x1 + 100, y1), (0, 0, 255), -1)
+         cv2.putText(img, "{}:{:.2f}".format(class_id, score), (x1, y1), cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
+     return img
+
+
+ def run(content_img, conf_threshold, nms_threshold):
+     # Gradio delivers an RGB image; OpenCV drawing expects BGR.
+     content_img = cv2.cvtColor(np.array(content_img), cv2.COLOR_RGB2BGR)
+
+     boxes1, scores1, class_ids1 = infer1(content_img, conf_threshold, nms_threshold)
+     boxes2, scores2, class_ids2 = infer2(content_img, conf_threshold, nms_threshold)
+
+     img1 = draw_boxes(content_img.copy(), boxes1, scores1, class_ids1)
+     img2 = draw_boxes(content_img.copy(), boxes2, scores2, class_ids2)
+
+     # Convert back to RGB PIL images for the Gradio outputs.
+     img1 = Image.fromarray(cv2.cvtColor(img1, cv2.COLOR_BGR2RGB))
+     img2 = Image.fromarray(cv2.cvtColor(img2, cv2.COLOR_BGR2RGB))
+
+     return img1, img2
+
+
+ if __name__ == '__main__':
+     style = gr.Interface(
+         fn=run,
+         inputs=[
+             gr.Image(label='Input Image'),
+             # Gradio 3.x (pinned in requirements.txt) takes the initial slider
+             # position via `value=`; `default=` was the Gradio 2.x keyword.
+             gr.Slider(minimum=0.05, maximum=1, step=0.05, value=0.3, label="Confidence Threshold"),
+             gr.Slider(minimum=0.05, maximum=1, step=0.05, value=0.3, label="NMS Threshold"),
+         ],
+         outputs=[
+             gr.Image(
+                 type="pil",
+                 label="Finetuned"
+             ),
+             gr.Image(
+                 type="pil",
+                 label="Finetuned + New Data"
+             ),
+         ],
+         examples=[
+             ['examples/fire1.jpg'],
+             ['examples/fire2.jpg'],
+             ['examples/fire3.jpg']
+         ]
+     )
+     style.launch()
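
A quick way to exercise this inference path outside the Gradio UI is a small driver script like the sketch below. It is not part of the commit: the file name check_app.py is hypothetical, and it assumes the ONNX models and the bundled examples/fire1.jpg are present locally (importing app constructs infer1 and infer2, so both models load at import time).

# check_app.py -- hypothetical smoke test, not part of this commit
from PIL import Image

from app import run  # loads both ONNX models as a side effect of the import

img = Image.open("examples/fire1.jpg")
out_finetuned, out_new_data = run(img, conf_threshold=0.3, nms_threshold=0.3)
out_finetuned.save("finetuned.jpg")
out_new_data.save("finetuned_new_data.jpg")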
examples/fire1.jpg ADDED
examples/fire2.jpg ADDED
examples/fire3.jpg ADDED
models/firesmoke-henry.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6223c3d99acb5515abce9056de3b5151df5bcc321b7927815c325f79e95420c1
+ size 146024150
models/firesmoke.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:55d1a9543bc6f610c0d9d6ec1ad64ce3df0ff703f29a92325480fa4f21f807f7
+ size 146024150
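
Both .onnx entries above are Git LFS pointer files (the three version/oid/size lines), not the network weights themselves; the actual ~146 MB blobs live in LFS storage. If only the metadata is needed without pulling the weights, a pointer can be read with a few lines like this sketch (parse_lfs_pointer is a hypothetical helper, not part of the commit):

def parse_lfs_pointer(path):
    # A Git LFS pointer is three "key value" lines: version, oid, size.
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields  # e.g. fields["oid"] == "sha256:6223c3d9...", fields["size"] == "146024150"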
models/labels.txt ADDED
@@ -0,0 +1,2 @@
+ smoke
+ fire
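
Note that line order defines the class indices: yolov7.py indexes a detection's integer class id straight into this list, so index 0 must be smoke and index 1 must be fire, matching the labels the models were trained with.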
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ gradio==3.50.2
+ numpy==1.26.1
+ onnxruntime==1.16.1
+ opencv_python==4.8.1.78
+ Pillow==10.1.0
yolov7.py ADDED
@@ -0,0 +1,198 @@
+ import cv2
+ import numpy as np
+ import onnxruntime
+
+
+ class YOLOv7:
+
+     def __init__(
+         self,
+         model_path,
+         labels_path,
+         engine_path,
+         official_nms=False
+     ):
+         self.official_nms = official_nms
+
+         # Load class names, one per line.
+         with open(labels_path, 'r') as f:
+             self.class_names = [cname.strip() for cname in f.readlines()]
+
+         # Assign a random (but seeded, hence reproducible) color per class.
+         rng = np.random.default_rng(3)
+         self.colors = rng.uniform(0, 255, size=(len(self.class_names), 3))
+
+         # Initialize model
+         self.initialize_model(model_path, engine_path)
+
+     def __call__(self, image, confidence_threshold, nms_threshold):
+         return self.detect_objects(image, confidence_threshold, nms_threshold)
+
+     def xywh2xyxy(self, x):
+         # Convert center-format boxes (cx, cy, w, h) to corner format (x1, y1, x2, y2)
+         y = np.copy(x)
+         y[..., 0] = x[..., 0] - x[..., 2] / 2
+         y[..., 1] = x[..., 1] - x[..., 3] / 2
+         y[..., 2] = x[..., 0] + x[..., 2] / 2
+         y[..., 3] = x[..., 1] + x[..., 3] / 2
+         return y
+
+     def initialize_model(self, model_path, engine_path):
+         # The TensorRT and CUDA provider configs are kept (commented out) for
+         # GPU deployments; this Space runs on CPU only.
+         self.session = onnxruntime.InferenceSession(
+             model_path,
+             providers=[
+                 # (
+                 #     'TensorrtExecutionProvider',
+                 #     {
+                 #         'device_id': 0,
+                 #         'trt_max_workspace_size': 2147483648,
+                 #         'trt_fp16_enable': True,
+                 #         'trt_engine_cache_enable': True,
+                 #         'trt_engine_cache_path': '{}'.format(engine_path),
+                 #     }
+                 # ),
+                 # (
+                 #     'CUDAExecutionProvider',
+                 #     {
+                 #         'device_id': 0,
+                 #         'arena_extend_strategy': 'kNextPowerOfTwo',
+                 #         'gpu_mem_limit': 2 * 1024 * 1024 * 1024,
+                 #         'cudnn_conv_algo_search': 'EXHAUSTIVE',
+                 #         'do_copy_in_default_stream': True,
+                 #     }
+                 # ),
+                 'CPUExecutionProvider'
+             ]
+         )
+         # Get model info
+         self.get_input_details()
+         self.get_output_details()
+
+         self.has_postprocess = 'score' in self.output_names or self.official_nms
+
+     def detect_objects(self, image, confidence_threshold, nms_threshold):
+         input_tensor = self.prepare_input(image)
+
+         # Perform inference on the image
+         outputs = self.inference(input_tensor)
+
+         # Process output data
+         self.boxes, self.scores, self.class_ids = self.process_output(outputs, confidence_threshold, nms_threshold)
+
+         return self.boxes, self.scores, self.class_ids
+
+     def prepare_input(self, image):
+         self.img_height, self.img_width = image.shape[:2]
+
+         input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+         # Resize input image to the model's expected resolution
+         input_img = cv2.resize(input_img, (self.input_width, self.input_height))
+
+         # Scale pixel values to [0, 1] and reorder HWC -> NCHW
+         input_img = input_img / 255.0
+         input_img = input_img.transpose(2, 0, 1)
+         input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)
+
+         return input_tensor
+
+     def rescale_boxes(self, boxes):
+         # Rescale boxes from model input size to original image dimensions
+         input_shape = np.array([self.input_width, self.input_height, self.input_width, self.input_height])
+         boxes = np.divide(boxes, input_shape, dtype=np.float32)
+         boxes *= np.array([self.img_width, self.img_height, self.img_width, self.img_height])
+         return boxes
+
+     def process_output(self, output, conf_threshold, nms_threshold):
+         boxes, scores, class_ids = output
+         # Drop the batch dimension
+         boxes = boxes[0]
+         scores = scores[0]
+         class_ids = class_ids[0]
+
+         res_boxes = []
+         res_scores = []
+         res_class_ids = []
+
+         # Confidence filtering; scores can arrive as 1-element arrays, so
+         # reduce each to a plain float before comparing.
+         for box, score, class_id in zip(boxes, scores, class_ids):
+             score = float(np.squeeze(score))
+             if score > conf_threshold:
+                 res_boxes.append(box)
+                 res_scores.append(score)
+                 res_class_ids.append(int(class_id))
+
+         if len(res_scores) == 0:
+             return [], [], []
+
+         # Scale boxes to original image dimensions
+         res_boxes = self.rescale_boxes(res_boxes)
+
+         # Non-maximum suppression; NMSBoxes returns the indices to keep
+         fin_boxes, fin_scores, fin_class_ids = [], [], []
+         keep_ids = cv2.dnn.NMSBoxes(res_boxes, res_scores, conf_threshold, nms_threshold)
+         for keep_id in keep_ids:
+             fin_boxes.append(res_boxes[keep_id])
+             fin_scores.append(res_scores[keep_id])
+             fin_class_ids.append(res_class_ids[keep_id])
+
+         # Convert boxes to xyxy format
+         fin_boxes = self.xywh2xyxy(np.array(fin_boxes))
+
+         # Convert class ids to class names
+         fin_class_ids = [self.class_names[i] for i in fin_class_ids]
+         return fin_boxes, fin_scores, fin_class_ids
+
+     def draw_detections(self, image, draw_scores=True, mask_alpha=0.4):
+         mask_img = image.copy()
+         det_img = image.copy()
+
+         img_height, img_width = image.shape[:2]
+         size = min([img_height, img_width]) * 0.0006
+         text_thickness = int(min([img_height, img_width]) * 0.001)
+
+         # Draw bounding boxes and labels of detections.
+         # Note: process_output stores class *names* in self.class_ids, so
+         # look the index back up to pick the per-class color.
+         for box, score, label in zip(self.boxes, self.scores, self.class_ids):
+             color = tuple(self.colors[self.class_names.index(label)])
+
+             x1, y1, x2, y2 = box.astype(int)
+
+             # Draw rectangle
+             cv2.rectangle(det_img, (x1, y1), (x2, y2), color, 2)
+
+             # Draw fill rectangle in mask image
+             cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)
+
+             caption = f'{label} {int(score * 100)}%'
+             (tw, th), _ = cv2.getTextSize(text=caption, fontFace=cv2.FONT_HERSHEY_SIMPLEX,
+                                           fontScale=size, thickness=text_thickness)
+             th = int(th * 1.2)
+
+             cv2.rectangle(det_img, (x1, y1), (x1 + tw, y1 - th), color, -1)
+             cv2.rectangle(mask_img, (x1, y1), (x1 + tw, y1 - th), color, -1)
+             cv2.putText(det_img, caption, (x1, y1),
+                         cv2.FONT_HERSHEY_SIMPLEX, size, (255, 255, 255), text_thickness, cv2.LINE_AA)
+             cv2.putText(mask_img, caption, (x1, y1),
+                         cv2.FONT_HERSHEY_SIMPLEX, size, (255, 255, 255), text_thickness, cv2.LINE_AA)
+
+         return cv2.addWeighted(mask_img, mask_alpha, det_img, 1 - mask_alpha, 0)
+
+     def get_input_details(self):
+         model_inputs = self.session.get_inputs()
+         self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]
+
+         self.input_shape = model_inputs[0].shape
+         self.input_height = self.input_shape[2]
+         self.input_width = self.input_shape[3]
+
+     def get_output_details(self):
+         model_outputs = self.session.get_outputs()
+         self.output_names = [model_outputs[i].name for i in range(len(model_outputs))]
+
+     def inference(self, input_tensor):
+         outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor})
+         return outputs
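
The only nontrivial arithmetic above is xywh2xyxy, which turns center-format boxes into corner format. A tiny standalone check of the same formula (a sketch repeating the arithmetic, not an import of the class):

import numpy as np

# A box centered at (50, 40) with width 20 and height 10
# should have corners (40, 35) and (60, 45).
x = np.array([[50.0, 40.0, 20.0, 10.0]])
y = np.copy(x)
y[..., 0] = x[..., 0] - x[..., 2] / 2  # x1 = cx - w/2
y[..., 1] = x[..., 1] - x[..., 3] / 2  # y1 = cy - h/2
y[..., 2] = x[..., 0] + x[..., 2] / 2  # x2 = cx + w/2
y[..., 3] = x[..., 1] + x[..., 3] / 2  # y2 = cy + h/2
assert np.allclose(y, [[40.0, 35.0, 60.0, 45.0]])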