# Wildfire_and_Smoke / yolov7.py
# Aastha: initial commit (5dbb854)
# (File-viewer residue: raw | history | blame | contribute | delete | No virus | 7.43 kB)
import cv2
import numpy as np
import onnxruntime
class YOLOv7:
    """Run a YOLOv7 ONNX model through ONNX Runtime and draw its detections.

    The instance is callable: ``detector(image, conf, nms)`` is shorthand for
    :meth:`detect_objects`. The most recent detections are kept on
    ``self.boxes``, ``self.scores`` and ``self.class_ids`` so that
    :meth:`draw_detections` can render them afterwards.
    """

    def __init__(
        self,
        model_path,
        labels_path,
        engine_path,
        official_nms=False
    ):
        """Load the class labels, pick per-class colors and open the model.

        Args:
            model_path: Path of the ONNX model handed to ONNX Runtime.
            labels_path: Text file with one class name per line; the line
                number is the class index.
            engine_path: Forwarded to :meth:`initialize_model`, which does
                not use it with the CPU provider.
            official_nms: Stored on the instance and folded into
                ``self.has_postprocess``.
        """
        self.official_nms = official_nms

        # The ``with`` block closes the file; no explicit close() is needed.
        with open(labels_path, 'r') as f:
            self.class_names = [cname.strip() for cname in f]

        # One color (3 floats in [0, 255)) per class; the fixed seed keeps
        # the palette identical between runs.
        rng = np.random.default_rng(3)
        self.colors = rng.uniform(0, 255, size=(len(self.class_names), 3))

        # Start with "no detections" so draw_detections() is safe to call
        # before the first inference.
        self.boxes, self.scores, self.class_ids = [], [], []

        # Initialize model
        self.initialize_model(model_path, engine_path)

    def __call__(self, image, confidence_threshold, nms_threshold):
        """Alias for :meth:`detect_objects`."""
        return self.detect_objects(image, confidence_threshold, nms_threshold)

    def xywh2xyxy(self, x):
        """Convert center-based (cx, cy, w, h) boxes to corners (x1, y1, x2, y2).

        Args:
            x: Array-like whose last axis holds ``cx, cy, w, h``.

        Returns:
            A new array of the same shape holding ``x1, y1, x2, y2``.
        """
        x = np.asarray(x)
        y = np.copy(x)
        half_w = x[..., 2] / 2
        half_h = x[..., 3] / 2
        y[..., 0] = x[..., 0] - half_w
        y[..., 1] = x[..., 1] - half_h
        y[..., 2] = x[..., 0] + half_w
        y[..., 3] = x[..., 1] + half_h
        return y

    @staticmethod
    def _center_to_topleft(boxes):
        """Return a float32 copy of (cx, cy, w, h) boxes as (left, top, w, h)."""
        converted = np.array(boxes, dtype=np.float32)
        converted[..., :2] -= converted[..., 2:4] / 2
        return converted

    def _class_index(self, class_id):
        """Map a class name or a numeric class id to its integer index."""
        if isinstance(class_id, str):
            return self.class_names.index(class_id)
        return int(class_id)

    def initialize_model(self, model_path, engine_path):
        """Create the ONNX Runtime session and cache its input/output names.

        Args:
            model_path: Path of the ONNX model.
            engine_path: Unused with the CPU provider. It was meant as the
                TensorRT engine cache path ('trt_engine_cache_path').
        """
        # Only the CPU provider is enabled. GPU providers such as
        # 'TensorrtExecutionProvider' or 'CUDAExecutionProvider' can be
        # listed ahead of it when the matching onnxruntime build is installed.
        self.session = onnxruntime.InferenceSession(
            model_path,
            providers=['CPUExecutionProvider']
        )

        # Get model info
        self.get_input_details()
        self.get_output_details()
        self.has_postprocess = 'score' in self.output_names or self.official_nms

    def detect_objects(self, image, confidence_threshold, nms_threshold):
        """Run the model on ``image`` and store/return the filtered detections.

        Args:
            image: Image array with 3 channels in BGR order (it is converted
                with ``cv2.COLOR_BGR2RGB`` before inference).
            confidence_threshold: Minimum score a detection must exceed.
            nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.

        Returns:
            ``(boxes, scores, class_names)`` as produced by
            :meth:`process_output`.
        """
        input_tensor = self.prepare_input(image)

        # Perform inference on the image
        outputs = self.inference(input_tensor)

        # Process output data
        self.boxes, self.scores, self.class_ids = self.process_output(
            outputs, confidence_threshold, nms_threshold
        )
        return self.boxes, self.scores, self.class_ids

    def prepare_input(self, image):
        """Turn a BGR image into a float32 tensor of shape (1, 3, H, W).

        Also records the original image size on ``self.img_height`` and
        ``self.img_width`` for :meth:`rescale_boxes`.
        """
        self.img_height, self.img_width = image.shape[:2]

        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Resize input image to the size the model expects
        input_img = cv2.resize(input_img, (self.input_width, self.input_height))

        # Scale input pixel values to 0 to 1
        input_img = input_img / 255.0

        # HWC -> CHW, then add the batch axis
        input_img = input_img.transpose(2, 0, 1)
        return input_img[np.newaxis, :, :, :].astype(np.float32)

    def rescale_boxes(self, boxes):
        """Rescale boxes from model-input pixels to original-image pixels.

        Args:
            boxes: Array-like of 4-value boxes whose values alternate
                horizontal/vertical (x, y, w, h or x1, y1, x2, y2).

        Returns:
            A float32 array with the rescaled boxes.
        """
        input_shape = np.array(
            [self.input_width, self.input_height, self.input_width, self.input_height]
        )
        image_shape = np.array(
            [self.img_width, self.img_height, self.img_width, self.img_height]
        )
        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= image_shape
        return boxes

    def process_output(self, output, conf_threshold, nms_threshold):
        """Filter raw model outputs by confidence and non-maximum suppression.

        Args:
            output: Three arrays ``(boxes, scores, class_ids)``, each with a
                leading batch axis of which only element 0 is used. Boxes
                are treated as center-based (cx, cy, w, h) in model-input
                pixels, matching the final :meth:`xywh2xyxy` conversion.
            conf_threshold: Detections must score strictly above this value.
            nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.

        Returns:
            ``(boxes, scores, class_names)``: a float array of
            (x1, y1, x2, y2) boxes in original-image pixels, a list of
            scores and a list of class-name strings. Three empty lists are
            returned when nothing survives filtering.
        """
        boxes, scores, class_ids = output
        boxes = boxes[0]
        scores = scores[0]
        class_ids = class_ids[0]

        res_boxes = []
        res_scores = []
        res_class_ids = []
        for box, score, class_id in zip(boxes, scores, class_ids):
            # Each score / class id may arrive as a 1-element array or a
            # scalar; ravel() handles both without NumPy's deprecated
            # implicit array-to-scalar conversion.
            score = np.ravel(score)[0]
            if score > conf_threshold:
                res_boxes.append(box)
                res_scores.append(score)
                res_class_ids.append(int(np.ravel(class_id)[0]))

        if not res_scores:
            return [], [], []

        # Scale boxes to original image dimensions
        res_boxes = self.rescale_boxes(res_boxes)

        # cv2.dnn.NMSBoxes expects (left, top, w, h); the boxes here are
        # center-based, so convert a copy just for the overlap computation.
        nms_boxes = self._center_to_topleft(res_boxes)
        indices = cv2.dnn.NMSBoxes(
            nms_boxes.tolist(),
            [float(s) for s in res_scores],
            conf_threshold,
            nms_threshold,
        )
        # Depending on the OpenCV version the indices come back as an empty
        # tuple, a flat array or an Nx1 array; normalize to a flat int array.
        keep = np.asarray(indices, dtype=int).reshape(-1)
        if keep.size == 0:
            return [], [], []

        # Convert the surviving boxes to xyxy format
        fin_boxes = self.xywh2xyxy(res_boxes[keep])
        fin_scores = [res_scores[i] for i in keep]

        # Convert class ids to class names
        fin_class_ids = [self.class_names[res_class_ids[i]] for i in keep]

        return fin_boxes, fin_scores, fin_class_ids

    def draw_detections(self, image, draw_scores=True, mask_alpha=0.4):
        """Draw the most recent detections on a copy of ``image``.

        Args:
            image: Image array to annotate; it is not modified.
            draw_scores: Accepted for interface compatibility; the caption
                always includes the score.
            mask_alpha: Weight of the filled-box overlay in the final blend.

        Returns:
            The blended image with boxes, translucent fills and captions.
        """
        mask_img = image.copy()
        det_img = image.copy()

        img_height, img_width = image.shape[:2]
        short_side = min(img_height, img_width)
        size = short_side * 0.0006
        text_thickness = int(short_side * 0.001)

        # Draw bounding boxes and labels of detections
        for box, score, class_id in zip(self.boxes, self.scores, self.class_ids):
            # process_output() stores class *names*; resolve them back to an
            # index so the color and label lookups work (plain integer ids
            # are still accepted).
            class_index = self._class_index(class_id)
            color = tuple(float(c) for c in self.colors[class_index])

            x1, y1, x2, y2 = (int(v) for v in box.astype(int))

            # Draw rectangle
            cv2.rectangle(det_img, (x1, y1), (x2, y2), color, 2)

            # Draw fill rectangle in mask image
            cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)

            label = self.class_names[class_index]
            caption = f'{label} {int(score * 100)}%'
            (tw, th), _ = cv2.getTextSize(text=caption, fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                          fontScale=size, thickness=text_thickness)
            th = int(th * 1.2)

            # Filled caption background above the box, then the caption text
            for canvas in (det_img, mask_img):
                cv2.rectangle(canvas, (x1, y1), (x1 + tw, y1 - th), color, -1)
            for canvas in (det_img, mask_img):
                cv2.putText(canvas, caption, (x1, y1),
                            cv2.FONT_HERSHEY_SIMPLEX, size, (255, 255, 255),
                            text_thickness, cv2.LINE_AA)

        return cv2.addWeighted(mask_img, mask_alpha, det_img, 1 - mask_alpha, 0)

    def get_input_details(self):
        """Cache the session's input names and the first input's H/W."""
        model_inputs = self.session.get_inputs()
        self.input_names = [model_input.name for model_input in model_inputs]

        # Indices 2 and 3 are read as height and width (NCHW layout, as
        # produced by prepare_input).
        self.input_shape = model_inputs[0].shape
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]

    def get_output_details(self):
        """Cache the session's output names."""
        model_outputs = self.session.get_outputs()
        self.output_names = [model_output.name for model_output in model_outputs]

    def inference(self, input_tensor):
        """Run the session on ``input_tensor`` and return all of its outputs."""
        return self.session.run(self.output_names, {self.input_names[0]: input_tensor})