Spaces:

eireneai
/

Wildfire_and_Smoke

Runtime error

File size: 7,433 Bytes

5dbb854

import cv2
import numpy as np
import onnxruntime

class YOLOv7:
    """YOLOv7 object detector running an ONNX model through ONNX Runtime.

    Typical use::

        detector = YOLOv7(model_path, labels_path, engine_path)
        boxes, scores, names = detector(image, 0.5, 0.4)
        annotated = detector.draw_detections(image)
    """

    def __init__(
            self, 
            model_path, 
            labels_path,
            engine_path, 
            official_nms=False
        ):
        """Load the class labels and create the inference session.

        Args:
            model_path: Path of the ONNX model file.
            labels_path: Text file with one class name per line.
            engine_path: TensorRT engine cache path. Only referenced by the
                (currently commented-out) TensorRT provider configuration.
            official_nms: Stored on the instance and used to compute
                ``has_postprocess``.
        """
        self.official_nms = official_nms

        # The ``with`` block closes the file; no explicit close() is needed.
        with open(labels_path, 'r') as f:
            self.class_names = [cname.strip() for cname in f.readlines()]

        # One colour per class: a row of 3 floats in [0, 255), seeded so the
        # palette is identical on every run.
        rng = np.random.default_rng(3)
        self.colors = rng.uniform(0, 255, size=(len(self.class_names), 3))

        # Start with empty results so draw_detections() is safe to call
        # before the first detection.
        self.boxes, self.scores, self.class_ids = [], [], []

        # Initialize model
        self.initialize_model(model_path, engine_path)

    def __call__(self, image, confidence_threshold, nms_threshold):
        """Shorthand for :meth:`detect_objects`."""
        return self.detect_objects(image, confidence_threshold, nms_threshold)

    def xywh2xyxy(self, x):
        """Convert boxes from (cx, cy, w, h) to (x1, y1, x2, y2).

        Works on any array whose last axis has length 4 (including an empty
        ``(0, 4)`` array) and returns a new array; ``x`` is not modified.
        """
        y = np.copy(x)
        y[..., 0] = x[..., 0] - x[..., 2] / 2
        y[..., 1] = x[..., 1] - x[..., 3] / 2
        y[..., 2] = x[..., 0] + x[..., 2] / 2
        y[..., 3] = x[..., 1] + x[..., 3] / 2
        return y

    def initialize_model(self, model_path, engine_path):
        """Create the ONNX Runtime session and cache input/output metadata.

        Only the CPU execution provider is enabled; the GPU provider
        configurations below are kept for reference. ``engine_path`` is
        unused while the TensorRT provider stays commented out.
        """
        self.session = onnxruntime.InferenceSession(
            model_path,
            providers=[
                # (
                #     'TensorrtExecutionProvider',
                #     {
                #         'device_id': 0,
                #         'trt_max_workspace_size': 2147483648,
                #         'trt_fp16_enable': True,
                #         'trt_engine_cache_enable': True,
                #         'trt_engine_cache_path': '{}'.format(engine_path),
                #     }
                # ),
                # (
                #     'CUDAExecutionProvider', 
                #     {
                #         'device_id': 0,
                #         'arena_extend_strategy': 'kNextPowerOfTwo',
                #         'gpu_mem_limit': 2 * 1024 * 1024 * 1024,
                #         'cudnn_conv_algo_search': 'EXHAUSTIVE',
                #         'do_copy_in_default_stream': True,
                #     }
                # )
                'CPUExecutionProvider'
            ]
        )
        # Get model info
        self.get_input_details()
        self.get_output_details()

        self.has_postprocess = 'score' in self.output_names or self.official_nms

    def detect_objects(self, image, confidence_threshold, nms_threshold):
        """Run the full pipeline on one image.

        Args:
            image: Image array; it is converted with ``cv2.COLOR_BGR2RGB``,
                so it is presumably a BGR image as read by OpenCV.
            confidence_threshold: Minimum score for a detection to be kept.
            nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.

        Returns:
            ``(boxes, scores, class_names)``; the same values are stored on
            ``self.boxes``, ``self.scores`` and ``self.class_ids``.
        """
        input_tensor = self.prepare_input(image)

        # Perform inference on the image
        outputs = self.inference(input_tensor)

        # Process output data
        self.boxes, self.scores, self.class_ids = self.process_output(
            outputs, confidence_threshold, nms_threshold)

        return self.boxes, self.scores, self.class_ids

    def prepare_input(self, image):
        """Turn an image into a ``(1, 3, H, W)`` float32 tensor in [0, 1].

        Also records the original image size on the instance so that
        :meth:`rescale_boxes` can map boxes back to it.
        """
        self.img_height, self.img_width = image.shape[:2]

        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Resize input image
        input_img = cv2.resize(input_img, (self.input_width, self.input_height))

        # Scale input pixel values to 0 to 1
        input_img = input_img / 255.0
        input_img = input_img.transpose(2, 0, 1)
        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)

        return input_tensor

    def rescale_boxes(self, boxes):
        """Rescale boxes from model-input size to original image size.

        Args:
            boxes: Sequence/array of 4-value boxes whose columns alternate
                horizontal and vertical coordinates.

        Returns:
            A float32 ``numpy`` array of the rescaled boxes.
        """
        input_shape = np.array(
            [self.input_width, self.input_height, self.input_width, self.input_height],
            dtype=np.float32)
        image_shape = np.array(
            [self.img_width, self.img_height, self.img_width, self.img_height],
            dtype=np.float32)
        boxes = np.asarray(boxes, dtype=np.float32) / input_shape
        boxes *= image_shape
        return boxes

    def process_output(self, output, conf_threshold, nms_threshold):
        """Filter raw model output by confidence and apply NMS.

        Args:
            output: Three model outputs ``(boxes, scores, class_ids)``, each
                with a leading batch axis; only the first batch entry is
                used. Scores and class ids may be shaped ``(N,)`` or
                ``(N, 1)``.
            conf_threshold: Minimum score for a detection to be kept.
            nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.

        Returns:
            ``(boxes, scores, class_names)`` where ``boxes`` is a ``(k, 4)``
            array in (x1, y1, x2, y2) image coordinates. Three empty lists
            are returned when nothing survives.
        """
        boxes, scores, class_ids = output

        res_boxes = []
        res_scores = []
        res_class_ids = []

        for box, score, class_id in zip(boxes[0], scores[0], class_ids[0]):
            # ravel() accepts both a scalar and a 1-element array, so flat
            # (N,) outputs work as well as (N, 1) ones.
            score = np.ravel(score)[0]
            if score > conf_threshold:
                res_boxes.append(box)
                res_scores.append(score)
                res_class_ids.append(int(np.ravel(class_id)[0]))

        if not res_scores:
            return [], [], []

        # Scale boxes to original image dimensions
        res_boxes = self.rescale_boxes(res_boxes)

        # NOTE(review): boxes are handed to NMSBoxes in the model's own
        # layout and only afterwards converted with xywh2xyxy (centre-based).
        # Confirm the layout matches what NMSBoxes expects.
        nms_result = cv2.dnn.NMSBoxes(res_boxes, res_scores, conf_threshold, nms_threshold)

        # Depending on the OpenCV version the indices come back flat, as an
        # (N, 1) array, or as an empty tuple; normalise to a flat int array.
        keep = np.asarray(nms_result, dtype=int).reshape(-1)
        if keep.size == 0:
            return [], [], []

        # Convert boxes to xyxy format
        fin_boxes = self.xywh2xyxy(res_boxes[keep])
        fin_scores = [res_scores[i] for i in keep]

        # Convert class ids to class names
        fin_class_ids = [self.class_names[res_class_ids[i]] for i in keep]
        return fin_boxes, fin_scores, fin_class_ids

    def _class_index(self, class_id):
        """Return the integer class index for a class name or a numeric id."""
        if isinstance(class_id, str):
            return self.class_names.index(class_id)
        return int(class_id)

    def draw_detections(self, image, draw_scores=True, mask_alpha=0.4):
        """Draw the most recent detections on a copy of ``image``.

        Args:
            image: Image to annotate; it is not modified.
            draw_scores: Accepted for interface compatibility; the caption
                always includes the score.
            mask_alpha: Weight of the filled-box overlay in the blend.

        Returns:
            The blended annotated image.
        """
        mask_img = image.copy()
        det_img = image.copy()

        img_height, img_width = image.shape[:2]
        size = min([img_height, img_width]) * 0.0006
        text_thickness = int(min([img_height, img_width]) * 0.001)

        # Draw bounding boxes and labels of detections
        for box, score, class_id in zip(self.boxes, self.scores, self.class_ids):
            # process_output() stores class *names*; map them back to an
            # index for the colour/label lookups (plain ids work as well).
            class_index = self._class_index(class_id)
            color = tuple(self.colors[class_index].tolist())

            x1, y1, x2, y2 = box.astype(int).tolist()

            # Draw rectangle
            cv2.rectangle(det_img, (x1, y1), (x2, y2), color, 2)

            # Draw fill rectangle in mask image
            cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)

            label = self.class_names[class_index]
            caption = f'{label} {int(score * 100)}%'
            (tw, th), _ = cv2.getTextSize(text=caption, fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                        fontScale=size, thickness=text_thickness)
            th = int(th * 1.2)

            # Filled background behind the caption, then the caption itself,
            # on both the detection image and the overlay.
            cv2.rectangle(det_img, (x1, y1),
                        (x1 + tw, y1 - th), color, -1)
            cv2.rectangle(mask_img, (x1, y1),
                        (x1 + tw, y1 - th), color, -1)
            cv2.putText(det_img, caption, (x1, y1),
                        cv2.FONT_HERSHEY_SIMPLEX, size, (255, 255, 255), text_thickness, cv2.LINE_AA)

            cv2.putText(mask_img, caption, (x1, y1),
                        cv2.FONT_HERSHEY_SIMPLEX, size, (255, 255, 255), text_thickness, cv2.LINE_AA)

        return cv2.addWeighted(mask_img, mask_alpha, det_img, 1 - mask_alpha, 0)

    def get_input_details(self):
        """Cache the model's input names and the first input's shape.

        Height and width are read from axes 2 and 3 of the first input.
        """
        model_inputs = self.session.get_inputs()
        self.input_names = [model_input.name for model_input in model_inputs]

        self.input_shape = model_inputs[0].shape
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]

    def get_output_details(self):
        """Cache the model's output names."""
        model_outputs = self.session.get_outputs()
        self.output_names = [model_output.name for model_output in model_outputs]

    def inference(self, input_tensor):
        """Run the session on ``input_tensor`` and return all outputs."""
        outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor})
        return outputs