AlshimaaGamalAlsaied committed on
Commit
8a41b66
1 Parent(s): 1f063cf
app.py CHANGED
@@ -1,78 +1,6 @@
1
- # import gradio as gr
2
- # #import torch
3
- # import yolov7
4
-
5
-
6
- # #
7
- # # from huggingface_hub import hf_hub_download
8
- # from huggingface_hub import HfApi
9
-
10
-
11
- # # Images
12
- # #torch.hub.download_url_to_file('https://github.com/ultralytics/yolov5/raw/master/data/images/zidane.jpg', 'zidane.jpg')
13
- # #torch.hub.download_url_to_file('https://raw.githubusercontent.com/obss/sahi/main/tests/data/small-vehicles1.jpeg', 'small-vehicles1.jpeg')
14
-
15
- # def yolov7_inference(
16
- # image: gr.inputs.Image = None,
17
- # model_path: gr.inputs.Dropdown = None,
18
- # image_size: gr.inputs.Slider = 640,
19
- # conf_threshold: gr.inputs.Slider = 0.25,
20
- # iou_threshold: gr.inputs.Slider = 0.45,
21
- # ):
22
- # """
23
- # YOLOv7 inference function
24
- # Args:
25
- # image: Input image
26
- # model_path: Path to the model
27
- # image_size: Image size
28
- # conf_threshold: Confidence threshold
29
- # iou_threshold: IOU threshold
30
- # Returns:
31
- # Rendered image
32
- # """
33
-
34
- # model = yolov7.load(model_path, device="cpu", hf_model=True, trace=False)
35
- # model.conf = conf_threshold
36
- # model.iou = iou_threshold
37
- # results = model([image], size=image_size)
38
- # return results.render()[0]
39
-
40
-
41
-
42
- # inputs = [
43
- # gr.inputs.Image(type="pil", label="Input Image"),
44
- # gr.inputs.Dropdown(
45
- # choices=[
46
- # "alshimaa/model_baseline",
47
- # "alshimaa/model_yolo7",
48
- # #"kadirnar/yolov7-v0.1",
49
- # ],
50
- # default="alshimaa/model_baseline",
51
- # label="Model",
52
- # )
53
- # #gr.inputs.Slider(minimum=320, maximum=1280, default=640, step=32, label="Image Size")
54
- # #gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.25, step=0.05, label="Confidence Threshold"),
55
- # #gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.45, step=0.05, label="IOU Threshold")
56
- # ]
57
-
58
- # outputs = gr.outputs.Image(type="filepath", label="Output Image")
59
- # title = "Smart Environmental Eye (SEE)"
60
-
61
- # examples = [['image1.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45], ['image2.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45], ['image3.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45]]
62
- # demo_app = gr.Interface(
63
- # fn=yolov7_inference,
64
- # inputs=inputs,
65
- # outputs=outputs,
66
- # title=title,
67
- # examples=examples,
68
- # cache_examples=True,
69
- # theme='huggingface',
70
- # )
71
-
72
- # demo_app.launch(debug=True, enable_queue=True)
73
-
74
-
75
-
76
  import subprocess
77
  import tempfile
78
  import time
@@ -81,16 +9,41 @@ from pathlib import Path
81
  import cv2
82
  import gradio as gr
83
 
84
- from inferer import Inferer
85
-
86
- pipeline = Inferer("alshimaa/model_yolo7", device='cuda')
87
-
88
-
89
- def fn_image(image, conf_thres, iou_thres):
90
- return pipeline(image, conf_thres, iou_thres)
91
 
92
 
93
- def fn_video(video_file, conf_thres, iou_thres, start_sec, duration):
94
  start_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec))
95
  end_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec + duration))
96
 
@@ -121,7 +74,7 @@ def fn_video(video_file, conf_thres, iou_thres, start_sec, duration):
121
  print(e)
122
  continue
123
  print("FRAME DTYPE", type(frame))
124
- out.write(pipeline(frame, conf_thres, iou_thres))
125
  num_frames += 1
126
  print("Processed {} frames".format(num_frames))
127
  if num_frames == max_frames:
@@ -135,29 +88,44 @@ def fn_video(video_file, conf_thres, iou_thres, start_sec, duration):
135
 
136
  return out_file.name
137
 
138
-
139
  image_interface = gr.Interface(
140
- fn=fn_image,
141
  inputs=[
142
- "image",
143
- gr.Slider(0, 1, value=0.5, label="Confidence Threshold"),
144
- gr.Slider(0, 1, value=0.5, label="IOU Threshold"),
145
- ],
146
- outputs=gr.Image(type="file"),
147
- examples=[["image1.jpg", 0.5, 0.5], ["image2.jpg", 0.25, 0.45], ["image3.jpg", 0.25, 0.45]],
148
  title="Smart Environmental Eye (SEE)",
149
- allow_flagging=False,
150
- allow_screenshot=False,
 
151
  )
152
 
 
153
  video_interface = gr.Interface(
154
- fn=fn_video,
155
  inputs=[
156
  gr.Video(type="file"),
157
- gr.Slider(0, 1, value=0.25, label="Confidence Threshold"),
158
- gr.Slider(0, 1, value=0.45, label="IOU Threshold"),
159
- gr.Slider(0, 10, value=0, label="Start Second", step=1),
160
- gr.Slider(0, 10 if pipeline.device.type != 'cpu' else 3, value=4, label="Duration", step=1),
161
  ],
162
  outputs=gr.Video(type="file", format="mp4"),
163
  # examples=[
@@ -169,11 +137,113 @@ video_interface = gr.Interface(
169
  allow_screenshot=False,
170
  )
171
 
172
-
173
-
174
  if __name__ == "__main__":
175
  gr.TabbedInterface(
176
  [image_interface, video_interface],
177
  ["Run on Images", "Run on Videos"],
178
  ).launch()
179
 
1
+ import gradio as gr
2
+ #import torch
3
+ import yolov7
4
  import subprocess
5
  import tempfile
6
  import time
 
9
  import cv2
10
  import gradio as gr
11
 
12
 
13
 
14
+ # Images
15
+ #torch.hub.download_url_to_file('https://github.com/ultralytics/yolov5/raw/master/data/images/zidane.jpg', 'zidane.jpg')
16
+ #torch.hub.download_url_to_file('https://raw.githubusercontent.com/obss/sahi/main/tests/data/small-vehicles1.jpeg', 'small-vehicles1.jpeg')
17
+
18
+ def image_fn(
19
+ image: gr.inputs.Image = None,
20
+ model_path: gr.inputs.Dropdown = None,
21
+ image_size: gr.inputs.Slider = 640,
22
+ conf_threshold: gr.inputs.Slider = 0.25,
23
+ iou_threshold: gr.inputs.Slider = 0.45,
24
+ ):
25
+ """
26
+ YOLOv7 inference function
27
+ Args:
28
+ image: Input image
29
+ model_path: Path to the model
30
+ image_size: Image size
31
+ conf_threshold: Confidence threshold
32
+ iou_threshold: IOU threshold
33
+ Returns:
34
+ Rendered image
35
+ """
36
+
37
+ model = yolov7.load(model_path, device="cpu", hf_model=True, trace=False)
38
+ model.conf = conf_threshold
39
+ model.iou = iou_threshold
40
+ results = model([image], size=image_size)
41
+ return results.render()[0]
42
+
43
+
44
+
45
+ def video_fn(model_path, video_file, conf_thres, iou_thres, start_sec, duration):
46
+ model = yolov7.load(model_path, device="cpu", hf_model=True, trace=False)
47
  start_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec))
48
  end_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec + duration))
49
 
 
74
  print(e)
75
  continue
76
  print("FRAME DTYPE", type(frame))
77
+ out.write(model(frame, conf_thres, iou_thres))
78
  num_frames += 1
79
  print("Processed {} frames".format(num_frames))
80
  if num_frames == max_frames:
 
88
 
89
  return out_file.name
90
 
 
91
  image_interface = gr.Interface(
92
+ fn=image_fn,
93
  inputs=[
94
+ gr.inputs.Image(type="pil", label="Input Image"),
95
+ gr.inputs.Dropdown(
96
+ choices=[
97
+ "alshimaa/model_baseline",
98
+ "alshimaa/model_yolo7",
99
+ #"kadirnar/yolov7-v0.1",
100
+ ],
101
+ default="alshimaa/model_baseline",
102
+ label="Model",
103
+ )
104
+ #gr.inputs.Slider(minimum=320, maximum=1280, default=640, step=32, label="Image Size")
105
+ #gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.25, step=0.05, label="Confidence Threshold"),
106
+ #gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.45, step=0.05, label="IOU Threshold")
107
+ ],
108
+ outputs=gr.outputs.Image(type="filepath", label="Output Image"),
109
  title="Smart Environmental Eye (SEE)",
110
+ examples=[['image1.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45], ['image2.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45], ['image3.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45]],
111
+ cache_examples=True,
112
+ theme='huggingface',
113
  )
114
 
115
+
116
  video_interface = gr.Interface(
117
+ fn=video_fn,
118
  inputs=[
119
  gr.Video(type="file"),
120
+ gr.inputs.Dropdown(
121
+ choices=[
122
+ "alshimaa/model_baseline",
123
+ "alshimaa/model_yolo7",
124
+ #"kadirnar/yolov7-v0.1",
125
+ ],
126
+ default="alshimaa/model_baseline",
127
+ label="Model",
128
+ ),
129
  ],
130
  outputs=gr.Video(type="file", format="mp4"),
131
  # examples=[
 
137
  allow_screenshot=False,
138
  )
139
 
 
 
140
  if __name__ == "__main__":
141
  gr.TabbedInterface(
142
  [image_interface, video_interface],
143
  ["Run on Images", "Run on Videos"],
144
  ).launch()
145
 
146
+ # import subprocess
147
+ # import tempfile
148
+ # import time
149
+ # from pathlib import Path
150
+
151
+ # import cv2
152
+ # import gradio as gr
153
+
154
+ # from inferer import Inferer
155
+
156
+ # pipeline = Inferer("alshimaa/model_yolo7", device='cuda')
157
+
158
+
159
+ # def fn_image(image, conf_thres, iou_thres):
160
+ # return pipeline(image, conf_thres, iou_thres)
161
+
162
+
163
+ # def fn_video(video_file, conf_thres, iou_thres, start_sec, duration):
164
+ # start_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec))
165
+ # end_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec + duration))
166
+
167
+ # suffix = Path(video_file).suffix
168
+
169
+ # clip_temp_file = tempfile.NamedTemporaryFile(suffix=suffix)
170
+ # subprocess.call(
171
+ # f"ffmpeg -y -ss {start_timestamp} -i {video_file} -to {end_timestamp} -c copy {clip_temp_file.name}".split()
172
+ # )
173
+
174
+ # # Reader of clip file
175
+ # cap = cv2.VideoCapture(clip_temp_file.name)
176
+
177
+ # # This is an intermediary temp file where we'll write the video to
178
+ # # Unfortunately, gradio doesn't play too nice with videos rn so we have to do some hackiness
179
+ # # with ffmpeg at the end of the function here.
180
+ # with tempfile.NamedTemporaryFile(suffix=".mp4") as temp_file:
181
+ # out = cv2.VideoWriter(temp_file.name, cv2.VideoWriter_fourcc(*"MP4V"), 30, (1280, 720))
182
+
183
+ # num_frames = 0
184
+ # max_frames = duration * 30
185
+ # while cap.isOpened():
186
+ # try:
187
+ # ret, frame = cap.read()
188
+ # if not ret:
189
+ # break
190
+ # except Exception as e:
191
+ # print(e)
192
+ # continue
193
+ # print("FRAME DTYPE", type(frame))
194
+ # out.write(pipeline(frame, conf_thres, iou_thres))
195
+ # num_frames += 1
196
+ # print("Processed {} frames".format(num_frames))
197
+ # if num_frames == max_frames:
198
+ # break
199
+
200
+ # out.release()
201
+
202
+ # # Aforementioned hackiness
203
+ # out_file = tempfile.NamedTemporaryFile(suffix="out.mp4", delete=False)
204
+ # subprocess.run(f"ffmpeg -y -loglevel quiet -stats -i {temp_file.name} -c:v libx264 {out_file.name}".split())
205
+
206
+ # return out_file.name
207
+
208
+
209
+ # image_interface = gr.Interface(
210
+ # fn=fn_image,
211
+ # inputs=[
212
+ # "image",
213
+ # gr.Slider(0, 1, value=0.5, label="Confidence Threshold"),
214
+ # gr.Slider(0, 1, value=0.5, label="IOU Threshold"),
215
+ # ],
216
+ # outputs=gr.Image(type="file"),
217
+ # examples=[["image1.jpg", 0.5, 0.5], ["image2.jpg", 0.25, 0.45], ["image3.jpg", 0.25, 0.45]],
218
+ # title="Smart Environmental Eye (SEE)",
219
+ # allow_flagging=False,
220
+ # allow_screenshot=False,
221
+ # )
222
+
223
+ # video_interface = gr.Interface(
224
+ # fn=fn_video,
225
+ # inputs=[
226
+ # gr.Video(type="file"),
227
+ # gr.Slider(0, 1, value=0.25, label="Confidence Threshold"),
228
+ # gr.Slider(0, 1, value=0.45, label="IOU Threshold"),
229
+ # gr.Slider(0, 10, value=0, label="Start Second", step=1),
230
+ # gr.Slider(0, 10 if pipeline.device.type != 'cpu' else 3, value=4, label="Duration", step=1),
231
+ # ],
232
+ # outputs=gr.Video(type="file", format="mp4"),
233
+ # # examples=[
234
+ # # ["video.mp4", 0.25, 0.45, 0, 2],
235
+
236
+ # # ],
237
+ # title="Smart Environmental Eye (SEE)",
238
+ # allow_flagging=False,
239
+ # allow_screenshot=False,
240
+ # )
241
+
242
+
243
+
244
+ # if __name__ == "__main__":
245
+ # gr.TabbedInterface(
246
+ # [image_interface, video_interface],
247
+ # ["Run on Images", "Run on Videos"],
248
+ # ).launch()
249
+
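
For reference outside the Gradio wiring, here is a minimal standalone sketch of the image path that the new `image_fn` implements, assuming the same `yolov7` pip package and Hub checkpoints imported above (model id, image file, and thresholds are the example values from the interface):

```python
# Minimal sketch of the image-inference path wrapped by image_fn above.
# Assumes the `yolov7` package exposing yolov7.load(..., hf_model=True) as used in app.py;
# the model id and image file are placeholders taken from the interface examples.
from PIL import Image
import yolov7

model = yolov7.load("alshimaa/model_baseline", device="cpu", hf_model=True, trace=False)
model.conf = 0.25  # confidence threshold
model.iou = 0.45   # IoU threshold for NMS

image = Image.open("image1.jpg")
results = model([image], size=640)  # batched inference, letterboxed to 640 px
rendered = results.render()[0]      # image with detections drawn, as returned by image_fn
```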
inferer.py DELETED
@@ -1,238 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import math
4
- import os.path as osp
5
-
6
- import cv2
7
- import numpy as np
8
- import torch
9
- from huggingface_hub import hf_hub_download
10
- from PIL import Image, ImageFont
11
-
12
- from yolov6.data.data_augment import letterbox
13
- from yolov6.layers.common import DetectBackend
14
- from yolov6.utils.events import LOGGER, load_yaml
15
- from yolov6.utils.nms import non_max_suppression
16
-
17
-
18
- class Inferer:
19
- def __init__(self, model_id, device="cpu", yaml="coco.yaml", img_size=640, half=False):
20
- self.__dict__.update(locals())
21
-
22
- # Init model
23
- self.img_size = img_size
24
- cuda = device != "cpu" and torch.cuda.is_available()
25
- self.device = torch.device("cuda:0" if cuda else "cpu")
26
- self.model = DetectBackend(hf_hub_download(model_id, "model.pt"), device=self.device)
27
- self.stride = self.model.stride
28
- self.class_names = load_yaml(yaml)["names"]
29
- self.img_size = self.check_img_size(self.img_size, s=self.stride) # check image size
30
-
31
- # Half precision
32
- if half & (self.device.type != "cpu"):
33
- self.model.model.half()
34
- else:
35
- self.model.model.float()
36
- half = False
37
-
38
- if self.device.type != "cpu":
39
- self.model(
40
- torch.zeros(1, 3, *self.img_size).to(self.device).type_as(next(self.model.model.parameters()))
41
- ) # warmup
42
-
43
- # Switch model to deploy status
44
- self.model_switch(self.model, self.img_size)
45
-
46
- def model_switch(self, model, img_size):
47
- """Model switch to deploy status"""
48
- from yolov6.layers.common import RepVGGBlock
49
-
50
- for layer in model.modules():
51
- if isinstance(layer, RepVGGBlock):
52
- layer.switch_to_deploy()
53
-
54
- LOGGER.info("Switch model to deploy modality.")
55
-
56
- def __call__(
57
- self,
58
- path_or_image,
59
- conf_thres=0.25,
60
- iou_thres=0.45,
61
- classes=None,
62
- agnostic_nms=False,
63
- max_det=1000,
64
- hide_labels=False,
65
- hide_conf=False,
66
- ):
67
- """Model Inference and results visualization"""
68
-
69
- img, img_src = self.precess_image(path_or_image, self.img_size, self.stride, self.half)
70
- img = img.to(self.device)
71
- if len(img.shape) == 3:
72
- img = img[None]
73
- # expand for batch dim
74
- pred_results = self.model(img)
75
- det = non_max_suppression(pred_results, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)[0]
76
-
77
- gn = torch.tensor(img_src.shape)[[1, 0, 1, 0]] # normalization gain whwh
78
- img_ori = img_src
79
-
80
- # check image and font
81
- assert (
82
- img_ori.data.contiguous
83
- ), "Image needs to be contiguous. Please apply to input images with np.ascontiguousarray(im)."
84
- self.font_check()
85
-
86
- if len(det):
87
- det[:, :4] = self.rescale(img.shape[2:], det[:, :4], img_src.shape).round()
88
-
89
- for *xyxy, conf, cls in reversed(det):
90
- class_num = int(cls) # integer class
91
- label = (
92
- None
93
- if hide_labels
94
- else (self.class_names[class_num] if hide_conf else f"{self.class_names[class_num]} {conf:.2f}")
95
- )
96
-
97
- self.plot_box_and_label(
98
- img_ori,
99
- max(round(sum(img_ori.shape) / 2 * 0.003), 2),
100
- xyxy,
101
- label,
102
- color=self.generate_colors(class_num, True),
103
- )
104
-
105
- img_src = np.asarray(img_ori)
106
-
107
- return img_src
108
-
109
- @staticmethod
110
- def precess_image(path_or_image, img_size, stride, half):
111
- """Process image before image inference."""
112
- if isinstance(path_or_image, str):
113
- try:
114
- img_src = cv2.imread(path_or_image)
115
- assert img_src is not None, f"Invalid image: {path_or_image}"
116
- except Exception as e:
117
- LOGGER.warning(e)
118
- elif isinstance(path_or_image, np.ndarray):
119
- img_src = path_or_image
120
- elif isinstance(path_or_image, Image.Image):
121
- img_src = np.array(path_or_image)
122
-
123
- image = letterbox(img_src, img_size, stride=stride)[0]
124
-
125
- # Convert
126
- image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
127
- image = torch.from_numpy(np.ascontiguousarray(image))
128
- image = image.half() if half else image.float() # uint8 to fp16/32
129
- image /= 255 # 0 - 255 to 0.0 - 1.0
130
-
131
- return image, img_src
132
-
133
- @staticmethod
134
- def rescale(ori_shape, boxes, target_shape):
135
- """Rescale the output to the original image shape"""
136
- ratio = min(ori_shape[0] / target_shape[0], ori_shape[1] / target_shape[1])
137
- padding = (ori_shape[1] - target_shape[1] * ratio) / 2, (ori_shape[0] - target_shape[0] * ratio) / 2
138
-
139
- boxes[:, [0, 2]] -= padding[0]
140
- boxes[:, [1, 3]] -= padding[1]
141
- boxes[:, :4] /= ratio
142
-
143
- boxes[:, 0].clamp_(0, target_shape[1]) # x1
144
- boxes[:, 1].clamp_(0, target_shape[0]) # y1
145
- boxes[:, 2].clamp_(0, target_shape[1]) # x2
146
- boxes[:, 3].clamp_(0, target_shape[0]) # y2
147
-
148
- return boxes
149
-
150
- def check_img_size(self, img_size, s=32, floor=0):
151
- """Make sure image size is a multiple of stride s in each dimension, and return a new shape list of image."""
152
- if isinstance(img_size, int): # integer i.e. img_size=640
153
- new_size = max(self.make_divisible(img_size, int(s)), floor)
154
- elif isinstance(img_size, list): # list i.e. img_size=[640, 480]
155
- new_size = [max(self.make_divisible(x, int(s)), floor) for x in img_size]
156
- else:
157
- raise Exception(f"Unsupported type of img_size: {type(img_size)}")
158
-
159
- if new_size != img_size:
160
- print(f"WARNING: --img-size {img_size} must be multiple of max stride {s}, updating to {new_size}")
161
- return new_size if isinstance(img_size, list) else [new_size] * 2
162
-
163
- def make_divisible(self, x, divisor):
164
- # Upward revision the value x to make it evenly divisible by the divisor.
165
- return math.ceil(x / divisor) * divisor
166
-
167
- @staticmethod
168
- def plot_box_and_label(image, lw, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255)):
169
- # Add one xyxy box to image with label
170
- p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
171
- cv2.rectangle(image, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
172
- if label:
173
- tf = max(lw - 1, 1) # font thickness
174
- w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0] # text width, height
175
- outside = p1[1] - h - 3 >= 0 # label fits outside box
176
- p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
177
- cv2.rectangle(image, p1, p2, color, -1, cv2.LINE_AA) # filled
178
- cv2.putText(
179
- image,
180
- label,
181
- (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
182
- 0,
183
- lw / 3,
184
- txt_color,
185
- thickness=tf,
186
- lineType=cv2.LINE_AA,
187
- )
188
-
189
- @staticmethod
190
- def font_check(font="./yolov6/utils/Arial.ttf", size=10):
191
- # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary
192
- assert osp.exists(font), f"font path not exists: {font}"
193
- try:
194
- return ImageFont.truetype(str(font) if font.exists() else font.name, size)
195
- except Exception as e: # download if missing
196
- return ImageFont.truetype(str(font), size)
197
-
198
- @staticmethod
199
- def box_convert(x):
200
- # Convert boxes with shape [n, 4] from [x1, y1, x2, y2] to [x, y, w, h] where x1y1=top-left, x2y2=bottom-right
201
- y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
202
- y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
203
- y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
204
- y[:, 2] = x[:, 2] - x[:, 0] # width
205
- y[:, 3] = x[:, 3] - x[:, 1] # height
206
- return y
207
-
208
- @staticmethod
209
- def generate_colors(i, bgr=False):
210
- hex = (
211
- "FF3838",
212
- "FF9D97",
213
- "FF701F",
214
- "FFB21D",
215
- "CFD231",
216
- "48F90A",
217
- "92CC17",
218
- "3DDB86",
219
- "1A9334",
220
- "00D4BB",
221
- "2C99A8",
222
- "00C2FF",
223
- "344593",
224
- "6473FF",
225
- "0018EC",
226
- "8438FF",
227
- "520085",
228
- "CB38FF",
229
- "FF95C8",
230
- "FF37C7",
231
- )
232
- palette = []
233
- for iter in hex:
234
- h = "#" + iter
235
- palette.append(tuple(int(h[1 + i : 1 + i + 2], 16) for i in (0, 2, 4)))
236
- num = len(palette)
237
- color = palette[int(i) % num]
238
- return (color[2], color[1], color[0]) if bgr else color
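
The wrapper deleted above was consumed by the previous version of app.py roughly as follows (a sketch reconstructed from the old code paths shown commented out in this commit; model id and device are the values the old app used):

```python
# Sketch of how the removed Inferer was used before this commit
# (see the commented-out fn_image/fn_video in the old app.py above).
from inferer import Inferer

pipeline = Inferer("alshimaa/model_yolo7", device="cuda")

# __call__ accepts a path, a numpy array, or a PIL image plus thresholds,
# and returns the source image with boxes and class labels drawn on it.
annotated = pipeline("image1.jpg", conf_thres=0.25, iou_thres=0.45)
```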
 
yolov6/core/engine.py DELETED
@@ -1,273 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import os
4
- import time
5
- from copy import deepcopy
6
- import os.path as osp
7
-
8
- from tqdm import tqdm
9
-
10
- import numpy as np
11
- import torch
12
- from torch.cuda import amp
13
- from torch.nn.parallel import DistributedDataParallel as DDP
14
- from torch.utils.tensorboard import SummaryWriter
15
-
16
- import tools.eval as eval
17
- from yolov6.data.data_load import create_dataloader
18
- from yolov6.models.yolo import build_model
19
- from yolov6.models.loss import ComputeLoss
20
- from yolov6.utils.events import LOGGER, NCOLS, load_yaml, write_tblog
21
- from yolov6.utils.ema import ModelEMA, de_parallel
22
- from yolov6.utils.checkpoint import load_state_dict, save_checkpoint, strip_optimizer
23
- from yolov6.solver.build import build_optimizer, build_lr_scheduler
24
-
25
- class Trainer:
26
- def __init__(self, args, cfg, device):
27
- self.args = args
28
- self.cfg = cfg
29
- self.device = device
30
-
31
- if args.resume:
32
- self.ckpt = torch.load(args.resume, map_location='cpu')
33
-
34
- self.rank = args.rank
35
- self.local_rank = args.local_rank
36
- self.world_size = args.world_size
37
- self.main_process = self.rank in [-1, 0]
38
- self.save_dir = args.save_dir
39
- # get data loader
40
- self.data_dict = load_yaml(args.data_path)
41
- self.num_classes = self.data_dict['nc']
42
- self.train_loader, self.val_loader = self.get_data_loader(args, cfg, self.data_dict)
43
- # get model and optimizer
44
- model = self.get_model(args, cfg, self.num_classes, device)
45
- self.optimizer = self.get_optimizer(args, cfg, model)
46
- self.scheduler, self.lf = self.get_lr_scheduler(args, cfg, self.optimizer)
47
- self.ema = ModelEMA(model) if self.main_process else None
48
- # tensorboard
49
- self.tblogger = SummaryWriter(self.save_dir) if self.main_process else None
50
- self.start_epoch = 0
51
- #resume
52
- if hasattr(self, "ckpt"):
53
- resume_state_dict = self.ckpt['model'].float().state_dict() # checkpoint state_dict as FP32
54
- model.load_state_dict(resume_state_dict, strict=True) # load
55
- self.start_epoch = self.ckpt['epoch'] + 1
56
- self.optimizer.load_state_dict(self.ckpt['optimizer'])
57
- if self.main_process:
58
- self.ema.ema.load_state_dict(self.ckpt['ema'].float().state_dict())
59
- self.ema.updates = self.ckpt['updates']
60
- self.model = self.parallel_model(args, model, device)
61
- self.model.nc, self.model.names = self.data_dict['nc'], self.data_dict['names']
62
-
63
- self.max_epoch = args.epochs
64
- self.max_stepnum = len(self.train_loader)
65
- self.batch_size = args.batch_size
66
- self.img_size = args.img_size
67
-
68
-
69
- # Training Process
70
-
71
- def train(self):
72
- try:
73
- self.train_before_loop()
74
- for self.epoch in range(self.start_epoch, self.max_epoch):
75
- self.train_in_loop()
76
-
77
- except Exception as _:
78
- LOGGER.error('ERROR in training loop or eval/save model.')
79
- raise
80
- finally:
81
- self.train_after_loop()
82
-
83
- # Training loop for each epoch
84
- def train_in_loop(self):
85
- try:
86
- self.prepare_for_steps()
87
- for self.step, self.batch_data in self.pbar:
88
- self.train_in_steps()
89
- self.print_details()
90
- except Exception as _:
91
- LOGGER.error('ERROR in training steps.')
92
- raise
93
- try:
94
- self.eval_and_save()
95
- except Exception as _:
96
- LOGGER.error('ERROR in evaluate and save model.')
97
- raise
98
-
99
- # Training loop for batchdata
100
- def train_in_steps(self):
101
- images, targets = self.prepro_data(self.batch_data, self.device)
102
- # forward
103
- with amp.autocast(enabled=self.device != 'cpu'):
104
- preds = self.model(images)
105
- total_loss, loss_items = self.compute_loss(preds, targets)
106
- if self.rank != -1:
107
- total_loss *= self.world_size
108
- # backward
109
- self.scaler.scale(total_loss).backward()
110
- self.loss_items = loss_items
111
- self.update_optimizer()
112
-
113
- def eval_and_save(self):
114
- remaining_epochs = self.max_epoch - self.epoch
115
- eval_interval = self.args.eval_interval if remaining_epochs > self.args.heavy_eval_range else 1
116
- is_val_epoch = (not self.args.eval_final_only or (remaining_epochs == 1)) and (self.epoch % eval_interval == 0)
117
- if self.main_process:
118
- self.ema.update_attr(self.model, include=['nc', 'names', 'stride']) # update attributes for ema model
119
- if is_val_epoch:
120
- self.eval_model()
121
- self.ap = self.evaluate_results[0] * 0.1 + self.evaluate_results[1] * 0.9
122
- self.best_ap = max(self.ap, self.best_ap)
123
- # save ckpt
124
- ckpt = {
125
- 'model': deepcopy(de_parallel(self.model)).half(),
126
- 'ema': deepcopy(self.ema.ema).half(),
127
- 'updates': self.ema.updates,
128
- 'optimizer': self.optimizer.state_dict(),
129
- 'epoch': self.epoch,
130
- }
131
-
132
- save_ckpt_dir = osp.join(self.save_dir, 'weights')
133
- save_checkpoint(ckpt, (is_val_epoch) and (self.ap == self.best_ap), save_ckpt_dir, model_name='last_ckpt')
134
- del ckpt
135
- # log for tensorboard
136
- write_tblog(self.tblogger, self.epoch, self.evaluate_results, self.mean_loss)
137
-
138
- def eval_model(self):
139
- results = eval.run(self.data_dict,
140
- batch_size=self.batch_size // self.world_size * 2,
141
- img_size=self.img_size,
142
- model=self.ema.ema,
143
- dataloader=self.val_loader,
144
- save_dir=self.save_dir,
145
- task='train')
146
-
147
- LOGGER.info(f"Epoch: {self.epoch} | mAP@0.5: {results[0]} | mAP@0.50:0.95: {results[1]}")
148
- self.evaluate_results = results[:2]
149
-
150
- def train_before_loop(self):
151
- LOGGER.info('Training start...')
152
- self.start_time = time.time()
153
- self.warmup_stepnum = max(round(self.cfg.solver.warmup_epochs * self.max_stepnum), 1000)
154
- self.scheduler.last_epoch = self.start_epoch - 1
155
- self.last_opt_step = -1
156
- self.scaler = amp.GradScaler(enabled=self.device != 'cpu')
157
-
158
- self.best_ap, self.ap = 0.0, 0.0
159
- self.evaluate_results = (0, 0) # AP50, AP50_95
160
- self.compute_loss = ComputeLoss(iou_type=self.cfg.model.head.iou_type)
161
-
162
- def prepare_for_steps(self):
163
- if self.epoch > self.start_epoch:
164
- self.scheduler.step()
165
- self.model.train()
166
- if self.rank != -1:
167
- self.train_loader.sampler.set_epoch(self.epoch)
168
- self.mean_loss = torch.zeros(4, device=self.device)
169
- self.optimizer.zero_grad()
170
-
171
- LOGGER.info(('\n' + '%10s' * 5) % ('Epoch', 'iou_loss', 'l1_loss', 'obj_loss', 'cls_loss'))
172
- self.pbar = enumerate(self.train_loader)
173
- if self.main_process:
174
- self.pbar = tqdm(self.pbar, total=self.max_stepnum, ncols=NCOLS, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')
175
-
176
- # Print loss after each steps
177
- def print_details(self):
178
- if self.main_process:
179
- self.mean_loss = (self.mean_loss * self.step + self.loss_items) / (self.step + 1)
180
- self.pbar.set_description(('%10s' + '%10.4g' * 4) % (f'{self.epoch}/{self.max_epoch - 1}', \
181
- *(self.mean_loss)))
182
-
183
- # Empty cache if training finished
184
- def train_after_loop(self):
185
- if self.main_process:
186
- LOGGER.info(f'\nTraining completed in {(time.time() - self.start_time) / 3600:.3f} hours.')
187
- save_ckpt_dir = osp.join(self.save_dir, 'weights')
188
- strip_optimizer(save_ckpt_dir, self.epoch) # strip optimizers for saved pt model
189
- if self.device != 'cpu':
190
- torch.cuda.empty_cache()
191
-
192
- def update_optimizer(self):
193
- curr_step = self.step + self.max_stepnum * self.epoch
194
- self.accumulate = max(1, round(64 / self.batch_size))
195
- if curr_step <= self.warmup_stepnum:
196
- self.accumulate = max(1, np.interp(curr_step, [0, self.warmup_stepnum], [1, 64 / self.batch_size]).round())
197
- for k, param in enumerate(self.optimizer.param_groups):
198
- warmup_bias_lr = self.cfg.solver.warmup_bias_lr if k == 2 else 0.0
199
- param['lr'] = np.interp(curr_step, [0, self.warmup_stepnum], [warmup_bias_lr, param['initial_lr'] * self.lf(self.epoch)])
200
- if 'momentum' in param:
201
- param['momentum'] = np.interp(curr_step, [0, self.warmup_stepnum], [self.cfg.solver.warmup_momentum, self.cfg.solver.momentum])
202
- if curr_step - self.last_opt_step >= self.accumulate:
203
- self.scaler.step(self.optimizer)
204
- self.scaler.update()
205
- self.optimizer.zero_grad()
206
- if self.ema:
207
- self.ema.update(self.model)
208
- self.last_opt_step = curr_step
209
-
210
- @staticmethod
211
- def get_data_loader(args, cfg, data_dict):
212
- train_path, val_path = data_dict['train'], data_dict['val']
213
- # check data
214
- nc = int(data_dict['nc'])
215
- class_names = data_dict['names']
216
- assert len(class_names) == nc, f'the length of class names does not match the number of classes defined'
217
- grid_size = max(int(max(cfg.model.head.strides)), 32)
218
- # create train dataloader
219
- train_loader = create_dataloader(train_path, args.img_size, args.batch_size // args.world_size, grid_size,
220
- hyp=dict(cfg.data_aug), augment=True, rect=False, rank=args.local_rank,
221
- workers=args.workers, shuffle=True, check_images=args.check_images,
222
- check_labels=args.check_labels, data_dict=data_dict, task='train')[0]
223
- # create val dataloader
224
- val_loader = None
225
- if args.rank in [-1, 0]:
226
- val_loader = create_dataloader(val_path, args.img_size, args.batch_size // args.world_size * 2, grid_size,
227
- hyp=dict(cfg.data_aug), rect=True, rank=-1, pad=0.5,
228
- workers=args.workers, check_images=args.check_images,
229
- check_labels=args.check_labels, data_dict=data_dict, task='val')[0]
230
-
231
- return train_loader, val_loader
232
-
233
- @staticmethod
234
- def prepro_data(batch_data, device):
235
- images = batch_data[0].to(device, non_blocking=True).float() / 255
236
- targets = batch_data[1].to(device)
237
- return images, targets
238
-
239
- def get_model(self, args, cfg, nc, device):
240
- model = build_model(cfg, nc, device)
241
- weights = cfg.model.pretrained
242
- if weights: # finetune if pretrained model is set
243
- LOGGER.info(f'Loading state_dict from {weights} for fine-tuning...')
244
- model = load_state_dict(weights, model, map_location=device)
245
- LOGGER.info('Model: {}'.format(model))
246
- return model
247
-
248
- @staticmethod
249
- def parallel_model(args, model, device):
250
- # If DP mode
251
- dp_mode = device.type != 'cpu' and args.rank == -1
252
- if dp_mode and torch.cuda.device_count() > 1:
253
- LOGGER.warning('WARNING: DP not recommended, use DDP instead.\n')
254
- model = torch.nn.DataParallel(model)
255
-
256
- # If DDP mode
257
- ddp_mode = device.type != 'cpu' and args.rank != -1
258
- if ddp_mode:
259
- model = DDP(model, device_ids=[args.local_rank], output_device=args.local_rank)
260
-
261
- return model
262
-
263
- def get_optimizer(self, args, cfg, model):
264
- accumulate = max(1, round(64 / args.batch_size))
265
- cfg.solver.weight_decay *= args.batch_size * accumulate / 64
266
- optimizer = build_optimizer(cfg, model)
267
- return optimizer
268
-
269
- @staticmethod
270
- def get_lr_scheduler(args, cfg, optimizer):
271
- epochs = args.epochs
272
- lr_scheduler, lf = build_lr_scheduler(cfg, optimizer, epochs)
273
- return lr_scheduler, lf
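
The deleted Trainer is normally driven by a thin launcher script; a hedged sketch of that flow, based on the constructor and train() interface shown above (the config helper import and the exact argument set are assumptions inferred from the attributes the class reads):

```python
# Sketch of driving the removed Trainer (constructor/train() interface as above).
# SimpleNamespace stands in for the argparse namespace a launcher would build;
# Config.fromfile is the assumed YOLOv6 config loader, not part of this diff.
from types import SimpleNamespace

import torch
from yolov6.core.engine import Trainer
from yolov6.utils.config import Config  # assumed helper

args = SimpleNamespace(
    data_path="data/coco.yaml", epochs=300, batch_size=32, img_size=640,
    rank=-1, local_rank=-1, world_size=1, save_dir="runs/train/exp",
    resume=False, workers=8, check_images=False, check_labels=False,
    eval_interval=20, heavy_eval_range=50, eval_final_only=False,
)
cfg = Config.fromfile("configs/yolov6s.py")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Trainer(args, cfg, device).train()  # train_before_loop -> per-epoch loop -> train_after_loop
```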
 
yolov6/core/evaler.py DELETED
@@ -1,256 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import os
4
- from tqdm import tqdm
5
- import numpy as np
6
- import json
7
- import torch
8
- import yaml
9
- from pathlib import Path
10
-
11
- from pycocotools.coco import COCO
12
- from pycocotools.cocoeval import COCOeval
13
-
14
- from yolov6.data.data_load import create_dataloader
15
- from yolov6.utils.events import LOGGER, NCOLS
16
- from yolov6.utils.nms import non_max_suppression
17
- from yolov6.utils.checkpoint import load_checkpoint
18
- from yolov6.utils.torch_utils import time_sync, get_model_info
19
-
20
- '''
21
- python tools/eval.py --task 'train'/'val'/'speed'
22
- '''
23
-
24
-
25
- class Evaler:
26
- def __init__(self,
27
- data,
28
- batch_size=32,
29
- img_size=640,
30
- conf_thres=0.001,
31
- iou_thres=0.65,
32
- device='',
33
- half=True,
34
- save_dir=''):
35
- self.data = data
36
- self.batch_size = batch_size
37
- self.img_size = img_size
38
- self.conf_thres = conf_thres
39
- self.iou_thres = iou_thres
40
- self.device = device
41
- self.half = half
42
- self.save_dir = save_dir
43
-
44
- def init_model(self, model, weights, task):
45
- if task != 'train':
46
- model = load_checkpoint(weights, map_location=self.device)
47
- self.stride = int(model.stride.max())
48
- if self.device.type != 'cpu':
49
- model(torch.zeros(1, 3, self.img_size, self.img_size).to(self.device).type_as(next(model.parameters())))
50
- # switch to deploy
51
- from yolov6.layers.common import RepVGGBlock
52
- for layer in model.modules():
53
- if isinstance(layer, RepVGGBlock):
54
- layer.switch_to_deploy()
55
- LOGGER.info("Switch model to deploy modality.")
56
- LOGGER.info("Model Summary: {}".format(get_model_info(model, self.img_size)))
57
- model.half() if self.half else model.float()
58
- return model
59
-
60
- def init_data(self, dataloader, task):
61
- '''Initialize dataloader.
62
- Returns a dataloader for task val or speed.
63
- '''
64
- self.is_coco = self.data.get("is_coco", False)
65
- self.ids = self.coco80_to_coco91_class() if self.is_coco else list(range(1000))
66
- if task != 'train':
67
- pad = 0.0 if task == 'speed' else 0.5
68
- dataloader = create_dataloader(self.data[task if task in ('train', 'val', 'test') else 'val'],
69
- self.img_size, self.batch_size, self.stride, check_labels=True, pad=pad, rect=True,
70
- data_dict=self.data, task=task)[0]
71
- return dataloader
72
-
73
- def predict_model(self, model, dataloader, task):
74
- '''Model prediction
75
- Predicts the whole dataset and gets the prediced results and inference time.
76
- '''
77
- self.speed_result = torch.zeros(4, device=self.device)
78
- pred_results = []
79
- pbar = tqdm(dataloader, desc="Inferencing model in val datasets.", ncols=NCOLS)
80
- for imgs, targets, paths, shapes in pbar:
81
- # pre-process
82
- t1 = time_sync()
83
- imgs = imgs.to(self.device, non_blocking=True)
84
- imgs = imgs.half() if self.half else imgs.float()
85
- imgs /= 255
86
- self.speed_result[1] += time_sync() - t1 # pre-process time
87
-
88
- # Inference
89
- t2 = time_sync()
90
- outputs = model(imgs)
91
- self.speed_result[2] += time_sync() - t2 # inference time
92
-
93
- # post-process
94
- t3 = time_sync()
95
- outputs = non_max_suppression(outputs, self.conf_thres, self.iou_thres, multi_label=True)
96
- self.speed_result[3] += time_sync() - t3 # post-process time
97
- self.speed_result[0] += len(outputs)
98
-
99
- # save result
100
- pred_results.extend(self.convert_to_coco_format(outputs, imgs, paths, shapes, self.ids))
101
- return pred_results
102
-
103
- def eval_model(self, pred_results, model, dataloader, task):
104
- '''Evaluate models
105
- For task speed, this function only evaluates the speed of model and outputs inference time.
106
- For task val, this function evaluates the speed and mAP by pycocotools, and returns
107
- inference time and mAP value.
108
- '''
109
- LOGGER.info(f'\nEvaluating speed.')
110
- self.eval_speed(task)
111
-
112
- LOGGER.info(f'\nEvaluating mAP by pycocotools.')
113
- if task != 'speed' and len(pred_results):
114
- if 'anno_path' in self.data:
115
- anno_json = self.data['anno_path']
116
- else:
117
- # generated coco format labels in dataset initialization
118
- dataset_root = os.path.dirname(os.path.dirname(self.data['val']))
119
- base_name = os.path.basename(self.data['val'])
120
- anno_json = os.path.join(dataset_root, 'annotations', f'instances_{base_name}.json')
121
- pred_json = os.path.join(self.save_dir, "predictions.json")
122
- LOGGER.info(f'Saving {pred_json}...')
123
- with open(pred_json, 'w') as f:
124
- json.dump(pred_results, f)
125
-
126
- anno = COCO(anno_json)
127
- pred = anno.loadRes(pred_json)
128
- cocoEval = COCOeval(anno, pred, 'bbox')
129
- if self.is_coco:
130
- imgIds = [int(os.path.basename(x).split(".")[0])
131
- for x in dataloader.dataset.img_paths]
132
- cocoEval.params.imgIds = imgIds
133
- cocoEval.evaluate()
134
- cocoEval.accumulate()
135
- cocoEval.summarize()
136
- map, map50 = cocoEval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5)
137
- # Return results
138
- model.float() # for training
139
- if task != 'train':
140
- LOGGER.info(f"Results saved to {self.save_dir}")
141
- return (map50, map)
142
- return (0.0, 0.0)
143
-
144
- def eval_speed(self, task):
145
- '''Evaluate model inference speed.'''
146
- if task != 'train':
147
- n_samples = self.speed_result[0].item()
148
- pre_time, inf_time, nms_time = 1000 * self.speed_result[1:].cpu().numpy() / n_samples
149
- for n, v in zip(["pre-process", "inference", "NMS"],[pre_time, inf_time, nms_time]):
150
- LOGGER.info("Average {} time: {:.2f} ms".format(n, v))
151
-
152
- def box_convert(self, x):
153
- # Convert boxes with shape [n, 4] from [x1, y1, x2, y2] to [x, y, w, h] where x1y1=top-left, x2y2=bottom-right
154
- y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
155
- y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
156
- y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
157
- y[:, 2] = x[:, 2] - x[:, 0] # width
158
- y[:, 3] = x[:, 3] - x[:, 1] # height
159
- return y
160
-
161
- def scale_coords(self, img1_shape, coords, img0_shape, ratio_pad=None):
162
- # Rescale coords (xyxy) from img1_shape to img0_shape
163
- if ratio_pad is None: # calculate from img0_shape
164
- gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
165
- pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
166
- else:
167
- gain = ratio_pad[0][0]
168
- pad = ratio_pad[1]
169
-
170
- coords[:, [0, 2]] -= pad[0] # x padding
171
- coords[:, [1, 3]] -= pad[1] # y padding
172
- coords[:, :4] /= gain
173
- if isinstance(coords, torch.Tensor): # faster individually
174
- coords[:, 0].clamp_(0, img0_shape[1]) # x1
175
- coords[:, 1].clamp_(0, img0_shape[0]) # y1
176
- coords[:, 2].clamp_(0, img0_shape[1]) # x2
177
- coords[:, 3].clamp_(0, img0_shape[0]) # y2
178
- else: # np.array (faster grouped)
179
- coords[:, [0, 2]] = coords[:, [0, 2]].clip(0, img0_shape[1]) # x1, x2
180
- coords[:, [1, 3]] = coords[:, [1, 3]].clip(0, img0_shape[0]) # y1, y2
181
- return coords
182
-
183
- def convert_to_coco_format(self, outputs, imgs, paths, shapes, ids):
184
- pred_results = []
185
- for i, pred in enumerate(outputs):
186
- if len(pred) == 0:
187
- continue
188
- path, shape = Path(paths[i]), shapes[i][0]
189
- self.scale_coords(imgs[i].shape[1:], pred[:, :4], shape, shapes[i][1])
190
- image_id = int(path.stem) if path.stem.isnumeric() else path.stem
191
- bboxes = self.box_convert(pred[:, 0:4])
192
- bboxes[:, :2] -= bboxes[:, 2:] / 2
193
- cls = pred[:, 5]
194
- scores = pred[:, 4]
195
- for ind in range(pred.shape[0]):
196
- category_id = ids[int(cls[ind])]
197
- bbox = [round(x, 3) for x in bboxes[ind].tolist()]
198
- score = round(scores[ind].item(), 5)
199
- pred_data = {
200
- "image_id": image_id,
201
- "category_id": category_id,
202
- "bbox": bbox,
203
- "score": score
204
- }
205
- pred_results.append(pred_data)
206
- return pred_results
207
-
208
- @staticmethod
209
- def check_task(task):
210
- if task not in ['train','val','speed']:
211
- raise Exception("task argument error: only support 'train' / 'val' / 'speed' task.")
212
-
213
- @staticmethod
214
- def reload_thres(conf_thres, iou_thres, task):
215
- '''Sets conf and iou threshold for task val/speed'''
216
- if task != 'train':
217
- if task == 'val':
218
- conf_thres = 0.001
219
- if task == 'speed':
220
- conf_thres = 0.25
221
- iou_thres = 0.45
222
- return conf_thres, iou_thres
223
-
224
- @staticmethod
225
- def reload_device(device, model, task):
226
- # device = 'cpu' or '0' or '0,1,2,3'
227
- if task == 'train':
228
- device = next(model.parameters()).device
229
- else:
230
- if device == 'cpu':
231
- os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
232
- elif device:
233
- os.environ['CUDA_VISIBLE_DEVICES'] = device
234
- assert torch.cuda.is_available()
235
- cuda = device != 'cpu' and torch.cuda.is_available()
236
- device = torch.device('cuda:0' if cuda else 'cpu')
237
- return device
238
-
239
- @staticmethod
240
- def reload_dataset(data):
241
- with open(data, errors='ignore') as yaml_file:
242
- data = yaml.safe_load(yaml_file)
243
- val = data.get('val')
244
- if not os.path.exists(val):
245
- raise Exception('Dataset not found.')
246
- return data
247
-
248
- @staticmethod
249
- def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper)
250
- # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
251
- x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20,
252
- 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
253
- 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
254
- 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79,
255
- 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
256
- return x
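
The evaluation flow implemented by the deleted Evaler follows its method names directly; a hedged sketch for the 'val' task (the checkpoint, dataset yaml, and save directory are placeholders):

```python
# Sketch of the removed Evaler's evaluation flow for task='val'
# (method names and signatures as defined above; paths are placeholders).
from yolov6.core.evaler import Evaler

task = "val"
Evaler.check_task(task)
data = Evaler.reload_dataset("data/coco.yaml")               # parse dataset yaml, check val path
device = Evaler.reload_device("0", None, task)               # '0' -> cuda:0
conf_thres, iou_thres = Evaler.reload_thres(0.25, 0.45, task)

val = Evaler(data, batch_size=32, img_size=640, conf_thres=conf_thres,
             iou_thres=iou_thres, device=device, half=True, save_dir="runs/val/exp")
model = val.init_model(None, "weights/best_ckpt.pt", task)   # load checkpoint for non-train tasks
dataloader = val.init_data(None, task)
pred_results = val.predict_model(model, dataloader, task)    # COCO-format predictions + timings
map50, map50_95 = val.eval_model(pred_results, model, dataloader, task)
```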
 
yolov6/core/inferer.py DELETED
@@ -1,231 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import os
4
- import os.path as osp
5
- import math
6
- from tqdm import tqdm
7
- import numpy as np
8
- import cv2
9
- import torch
10
- from PIL import ImageFont
11
-
12
- from yolov6.utils.events import LOGGER, load_yaml
13
- from yolov6.layers.common import DetectBackend
14
- from yolov6.data.data_augment import letterbox
15
- from yolov6.utils.nms import non_max_suppression
16
- from yolov6.utils.torch_utils import get_model_info
17
-
18
-
19
- class Inferer:
20
- def __init__(self, source, weights, device, yaml, img_size, half):
21
- import glob
22
- from yolov6.data.datasets import IMG_FORMATS
23
-
24
- self.__dict__.update(locals())
25
-
26
- # Init model
27
- self.device = device
28
- self.img_size = img_size
29
- cuda = self.device != 'cpu' and torch.cuda.is_available()
30
- self.device = torch.device('cuda:0' if cuda else 'cpu')
31
- self.model = DetectBackend(weights, device=self.device)
32
- self.stride = self.model.stride
33
- self.class_names = load_yaml(yaml)['names']
34
- self.img_size = self.check_img_size(self.img_size, s=self.stride) # check image size
35
-
36
- # Half precision
37
- if half & (self.device.type != 'cpu'):
38
- self.model.model.half()
39
- else:
40
- self.model.model.float()
41
- half = False
42
-
43
- if self.device.type != 'cpu':
44
- self.model(torch.zeros(1, 3, *self.img_size).to(self.device).type_as(next(self.model.model.parameters()))) # warmup
45
-
46
- # Load data
47
- if os.path.isdir(source):
48
- img_paths = sorted(glob.glob(os.path.join(source, '*.*'))) # dir
49
- elif os.path.isfile(source):
50
- img_paths = [source] # files
51
- else:
52
- raise Exception(f'Invalid path: {source}')
53
- self.img_paths = [img_path for img_path in img_paths if img_path.split('.')[-1].lower() in IMG_FORMATS]
54
-
55
- # Switch model to deploy status
56
- self.model_switch(self.model, self.img_size)
57
-
58
- def model_switch(self, model, img_size):
59
- ''' Model switch to deploy status '''
60
- from yolov6.layers.common import RepVGGBlock
61
- for layer in model.modules():
62
- if isinstance(layer, RepVGGBlock):
63
- layer.switch_to_deploy()
64
-
65
- LOGGER.info("Switch model to deploy modality.")
66
-
67
- def infer(self, conf_thres, iou_thres, classes, agnostic_nms, max_det, save_dir, save_txt, save_img, hide_labels, hide_conf):
68
- ''' Model Inference and results visualization '''
69
-
70
- for img_path in tqdm(self.img_paths):
71
- img, img_src = self.precess_image(img_path, self.img_size, self.stride, self.half)
72
- img = img.to(self.device)
73
- if len(img.shape) == 3:
74
- img = img[None]
75
- # expand for batch dim
76
- pred_results = self.model(img)
77
- det = non_max_suppression(pred_results, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)[0]
78
-
79
- save_path = osp.join(save_dir, osp.basename(img_path)) # im.jpg
80
- txt_path = osp.join(save_dir, 'labels', osp.splitext(osp.basename(img_path))[0])
81
-
82
- gn = torch.tensor(img_src.shape)[[1, 0, 1, 0]] # normalization gain whwh
83
- img_ori = img_src
84
-
85
- # check image and font
86
- assert img_ori.data.contiguous, 'Image needs to be contiguous. Please apply to input images with np.ascontiguousarray(im).'
87
- self.font_check()
88
-
89
- if len(det):
90
- det[:, :4] = self.rescale(img.shape[2:], det[:, :4], img_src.shape).round()
91
-
92
- for *xyxy, conf, cls in reversed(det):
93
- if save_txt: # Write to file
94
- xywh = (self.box_convert(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
95
- line = (cls, *xywh, conf)
96
- with open(txt_path + '.txt', 'a') as f:
97
- f.write(('%g ' * len(line)).rstrip() % line + '\n')
98
-
99
- if save_img:
100
- class_num = int(cls) # integer class
101
- label = None if hide_labels else (self.class_names[class_num] if hide_conf else f'{self.class_names[class_num]} {conf:.2f}')
102
-
103
- self.plot_box_and_label(img_ori, max(round(sum(img_ori.shape) / 2 * 0.003), 2), xyxy, label, color=self.generate_colors(class_num, True))
104
-
105
- img_src = np.asarray(img_ori)
106
-
107
- # Save results (image with detections)
108
- if save_img:
109
- cv2.imwrite(save_path, img_src)
110
-
111
- @staticmethod
112
- def precess_image(path, img_size, stride, half):
113
- '''Process image before image inference.'''
114
- try:
115
- img_src = cv2.imread(path)
116
- assert img_src is not None, f'Invalid image: {path}'
117
- except Exception as e:
118
- LOGGER.warning(e)
119
- image = letterbox(img_src, img_size, stride=stride)[0]
120
-
121
- # Convert
122
- image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
123
- image = torch.from_numpy(np.ascontiguousarray(image))
124
- image = image.half() if half else image.float() # uint8 to fp16/32
125
- image /= 255 # 0 - 255 to 0.0 - 1.0
126
-
127
- return image, img_src
128
-
129
- @staticmethod
130
- def rescale(ori_shape, boxes, target_shape):
131
- '''Rescale the output to the original image shape'''
132
- ratio = min(ori_shape[0] / target_shape[0], ori_shape[1] / target_shape[1])
133
- padding = (ori_shape[1] - target_shape[1] * ratio) / 2, (ori_shape[0] - target_shape[0] * ratio) / 2
134
-
135
- boxes[:, [0, 2]] -= padding[0]
136
- boxes[:, [1, 3]] -= padding[1]
137
- boxes[:, :4] /= ratio
138
-
139
- boxes[:, 0].clamp_(0, target_shape[1]) # x1
140
- boxes[:, 1].clamp_(0, target_shape[0]) # y1
141
- boxes[:, 2].clamp_(0, target_shape[1]) # x2
142
- boxes[:, 3].clamp_(0, target_shape[0]) # y2
143
-
144
- return boxes
145
-
146
- def check_img_size(self, img_size, s=32, floor=0):
147
- """Make sure image size is a multiple of stride s in each dimension, and return a new shape list of image."""
148
- if isinstance(img_size, int): # integer i.e. img_size=640
149
- new_size = max(self.make_divisible(img_size, int(s)), floor)
150
- elif isinstance(img_size, list): # list i.e. img_size=[640, 480]
151
- new_size = [max(self.make_divisible(x, int(s)), floor) for x in img_size]
152
- else:
153
- raise Exception(f"Unsupported type of img_size: {type(img_size)}")
154
-
155
- if new_size != img_size:
156
- print(f'WARNING: --img-size {img_size} must be multiple of max stride {s}, updating to {new_size}')
157
- return new_size if isinstance(img_size,list) else [new_size]*2
158
-
159
- def make_divisible(self, x, divisor):
160
- # Upward revision the value x to make it evenly divisible by the divisor.
161
- return math.ceil(x / divisor) * divisor
162
-
163
- @staticmethod
164
- def plot_box_and_label(image, lw, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
165
- # Add one xyxy box to image with label
166
- p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
167
- cv2.rectangle(image, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
168
- if label:
169
- tf = max(lw - 1, 1) # font thickness
170
- w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0] # text width, height
171
- outside = p1[1] - h - 3 >= 0 # label fits outside box
172
- p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
173
- cv2.rectangle(image, p1, p2, color, -1, cv2.LINE_AA) # filled
174
- cv2.putText(image, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, lw / 3, txt_color,
175
- thickness=tf, lineType=cv2.LINE_AA)
176
-
177
- @staticmethod
178
- def font_check(font='./yolov6/utils/Arial.ttf', size=10):
179
- # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary
180
- assert osp.exists(font), f'font path not exists: {font}'
181
- try:
182
- return ImageFont.truetype(str(font) if font.exists() else font.name, size)
183
- except Exception as e: # download if missing
184
- return ImageFont.truetype(str(font), size)
185
-
186
- @staticmethod
187
- def box_convert(x):
188
- # Convert boxes with shape [n, 4] from [x1, y1, x2, y2] to [x, y, w, h] where x1y1=top-left, x2y2=bottom-right
189
- y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
190
- y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
191
- y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
192
- y[:, 2] = x[:, 2] - x[:, 0] # width
193
- y[:, 3] = x[:, 3] - x[:, 1] # height
194
- return y
195
-
196
- @staticmethod
197
- def generate_colors(i, bgr=False):
198
- hex = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
199
- '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
200
- palette = []
201
- for iter in hex:
202
- h = '#' + iter
203
- palette.append(tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)))
204
- num = len(palette)
205
- color = palette[int(i) % num]
206
- return (color[2], color[1], color[0]) if bgr else color
207
-
208
-
209
- class VideoInferer(Inferer):
210
-
211
- def setup_source(self, source):
212
- # Load data
213
- if os.path.isfile(source):
214
- self.vid_path = source
215
- self.vid_name = '.'.join(os.path.basename(source).split('.')[:-1])
216
- else:
217
- raise Exception(f'Invalid path: {source}')
218
-
219
- self.cap = cv2.VideoCapture(self.vid_path)
220
-
221
- def iterator_length(self):
222
- return int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
223
-
224
- def img_iterator(self):
225
- cur_fid = 0
226
- ret, frame = self.cap.read()
227
-
228
- while ret:
229
- yield frame, f'{self.vid_name}_frame_{cur_fid:06}.jpg'
230
- ret, frame = self.cap.read()
231
- cur_fid += 1
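
For the VideoInferer subclass deleted above, frame iteration is decoupled from detection; a hedged sketch of that API (the weights, yaml, and video paths are placeholders):

```python
# Sketch of the removed VideoInferer's frame-iteration API (methods as above);
# detection on each frame would reuse the parent Inferer logic.
from yolov6.core.inferer import VideoInferer

vid = VideoInferer(source="video.mp4", weights="weights/best_ckpt.pt",
                   device="0", yaml="data/coco.yaml", img_size=640, half=False)
vid.setup_source("video.mp4")             # opens the cv2.VideoCapture
total = vid.iterator_length()             # frame count via CAP_PROP_FRAME_COUNT

for frame, name in vid.img_iterator():    # yields (BGR frame, "<video>_frame_000000.jpg")
    pass                                  # run per-frame inference / saving here
```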
 
yolov6/data/data_augment.py DELETED
@@ -1,193 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- # This code is based on
4
- # https://github.com/ultralytics/yolov5/blob/master/utils/dataloaders.py
5
-
6
- import math
7
- import random
8
-
9
- import cv2
10
- import numpy as np
11
-
12
-
13
- def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
14
- # HSV color-space augmentation
15
- if hgain or sgain or vgain:
16
- r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
17
- hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
18
- dtype = im.dtype # uint8
19
-
20
- x = np.arange(0, 256, dtype=r.dtype)
21
- lut_hue = ((x * r[0]) % 180).astype(dtype)
22
- lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
23
- lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
24
-
25
- im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
26
- cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed
27
-
28
-
29
- def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
30
- # Resize and pad image while meeting stride-multiple constraints
31
- shape = im.shape[:2] # current shape [height, width]
32
- if isinstance(new_shape, int):
33
- new_shape = (new_shape, new_shape)
34
-
35
- # Scale ratio (new / old)
36
- r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
37
- if not scaleup: # only scale down, do not scale up (for better val mAP)
38
- r = min(r, 1.0)
39
-
40
- # Compute padding
41
- new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
42
- dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
43
-
44
- if auto: # minimum rectangle
45
- dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
46
-
47
- dw /= 2 # divide padding into 2 sides
48
- dh /= 2
49
-
50
- if shape[::-1] != new_unpad: # resize
51
- im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
52
- top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
53
- left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
54
- im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
55
- return im, r, (dw, dh)
56
-
57
-
58
- def mixup(im, labels, im2, labels2):
59
- # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
60
- r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
61
- im = (im * r + im2 * (1 - r)).astype(np.uint8)
62
- labels = np.concatenate((labels, labels2), 0)
63
- return im, labels
64
-
65
-
66
- def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
67
- # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
68
- w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
69
- w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
70
- ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio
71
- return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates
72
-
73
-
74
- def random_affine(img, labels=(), degrees=10, translate=.1, scale=.1, shear=10,
75
- new_shape=(640, 640)):
76
-
77
- n = len(labels)
78
- height, width = new_shape
79
-
80
- M, s = get_transform_matrix(img.shape[:2], (height, width), degrees, scale, shear, translate)
81
- if (M != np.eye(3)).any(): # image changed
82
- img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
83
-
84
- # Transform label coordinates
85
- if n:
86
- new = np.zeros((n, 4))
87
-
88
- xy = np.ones((n * 4, 3))
89
- xy[:, :2] = labels[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
90
- xy = xy @ M.T # transform
91
- xy = xy[:, :2].reshape(n, 8) # perspective rescale or affine
92
-
93
- # create new boxes
94
- x = xy[:, [0, 2, 4, 6]]
95
- y = xy[:, [1, 3, 5, 7]]
96
- new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
97
-
98
- # clip
99
- new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
100
- new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
101
-
102
- # filter candidates
103
- i = box_candidates(box1=labels[:, 1:5].T * s, box2=new.T, area_thr=0.1)
104
- labels = labels[i]
105
- labels[:, 1:5] = new[i]
106
-
107
- return img, labels
108
-
109
-
110
- def get_transform_matrix(img_shape, new_shape, degrees, scale, shear, translate):
111
- new_height, new_width = new_shape
112
- # Center
113
- C = np.eye(3)
114
- C[0, 2] = -img_shape[1] / 2 # x translation (pixels)
115
- C[1, 2] = -img_shape[0] / 2 # y translation (pixels)
116
-
117
- # Rotation and Scale
118
- R = np.eye(3)
119
- a = random.uniform(-degrees, degrees)
120
- # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
121
- s = random.uniform(1 - scale, 1 + scale)
122
- # s = 2 ** random.uniform(-scale, scale)
123
- R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
124
-
125
- # Shear
126
- S = np.eye(3)
127
- S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
128
- S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
129
-
130
- # Translation
131
- T = np.eye(3)
132
- T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * new_width # x translation (pixels)
133
-     T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * new_height  # y translation (pixels)
134
-
135
- # Combined rotation matrix
136
- M = T @ S @ R @ C # order of operations (right to left) is IMPORTANT
137
- return M, s
138
-
139
-
140
- def mosaic_augmentation(img_size, imgs, hs, ws, labels, hyp):
141
-
142
-     assert len(imgs) == 4, "The current version of mosaic augmentation only supports 4 images."
143
-
144
- labels4 = []
145
- s = img_size
146
- yc, xc = (int(random.uniform(s//2, 3*s//2)) for _ in range(2)) # mosaic center x, y
147
- for i in range(len(imgs)):
148
- # Load image
149
- img, h, w = imgs[i], hs[i], ws[i]
150
- # place img in img4
151
- if i == 0: # top left
152
- img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
153
- x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
154
- x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
155
- elif i == 1: # top right
156
- x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
157
- x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
158
- elif i == 2: # bottom left
159
- x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
160
- x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
161
- elif i == 3: # bottom right
162
- x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
163
- x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
164
-
165
- img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
166
- padw = x1a - x1b
167
- padh = y1a - y1b
168
-
169
- # Labels
170
- labels_per_img = labels[i].copy()
171
- if labels_per_img.size:
172
- boxes = np.copy(labels_per_img[:, 1:])
173
- boxes[:, 0] = w * (labels_per_img[:, 1] - labels_per_img[:, 3] / 2) + padw # top left x
174
- boxes[:, 1] = h * (labels_per_img[:, 2] - labels_per_img[:, 4] / 2) + padh # top left y
175
- boxes[:, 2] = w * (labels_per_img[:, 1] + labels_per_img[:, 3] / 2) + padw # bottom right x
176
- boxes[:, 3] = h * (labels_per_img[:, 2] + labels_per_img[:, 4] / 2) + padh # bottom right y
177
- labels_per_img[:, 1:] = boxes
178
-
179
- labels4.append(labels_per_img)
180
-
181
- # Concat/clip labels
182
- labels4 = np.concatenate(labels4, 0)
183
- for x in (labels4[:, 1:]):
184
- np.clip(x, 0, 2 * s, out=x)
185
-
186
- # Augment
187
- img4, labels4 = random_affine(img4, labels4,
188
- degrees=hyp['degrees'],
189
- translate=hyp['translate'],
190
- scale=hyp['scale'],
191
- shear=hyp['shear'])
192
-
193
- return img4, labels4
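
For orientation only, here is a small self-contained sketch (not part of the commit) of the resize-and-pad arithmetic that the deleted letterbox helper performs; the 720x1280 source size, 640 target and stride 32 are made-up values:

    import numpy as np

    h, w = 720, 1280                                   # hypothetical source frame
    new_shape, stride = (640, 640), 32
    r = min(new_shape[0] / h, new_shape[1] / w)        # scale ratio -> 0.5
    new_unpad = int(round(w * r)), int(round(h * r))   # resized (w, h) -> (640, 360)
    dw = new_shape[1] - new_unpad[0]                   # width padding -> 0
    dh = new_shape[0] - new_unpad[1]                   # height padding -> 280
    dw, dh = np.mod(dw, stride), np.mod(dh, stride)    # auto=True keeps only the stride remainder -> 0, 24
    print(r, new_unpad, (dw / 2, dh / 2))              # padding is split over both sides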
yolov6/data/data_load.py DELETED
@@ -1,113 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- # This code is based on
4
- # https://github.com/ultralytics/yolov5/blob/master/utils/dataloaders.py
5
-
6
- import os
7
- from torch.utils.data import dataloader, distributed
8
-
9
- from .datasets import TrainValDataset
10
- from yolov6.utils.events import LOGGER
11
- from yolov6.utils.torch_utils import torch_distributed_zero_first
12
-
13
-
14
- def create_dataloader(
15
- path,
16
- img_size,
17
- batch_size,
18
- stride,
19
- hyp=None,
20
- augment=False,
21
- check_images=False,
22
- check_labels=False,
23
- pad=0.0,
24
- rect=False,
25
- rank=-1,
26
- workers=8,
27
- shuffle=False,
28
- data_dict=None,
29
- task="Train",
30
- ):
31
- """Create general dataloader.
32
-
33
- Returns dataloader and dataset
34
- """
35
- if rect and shuffle:
36
- LOGGER.warning(
37
- "WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False"
38
- )
39
- shuffle = False
40
- with torch_distributed_zero_first(rank):
41
- dataset = TrainValDataset(
42
- path,
43
- img_size,
44
- batch_size,
45
- augment=augment,
46
- hyp=hyp,
47
- rect=rect,
48
- check_images=check_images,
49
- check_labels=check_labels,
50
- stride=int(stride),
51
- pad=pad,
52
- rank=rank,
53
- data_dict=data_dict,
54
- task=task,
55
- )
56
-
57
- batch_size = min(batch_size, len(dataset))
58
- workers = min(
59
- [
60
- os.cpu_count() // int(os.getenv("WORLD_SIZE", 1)),
61
- batch_size if batch_size > 1 else 0,
62
- workers,
63
- ]
64
- ) # number of workers
65
- sampler = (
66
- None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
67
- )
68
- return (
69
- TrainValDataLoader(
70
- dataset,
71
- batch_size=batch_size,
72
- shuffle=shuffle and sampler is None,
73
- num_workers=workers,
74
- sampler=sampler,
75
- pin_memory=True,
76
- collate_fn=TrainValDataset.collate_fn,
77
- ),
78
- dataset,
79
- )
80
-
81
-
82
- class TrainValDataLoader(dataloader.DataLoader):
83
- """Dataloader that reuses workers
84
-
85
- Uses same syntax as vanilla DataLoader
86
- """
87
-
88
- def __init__(self, *args, **kwargs):
89
- super().__init__(*args, **kwargs)
90
- object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler))
91
- self.iterator = super().__iter__()
92
-
93
- def __len__(self):
94
- return len(self.batch_sampler.sampler)
95
-
96
- def __iter__(self):
97
- for i in range(len(self)):
98
- yield next(self.iterator)
99
-
100
-
101
- class _RepeatSampler:
102
- """Sampler that repeats forever
103
-
104
- Args:
105
- sampler (Sampler)
106
- """
107
-
108
- def __init__(self, sampler):
109
- self.sampler = sampler
110
-
111
- def __iter__(self):
112
- while True:
113
- yield from iter(self.sampler)
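
The TrainValDataLoader above keeps its worker processes alive by swapping in a sampler that never terminates. A minimal self-contained sketch of that idea (the RepeatSampler name and the tiny dataset are just for illustration):

    import itertools

    import torch
    from torch.utils.data import SequentialSampler, TensorDataset

    class RepeatSampler:
        """Same idea as _RepeatSampler above: iterate the wrapped sampler forever."""
        def __init__(self, sampler):
            self.sampler = sampler

        def __iter__(self):
            while True:
                yield from iter(self.sampler)

    dataset = TensorDataset(torch.arange(4))
    indices = itertools.islice(iter(RepeatSampler(SequentialSampler(dataset))), 10)
    print(list(indices))   # [0, 1, 2, 3, 0, 1, 2, 3, 0, 1]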
yolov6/data/datasets.py DELETED
@@ -1,550 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
-
4
- import glob
5
- import os
6
- import os.path as osp
7
- import random
8
- import json
9
- import time
10
- import hashlib
11
-
12
- from multiprocessing.pool import Pool
13
-
14
- import cv2
15
- import numpy as np
16
- import torch
17
- from PIL import ExifTags, Image, ImageOps
18
- from torch.utils.data import Dataset
19
- from tqdm import tqdm
20
-
21
- from .data_augment import (
22
- augment_hsv,
23
- letterbox,
24
- mixup,
25
- random_affine,
26
- mosaic_augmentation,
27
- )
28
- from yolov6.utils.events import LOGGER
29
-
30
- # Parameters
31
- IMG_FORMATS = ["bmp", "jpg", "jpeg", "png", "tif", "tiff", "dng", "webp", "mpo"]
32
- # Get orientation exif tag
33
- for k, v in ExifTags.TAGS.items():
34
- if v == "Orientation":
35
- ORIENTATION = k
36
- break
37
-
38
-
39
- class TrainValDataset(Dataset):
40
- # YOLOv6 train_loader/val_loader, loads images and labels for training and validation
41
- def __init__(
42
- self,
43
- img_dir,
44
- img_size=640,
45
- batch_size=16,
46
- augment=False,
47
- hyp=None,
48
- rect=False,
49
- check_images=False,
50
- check_labels=False,
51
- stride=32,
52
- pad=0.0,
53
- rank=-1,
54
- data_dict=None,
55
- task="train",
56
- ):
57
- assert task.lower() in ("train", "val", "speed"), f"Not supported task: {task}"
58
- t1 = time.time()
59
- self.__dict__.update(locals())
60
- self.main_process = self.rank in (-1, 0)
61
- self.task = self.task.capitalize()
62
- self.class_names = data_dict["names"]
63
- self.img_paths, self.labels = self.get_imgs_labels(self.img_dir)
64
- if self.rect:
65
- shapes = [self.img_info[p]["shape"] for p in self.img_paths]
66
- self.shapes = np.array(shapes, dtype=np.float64)
67
- self.batch_indices = np.floor(
68
- np.arange(len(shapes)) / self.batch_size
69
- ).astype(
70
- np.int
71
- ) # batch indices of each image
72
- self.sort_files_shapes()
73
- t2 = time.time()
74
- if self.main_process:
75
-             LOGGER.info("%.1fs for dataset initialization." % (t2 - t1))
76
-
77
- def __len__(self):
78
- """Get the length of dataset"""
79
- return len(self.img_paths)
80
-
81
- def __getitem__(self, index):
82
-         """Fetch a data sample for the given index.
83
- This function applies mosaic and mixup augments during training.
84
- During validation, letterbox augment is applied.
85
- """
86
- # Mosaic Augmentation
87
- if self.augment and random.random() < self.hyp["mosaic"]:
88
- img, labels = self.get_mosaic(index)
89
- shapes = None
90
-
91
- # MixUp augmentation
92
- if random.random() < self.hyp["mixup"]:
93
- img_other, labels_other = self.get_mosaic(
94
- random.randint(0, len(self.img_paths) - 1)
95
- )
96
- img, labels = mixup(img, labels, img_other, labels_other)
97
-
98
- else:
99
- # Load image
100
- img, (h0, w0), (h, w) = self.load_image(index)
101
-
102
- # Letterbox
103
- shape = (
104
- self.batch_shapes[self.batch_indices[index]]
105
- if self.rect
106
- else self.img_size
107
- ) # final letterboxed shape
108
- img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
109
- shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
110
-
111
- labels = self.labels[index].copy()
112
- if labels.size:
113
- w *= ratio
114
- h *= ratio
115
- # new boxes
116
- boxes = np.copy(labels[:, 1:])
117
- boxes[:, 0] = (
118
- w * (labels[:, 1] - labels[:, 3] / 2) + pad[0]
119
- ) # top left x
120
- boxes[:, 1] = (
121
- h * (labels[:, 2] - labels[:, 4] / 2) + pad[1]
122
- ) # top left y
123
- boxes[:, 2] = (
124
- w * (labels[:, 1] + labels[:, 3] / 2) + pad[0]
125
- ) # bottom right x
126
- boxes[:, 3] = (
127
- h * (labels[:, 2] + labels[:, 4] / 2) + pad[1]
128
- ) # bottom right y
129
- labels[:, 1:] = boxes
130
-
131
- if self.augment:
132
- img, labels = random_affine(
133
- img,
134
- labels,
135
- degrees=self.hyp["degrees"],
136
- translate=self.hyp["translate"],
137
- scale=self.hyp["scale"],
138
- shear=self.hyp["shear"],
139
- new_shape=(self.img_size, self.img_size),
140
- )
141
-
142
- if len(labels):
143
- h, w = img.shape[:2]
144
-
145
- labels[:, [1, 3]] = labels[:, [1, 3]].clip(0, w - 1e-3) # x1, x2
146
- labels[:, [2, 4]] = labels[:, [2, 4]].clip(0, h - 1e-3) # y1, y2
147
-
148
- boxes = np.copy(labels[:, 1:])
149
- boxes[:, 0] = ((labels[:, 1] + labels[:, 3]) / 2) / w # x center
150
- boxes[:, 1] = ((labels[:, 2] + labels[:, 4]) / 2) / h # y center
151
- boxes[:, 2] = (labels[:, 3] - labels[:, 1]) / w # width
152
- boxes[:, 3] = (labels[:, 4] - labels[:, 2]) / h # height
153
- labels[:, 1:] = boxes
154
-
155
- if self.augment:
156
- img, labels = self.general_augment(img, labels)
157
-
158
- labels_out = torch.zeros((len(labels), 6))
159
- if len(labels):
160
- labels_out[:, 1:] = torch.from_numpy(labels)
161
-
162
- # Convert
163
- img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
164
- img = np.ascontiguousarray(img)
165
-
166
- return torch.from_numpy(img), labels_out, self.img_paths[index], shapes
167
-
168
- def load_image(self, index):
169
- """Load image.
170
-         This function loads an image with cv2 and resizes it to the target shape (img_size) while keeping the aspect ratio.
171
-
172
- Returns:
173
- Image, original shape of image, resized image shape
174
- """
175
- path = self.img_paths[index]
176
- im = cv2.imread(path)
177
- assert im is not None, f"Image Not Found {path}, workdir: {os.getcwd()}"
178
-
179
- h0, w0 = im.shape[:2] # origin shape
180
- r = self.img_size / max(h0, w0)
181
- if r != 1:
182
- im = cv2.resize(
183
- im,
184
- (int(w0 * r), int(h0 * r)),
185
- interpolation=cv2.INTER_AREA
186
- if r < 1 and not self.augment
187
- else cv2.INTER_LINEAR,
188
- )
189
- return im, (h0, w0), im.shape[:2]
190
-
191
- @staticmethod
192
- def collate_fn(batch):
193
- """Merges a list of samples to form a mini-batch of Tensor(s)"""
194
- img, label, path, shapes = zip(*batch)
195
- for i, l in enumerate(label):
196
- l[:, 0] = i # add target image index for build_targets()
197
- return torch.stack(img, 0), torch.cat(label, 0), path, shapes
198
-
199
- def get_imgs_labels(self, img_dir):
200
-
201
- assert osp.exists(img_dir), f"{img_dir} is an invalid directory path!"
202
- valid_img_record = osp.join(
203
- osp.dirname(img_dir), "." + osp.basename(img_dir) + ".json"
204
- )
205
- NUM_THREADS = min(8, os.cpu_count())
206
-
207
- img_paths = glob.glob(osp.join(img_dir, "*"), recursive=True)
208
- img_paths = sorted(
209
- p for p in img_paths if p.split(".")[-1].lower() in IMG_FORMATS
210
- )
211
- assert img_paths, f"No images found in {img_dir}."
212
-
213
- img_hash = self.get_hash(img_paths)
214
- if osp.exists(valid_img_record):
215
- with open(valid_img_record, "r") as f:
216
- cache_info = json.load(f)
217
- if "image_hash" in cache_info and cache_info["image_hash"] == img_hash:
218
- img_info = cache_info["information"]
219
- else:
220
- self.check_images = True
221
- else:
222
- self.check_images = True
223
-
224
- # check images
225
- if self.check_images and self.main_process:
226
- img_info = {}
227
- nc, msgs = 0, [] # number corrupt, messages
228
- LOGGER.info(
229
- f"{self.task}: Checking formats of images with {NUM_THREADS} process(es): "
230
- )
231
- with Pool(NUM_THREADS) as pool:
232
- pbar = tqdm(
233
- pool.imap(TrainValDataset.check_image, img_paths),
234
- total=len(img_paths),
235
- )
236
- for img_path, shape_per_img, nc_per_img, msg in pbar:
237
- if nc_per_img == 0: # not corrupted
238
- img_info[img_path] = {"shape": shape_per_img}
239
- nc += nc_per_img
240
- if msg:
241
- msgs.append(msg)
242
- pbar.desc = f"{nc} image(s) corrupted"
243
- pbar.close()
244
- if msgs:
245
- LOGGER.info("\n".join(msgs))
246
-
247
- cache_info = {"information": img_info, "image_hash": img_hash}
248
- # save valid image paths.
249
- with open(valid_img_record, "w") as f:
250
- json.dump(cache_info, f)
251
-
252
- # check and load anns
253
- label_dir = osp.join(
254
- osp.dirname(osp.dirname(img_dir)), "labels", osp.basename(img_dir)
255
- )
256
- assert osp.exists(label_dir), f"{label_dir} is an invalid directory path!"
257
-
258
- img_paths = list(img_info.keys())
259
- label_paths = sorted(
260
- osp.join(label_dir, osp.splitext(osp.basename(p))[0] + ".txt")
261
- for p in img_paths
262
- )
263
- label_hash = self.get_hash(label_paths)
264
- if "label_hash" not in cache_info or cache_info["label_hash"] != label_hash:
265
- self.check_labels = True
266
-
267
- if self.check_labels:
268
- cache_info["label_hash"] = label_hash
269
- nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number corrupt, messages
270
- LOGGER.info(
271
- f"{self.task}: Checking formats of labels with {NUM_THREADS} process(es): "
272
- )
273
- with Pool(NUM_THREADS) as pool:
274
- pbar = pool.imap(
275
- TrainValDataset.check_label_files, zip(img_paths, label_paths)
276
- )
277
- pbar = tqdm(pbar, total=len(label_paths)) if self.main_process else pbar
278
- for (
279
- img_path,
280
- labels_per_file,
281
- nc_per_file,
282
- nm_per_file,
283
- nf_per_file,
284
- ne_per_file,
285
- msg,
286
- ) in pbar:
287
- if nc_per_file == 0:
288
- img_info[img_path]["labels"] = labels_per_file
289
- else:
290
- img_info.pop(img_path)
291
- nc += nc_per_file
292
- nm += nm_per_file
293
- nf += nf_per_file
294
- ne += ne_per_file
295
- if msg:
296
- msgs.append(msg)
297
- if self.main_process:
298
- pbar.desc = f"{nf} label(s) found, {nm} label(s) missing, {ne} label(s) empty, {nc} invalid label files"
299
- if self.main_process:
300
- pbar.close()
301
- with open(valid_img_record, "w") as f:
302
- json.dump(cache_info, f)
303
- if msgs:
304
- LOGGER.info("\n".join(msgs))
305
- if nf == 0:
306
- LOGGER.warning(
307
- f"WARNING: No labels found in {osp.dirname(self.img_paths[0])}. "
308
- )
309
-
310
- if self.task.lower() == "val":
311
- if self.data_dict.get("is_coco", False): # use original json file when evaluating on coco dataset.
312
-             assert osp.exists(self.data_dict["anno_path"]), "Evaluating on the COCO dataset requires a valid annotation file path in the config file: data/coco.yaml"
313
- else:
314
- assert (
315
- self.class_names
316
-             ), "Class names are required when converting labels to COCO format for evaluation."
317
- save_dir = osp.join(osp.dirname(osp.dirname(img_dir)), "annotations")
318
- if not osp.exists(save_dir):
319
- os.mkdir(save_dir)
320
- save_path = osp.join(
321
- save_dir, "instances_" + osp.basename(img_dir) + ".json"
322
- )
323
- TrainValDataset.generate_coco_format_labels(
324
- img_info, self.class_names, save_path
325
- )
326
-
327
- img_paths, labels = list(
328
- zip(
329
- *[
330
- (
331
- img_path,
332
- np.array(info["labels"], dtype=np.float32)
333
- if info["labels"]
334
- else np.zeros((0, 5), dtype=np.float32),
335
- )
336
- for img_path, info in img_info.items()
337
- ]
338
- )
339
- )
340
- self.img_info = img_info
341
- LOGGER.info(
342
- f"{self.task}: Final numbers of valid images: {len(img_paths)}/ labels: {len(labels)}. "
343
- )
344
- return img_paths, labels
345
-
346
- def get_mosaic(self, index):
347
- """Gets images and labels after mosaic augments"""
348
- indices = [index] + random.choices(
349
- range(0, len(self.img_paths)), k=3
350
- ) # 3 additional image indices
351
- random.shuffle(indices)
352
- imgs, hs, ws, labels = [], [], [], []
353
- for index in indices:
354
- img, _, (h, w) = self.load_image(index)
355
- labels_per_img = self.labels[index]
356
- imgs.append(img)
357
- hs.append(h)
358
- ws.append(w)
359
- labels.append(labels_per_img)
360
- img, labels = mosaic_augmentation(self.img_size, imgs, hs, ws, labels, self.hyp)
361
- return img, labels
362
-
363
- def general_augment(self, img, labels):
364
- """Gets images and labels after general augment
365
-         This function applies HSV, random up-down flip, and random left-right flip augmentations.
366
- """
367
- nl = len(labels)
368
-
369
- # HSV color-space
370
- augment_hsv(
371
- img,
372
- hgain=self.hyp["hsv_h"],
373
- sgain=self.hyp["hsv_s"],
374
- vgain=self.hyp["hsv_v"],
375
- )
376
-
377
- # Flip up-down
378
- if random.random() < self.hyp["flipud"]:
379
- img = np.flipud(img)
380
- if nl:
381
- labels[:, 2] = 1 - labels[:, 2]
382
-
383
- # Flip left-right
384
- if random.random() < self.hyp["fliplr"]:
385
- img = np.fliplr(img)
386
- if nl:
387
- labels[:, 1] = 1 - labels[:, 1]
388
-
389
- return img, labels
390
-
391
- def sort_files_shapes(self):
392
- # Sort by aspect ratio
393
- batch_num = self.batch_indices[-1] + 1
394
- s = self.shapes # wh
395
- ar = s[:, 1] / s[:, 0] # aspect ratio
396
- irect = ar.argsort()
397
- self.img_paths = [self.img_paths[i] for i in irect]
398
- self.labels = [self.labels[i] for i in irect]
399
- self.shapes = s[irect] # wh
400
- ar = ar[irect]
401
-
402
- # Set training image shapes
403
- shapes = [[1, 1]] * batch_num
404
- for i in range(batch_num):
405
- ari = ar[self.batch_indices == i]
406
- mini, maxi = ari.min(), ari.max()
407
- if maxi < 1:
408
- shapes[i] = [maxi, 1]
409
- elif mini > 1:
410
- shapes[i] = [1, 1 / mini]
411
- self.batch_shapes = (
412
- np.ceil(np.array(shapes) * self.img_size / self.stride + self.pad).astype(
413
- np.int
414
- )
415
- * self.stride
416
- )
417
-
418
- @staticmethod
419
- def check_image(im_file):
420
- # verify an image.
421
- nc, msg = 0, ""
422
- try:
423
- im = Image.open(im_file)
424
- im.verify() # PIL verify
425
- shape = im.size # (width, height)
426
- im_exif = im._getexif()
427
- if im_exif and ORIENTATION in im_exif:
428
- rotation = im_exif[ORIENTATION]
429
- if rotation in (6, 8):
430
- shape = (shape[1], shape[0])
431
-
432
- assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels"
433
- assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}"
434
- if im.format.lower() in ("jpg", "jpeg"):
435
- with open(im_file, "rb") as f:
436
- f.seek(-2, 2)
437
- if f.read() != b"\xff\xd9": # corrupt JPEG
438
- ImageOps.exif_transpose(Image.open(im_file)).save(
439
- im_file, "JPEG", subsampling=0, quality=100
440
- )
441
- msg += f"WARNING: {im_file}: corrupt JPEG restored and saved"
442
- return im_file, shape, nc, msg
443
- except Exception as e:
444
- nc = 1
445
- msg = f"WARNING: {im_file}: ignoring corrupt image: {e}"
446
- return im_file, None, nc, msg
447
-
448
- @staticmethod
449
- def check_label_files(args):
450
- img_path, lb_path = args
451
-         nm, nf, ne, nc, msg = 0, 0, 0, 0, ""  # number (missing, found, empty, corrupt), message
452
- try:
453
- if osp.exists(lb_path):
454
- nf = 1 # label found
455
- with open(lb_path, "r") as f:
456
- labels = [
457
- x.split() for x in f.read().strip().splitlines() if len(x)
458
- ]
459
- labels = np.array(labels, dtype=np.float32)
460
- if len(labels):
461
- assert all(
462
- len(l) == 5 for l in labels
463
- ), f"{lb_path}: wrong label format."
464
- assert (
465
- labels >= 0
466
-                 ).all(), f"{lb_path}: Label values error: all values in label file must be >= 0"
467
- assert (
468
- labels[:, 1:] <= 1
469
- ).all(), f"{lb_path}: Label values error: all coordinates must be normalized"
470
-
471
- _, indices = np.unique(labels, axis=0, return_index=True)
472
- if len(indices) < len(labels): # duplicate row check
473
- labels = labels[indices] # remove duplicates
474
- msg += f"WARNING: {lb_path}: {len(labels) - len(indices)} duplicate labels removed"
475
- labels = labels.tolist()
476
- else:
477
- ne = 1 # label empty
478
- labels = []
479
- else:
480
- nm = 1 # label missing
481
- labels = []
482
-
483
- return img_path, labels, nc, nm, nf, ne, msg
484
- except Exception as e:
485
- nc = 1
486
- msg = f"WARNING: {lb_path}: ignoring invalid labels: {e}"
487
- return img_path, None, nc, nm, nf, ne, msg
488
-
489
- @staticmethod
490
- def generate_coco_format_labels(img_info, class_names, save_path):
491
- # for evaluation with pycocotools
492
- dataset = {"categories": [], "annotations": [], "images": []}
493
- for i, class_name in enumerate(class_names):
494
- dataset["categories"].append(
495
- {"id": i, "name": class_name, "supercategory": ""}
496
- )
497
-
498
- ann_id = 0
499
- LOGGER.info(f"Convert to COCO format")
500
- for i, (img_path, info) in enumerate(tqdm(img_info.items())):
501
- labels = info["labels"] if info["labels"] else []
502
- img_id = osp.splitext(osp.basename(img_path))[0]
503
- img_id = int(img_id) if img_id.isnumeric() else img_id
504
- img_w, img_h = info["shape"]
505
- dataset["images"].append(
506
- {
507
- "file_name": os.path.basename(img_path),
508
- "id": img_id,
509
- "width": img_w,
510
- "height": img_h,
511
- }
512
- )
513
- if labels:
514
- for label in labels:
515
- c, x, y, w, h = label[:5]
516
- # convert x,y,w,h to x1,y1,x2,y2
517
- x1 = (x - w / 2) * img_w
518
- y1 = (y - h / 2) * img_h
519
- x2 = (x + w / 2) * img_w
520
- y2 = (y + h / 2) * img_h
521
- # cls_id starts from 0
522
- cls_id = int(c)
523
- w = max(0, x2 - x1)
524
- h = max(0, y2 - y1)
525
- dataset["annotations"].append(
526
- {
527
- "area": h * w,
528
- "bbox": [x1, y1, w, h],
529
- "category_id": cls_id,
530
- "id": ann_id,
531
- "image_id": img_id,
532
- "iscrowd": 0,
533
- # mask
534
- "segmentation": [],
535
- }
536
- )
537
- ann_id += 1
538
-
539
- with open(save_path, "w") as f:
540
- json.dump(dataset, f)
541
- LOGGER.info(
542
-             f"Convert to COCO format finished. Results saved in {save_path}"
543
- )
544
-
545
- @staticmethod
546
- def get_hash(paths):
547
- """Get the hash value of paths"""
548
- assert isinstance(paths, list), "Only support list currently."
549
- h = hashlib.md5("".join(paths).encode())
550
- return h.hexdigest()
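
Among other things, generate_coco_format_labels above converts YOLO-normalized (class, cx, cy, w, h) rows into COCO-style [x1, y1, w, h] boxes in pixels. A self-contained sketch of that conversion, with made-up image size and label values:

    img_w, img_h = 640, 480                               # hypothetical image size
    cls_id, x, y, w, h = 0, 0.5, 0.5, 0.25, 0.5           # one YOLO-normalized label
    x1 = (x - w / 2) * img_w                              # 240.0
    y1 = (y - h / 2) * img_h                              # 120.0
    x2 = (x + w / 2) * img_w                              # 400.0
    y2 = (y + h / 2) * img_h                              # 360.0
    coco_box = [x1, y1, max(0, x2 - x1), max(0, y2 - y1)]
    print(int(cls_id), coco_box)                          # 0 [240.0, 120.0, 160.0, 240.0]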
yolov6/data/vis_dataset.py DELETED
@@ -1,57 +0,0 @@
1
- # coding=utf-8
2
- # Description: visualize yolo label image.
3
-
4
- import argparse
5
- import os
6
- import cv2
7
- import numpy as np
8
-
9
- IMG_FORMATS = ["bmp", "jpg", "jpeg", "png", "tif", "tiff", "dng", "webp", "mpo"]
10
-
11
- def main(args):
12
- img_dir, label_dir, class_names = args.img_dir, args.label_dir, args.class_names
13
-
14
- label_map = dict()
15
- for class_id, classname in enumerate(class_names):
16
- label_map[class_id] = classname
17
-
18
- for file in os.listdir(img_dir):
19
- if file.split('.')[-1] not in IMG_FORMATS:
20
- print(f'[Warning]: Non-image file {file}')
21
- continue
22
- img_path = os.path.join(img_dir, file)
23
- label_path = os.path.join(label_dir, file[: file.rindex('.')] + '.txt')
24
-
25
- try:
26
- img_data = cv2.imread(img_path)
27
- height, width, _ = img_data.shape
28
- color = [tuple(np.random.choice(range(256), size=3)) for i in class_names]
29
- thickness = 2
30
-
31
- with open(label_path, 'r') as f:
32
- for bbox in f:
33
- cls, x_c, y_c, w, h = [float(v) if i > 0 else int(v) for i, v in enumerate(bbox.split('\n')[0].split(' '))]
34
-
35
- x_tl = int((x_c - w / 2) * width)
36
- y_tl = int((y_c - h / 2) * height)
37
- cv2.rectangle(img_data, (x_tl, y_tl), (x_tl + int(w * width), y_tl + int(h * height)), tuple([int(x) for x in color[cls]]), thickness)
38
- cv2.putText(img_data, label_map[cls], (x_tl, y_tl - 10), cv2.FONT_HERSHEY_COMPLEX, 1, tuple([int(x) for x in color[cls]]), thickness)
39
-
40
- cv2.imshow('image', img_data)
41
- cv2.waitKey(0)
42
- except Exception as e:
43
- print(f'[Error]: {e} {img_path}')
44
- print('======All Done!======')
45
-
46
-
47
- if __name__ == '__main__':
48
- parser = argparse.ArgumentParser()
49
- parser.add_argument('--img_dir', default='VOCdevkit/voc_07_12/images')
50
- parser.add_argument('--label_dir', default='VOCdevkit/voc_07_12/labels')
51
- parser.add_argument('--class_names', default=['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
52
- 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'])
53
-
54
- args = parser.parse_args()
55
- print(args)
56
-
57
- main(args)
yolov6/data/voc2yolo.py DELETED
@@ -1,99 +0,0 @@
1
- import xml.etree.ElementTree as ET
2
- from tqdm import tqdm
3
- import os
4
- import shutil
5
- import argparse
6
-
7
- # VOC dataset (refer https://github.com/ultralytics/yolov5/blob/master/data/VOC.yaml)
8
- # VOC2007 trainval: 446MB, 5012 images
9
- # VOC2007 test: 438MB, 4953 images
10
- # VOC2012 trainval: 1.95GB, 17126 images
11
-
12
- VOC_NAMES = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
13
- 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
14
-
15
- def convert_label(path, lb_path, year, image_id):
16
- def convert_box(size, box):
17
- dw, dh = 1. / size[0], 1. / size[1]
18
- x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
19
- return x * dw, y * dh, w * dw, h * dh
20
- in_file = open(os.path.join(path, f'VOC{year}/Annotations/{image_id}.xml'))
21
- out_file = open(lb_path, 'w')
22
- tree = ET.parse(in_file)
23
- root = tree.getroot()
24
- size = root.find('size')
25
- w = int(size.find('width').text)
26
- h = int(size.find('height').text)
27
- for obj in root.iter('object'):
28
- cls = obj.find('name').text
29
- if cls in VOC_NAMES and not int(obj.find('difficult').text) == 1:
30
- xmlbox = obj.find('bndbox')
31
- bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
32
- cls_id = VOC_NAMES.index(cls) # class id
33
- out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')
34
-
35
-
36
- def gen_voc07_12(voc_path):
37
- '''
38
- Generate voc07+12 setting dataset:
39
- train: # train images 16551 images
40
- - images/train2012
41
- - images/train2007
42
- - images/val2012
43
- - images/val2007
44
- val: # val images (relative to 'path') 4952 images
45
- - images/test2007
46
- '''
47
- dataset_root = os.path.join(voc_path, 'voc_07_12')
48
- if not os.path.exists(dataset_root):
49
- os.makedirs(dataset_root)
50
-
51
- dataset_settings = {'train': ['train2007', 'val2007', 'train2012', 'val2012'], 'val':['test2007']}
52
- for item in ['images', 'labels']:
53
- for data_type, data_list in dataset_settings.items():
54
- for data_name in data_list:
55
- ori_path = os.path.join(voc_path, item, data_name)
56
- new_path = os.path.join(dataset_root, item, data_type)
57
- if not os.path.exists(new_path):
58
- os.makedirs(new_path)
59
-
60
- print(f'[INFO]: Copying {ori_path} to {new_path}')
61
- for file in os.listdir(ori_path):
62
- shutil.copy(os.path.join(ori_path, file), new_path)
63
-
64
-
65
- def main(args):
66
- voc_path = args.voc_path
67
- for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
68
- imgs_path = os.path.join(voc_path, 'images', f'{image_set}')
69
- lbs_path = os.path.join(voc_path, 'labels', f'{image_set}')
70
-
71
- try:
72
- with open(os.path.join(voc_path, f'VOC{year}/ImageSets/Main/{image_set}.txt'), 'r') as f:
73
- image_ids = f.read().strip().split()
74
- if not os.path.exists(imgs_path):
75
- os.makedirs(imgs_path)
76
- if not os.path.exists(lbs_path):
77
- os.makedirs(lbs_path)
78
-
79
- for id in tqdm(image_ids, desc=f'{image_set}{year}'):
80
- f = os.path.join(voc_path, f'VOC{year}/JPEGImages/{id}.jpg') # old img path
81
- lb_path = os.path.join(lbs_path, f'{id}.txt') # new label path
82
- convert_label(voc_path, lb_path, year, id) # convert labels to YOLO format
83
- if os.path.exists(f):
84
- shutil.move(f, imgs_path) # move image
85
- except Exception as e:
86
-             print(f'[Warning]: {e} {year}{image_set} conversion failed!')
87
-
88
- gen_voc07_12(voc_path)
89
-
90
-
91
-
92
- if __name__ == '__main__':
93
- parser = argparse.ArgumentParser()
94
- parser.add_argument('--voc_path', default='VOCdevkit')
95
-
96
- args = parser.parse_args()
97
- print(args)
98
-
99
- main(args)
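
convert_box above maps a VOC (xmin, xmax, ymin, ymax) box to normalized YOLO (cx, cy, w, h); note the unusual argument order. A worked, self-contained example with made-up numbers:

    img_w, img_h = 500, 375                               # hypothetical VOC image size
    xmin, xmax, ymin, ymax = 100.0, 300.0, 50.0, 250.0    # box in the order convert_box expects
    dw, dh = 1.0 / img_w, 1.0 / img_h
    cx = ((xmin + xmax) / 2.0 - 1) * dw                   # 0.398
    cy = ((ymin + ymax) / 2.0 - 1) * dh                   # ~0.397
    w = (xmax - xmin) * dw                                # 0.4
    h = (ymax - ymin) * dh                                # ~0.533
    print(round(cx, 3), round(cy, 3), round(w, 3), round(h, 3))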
yolov6/layers/common.py DELETED
@@ -1,501 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
-
4
- import warnings
5
- from pathlib import Path
6
-
7
- import numpy as np
8
- import torch
9
- import torch.nn as nn
10
- import torch.nn.functional as F
11
- from yolov6.layers.dbb_transforms import *
12
-
13
-
14
- class SiLU(nn.Module):
15
- '''Activation of SiLU'''
16
- @staticmethod
17
- def forward(x):
18
- return x * torch.sigmoid(x)
19
-
20
-
21
- class Conv(nn.Module):
22
- '''Normal Conv with SiLU activation'''
23
- def __init__(self, in_channels, out_channels, kernel_size, stride, groups=1, bias=False):
24
- super().__init__()
25
- padding = kernel_size // 2
26
- self.conv = nn.Conv2d(
27
- in_channels,
28
- out_channels,
29
- kernel_size=kernel_size,
30
- stride=stride,
31
- padding=padding,
32
- groups=groups,
33
- bias=bias,
34
- )
35
- self.bn = nn.BatchNorm2d(out_channels)
36
- self.act = nn.SiLU()
37
-
38
- def forward(self, x):
39
- return self.act(self.bn(self.conv(x)))
40
-
41
- def forward_fuse(self, x):
42
- return self.act(self.conv(x))
43
-
44
-
45
- class SimConv(nn.Module):
46
- '''Normal Conv with ReLU activation'''
47
- def __init__(self, in_channels, out_channels, kernel_size, stride, groups=1, bias=False):
48
- super().__init__()
49
- padding = kernel_size // 2
50
- self.conv = nn.Conv2d(
51
- in_channels,
52
- out_channels,
53
- kernel_size=kernel_size,
54
- stride=stride,
55
- padding=padding,
56
- groups=groups,
57
- bias=bias,
58
- )
59
- self.bn = nn.BatchNorm2d(out_channels)
60
- self.act = nn.ReLU()
61
-
62
- def forward(self, x):
63
- return self.act(self.bn(self.conv(x)))
64
-
65
- def forward_fuse(self, x):
66
- return self.act(self.conv(x))
67
-
68
-
69
- class SimSPPF(nn.Module):
70
- '''Simplified SPPF with ReLU activation'''
71
- def __init__(self, in_channels, out_channels, kernel_size=5):
72
- super().__init__()
73
- c_ = in_channels // 2 # hidden channels
74
- self.cv1 = SimConv(in_channels, c_, 1, 1)
75
- self.cv2 = SimConv(c_ * 4, out_channels, 1, 1)
76
- self.m = nn.MaxPool2d(kernel_size=kernel_size, stride=1, padding=kernel_size // 2)
77
-
78
- def forward(self, x):
79
- x = self.cv1(x)
80
- with warnings.catch_warnings():
81
- warnings.simplefilter('ignore')
82
- y1 = self.m(x)
83
- y2 = self.m(y1)
84
- return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))
85
-
86
-
87
- class Transpose(nn.Module):
88
- '''Normal Transpose, default for upsampling'''
89
- def __init__(self, in_channels, out_channels, kernel_size=2, stride=2):
90
- super().__init__()
91
- self.upsample_transpose = torch.nn.ConvTranspose2d(
92
- in_channels=in_channels,
93
- out_channels=out_channels,
94
- kernel_size=kernel_size,
95
- stride=stride,
96
- bias=True
97
- )
98
-
99
- def forward(self, x):
100
- return self.upsample_transpose(x)
101
-
102
-
103
- class Concat(nn.Module):
104
- def __init__(self, dimension=1):
105
- super().__init__()
106
- self.d = dimension
107
-
108
- def forward(self, x):
109
- return torch.cat(x, self.d)
110
-
111
-
112
- def conv_bn(in_channels, out_channels, kernel_size, stride, padding, groups=1):
113
- '''Basic cell for rep-style block, including conv and bn'''
114
- result = nn.Sequential()
115
- result.add_module('conv', nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
116
- kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=False))
117
- result.add_module('bn', nn.BatchNorm2d(num_features=out_channels))
118
- return result
119
-
120
-
121
- class RepBlock(nn.Module):
122
- '''
123
- RepBlock is a stage block with rep-style basic block
124
- '''
125
- def __init__(self, in_channels, out_channels, n=1):
126
- super().__init__()
127
- self.conv1 = RepVGGBlock(in_channels, out_channels)
128
- self.block = nn.Sequential(*(RepVGGBlock(out_channels, out_channels) for _ in range(n - 1))) if n > 1 else None
129
-
130
- def forward(self, x):
131
- x = self.conv1(x)
132
- if self.block is not None:
133
- x = self.block(x)
134
- return x
135
-
136
-
137
- class RepVGGBlock(nn.Module):
138
- '''RepVGGBlock is a basic rep-style block, including training and deploy status
139
- This code is based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
140
- '''
141
- def __init__(self, in_channels, out_channels, kernel_size=3,
142
- stride=1, padding=1, dilation=1, groups=1, padding_mode='zeros', deploy=False, use_se=False):
143
- super(RepVGGBlock, self).__init__()
144
- """ Initialization of the class.
145
- Args:
146
- in_channels (int): Number of channels in the input image
147
- out_channels (int): Number of channels produced by the convolution
148
- kernel_size (int or tuple): Size of the convolving kernel
149
- stride (int or tuple, optional): Stride of the convolution. Default: 1
150
- padding (int or tuple, optional): Zero-padding added to both sides of
151
- the input. Default: 1
152
- dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
153
- groups (int, optional): Number of blocked connections from input
154
- channels to output channels. Default: 1
155
- padding_mode (string, optional): Default: 'zeros'
156
- deploy: Whether to be deploy status or training status. Default: False
157
- use_se: Whether to use se. Default: False
158
- """
159
- self.deploy = deploy
160
- self.groups = groups
161
- self.in_channels = in_channels
162
- self.out_channels = out_channels
163
-
164
- assert kernel_size == 3
165
- assert padding == 1
166
-
167
- padding_11 = padding - kernel_size // 2
168
-
169
- self.nonlinearity = nn.ReLU()
170
-
171
- if use_se:
172
- raise NotImplementedError("se block not supported yet")
173
- else:
174
- self.se = nn.Identity()
175
-
176
- if deploy:
177
- self.rbr_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride,
178
- padding=padding, dilation=dilation, groups=groups, bias=True, padding_mode=padding_mode)
179
-
180
- else:
181
- self.rbr_identity = nn.BatchNorm2d(num_features=in_channels) if out_channels == in_channels and stride == 1 else None
182
- self.rbr_dense = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups)
183
- self.rbr_1x1 = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride, padding=padding_11, groups=groups)
184
-
185
- def forward(self, inputs):
186
- '''Forward process'''
187
- if hasattr(self, 'rbr_reparam'):
188
- return self.nonlinearity(self.se(self.rbr_reparam(inputs)))
189
-
190
- if self.rbr_identity is None:
191
- id_out = 0
192
- else:
193
- id_out = self.rbr_identity(inputs)
194
-
195
- return self.nonlinearity(self.se(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out))
196
-
197
- def get_equivalent_kernel_bias(self):
198
- kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense)
199
- kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1)
200
- kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity)
201
- return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
202
-
203
- def _pad_1x1_to_3x3_tensor(self, kernel1x1):
204
- if kernel1x1 is None:
205
- return 0
206
- else:
207
- return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])
208
-
209
- def _fuse_bn_tensor(self, branch):
210
- if branch is None:
211
- return 0, 0
212
- if isinstance(branch, nn.Sequential):
213
- kernel = branch.conv.weight
214
- running_mean = branch.bn.running_mean
215
- running_var = branch.bn.running_var
216
- gamma = branch.bn.weight
217
- beta = branch.bn.bias
218
- eps = branch.bn.eps
219
- else:
220
- assert isinstance(branch, nn.BatchNorm2d)
221
- if not hasattr(self, 'id_tensor'):
222
- input_dim = self.in_channels // self.groups
223
- kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), dtype=np.float32)
224
- for i in range(self.in_channels):
225
- kernel_value[i, i % input_dim, 1, 1] = 1
226
- self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device)
227
- kernel = self.id_tensor
228
- running_mean = branch.running_mean
229
- running_var = branch.running_var
230
- gamma = branch.weight
231
- beta = branch.bias
232
- eps = branch.eps
233
- std = (running_var + eps).sqrt()
234
- t = (gamma / std).reshape(-1, 1, 1, 1)
235
- return kernel * t, beta - running_mean * gamma / std
236
-
237
- def switch_to_deploy(self):
238
- if hasattr(self, 'rbr_reparam'):
239
- return
240
- kernel, bias = self.get_equivalent_kernel_bias()
241
- self.rbr_reparam = nn.Conv2d(in_channels=self.rbr_dense.conv.in_channels, out_channels=self.rbr_dense.conv.out_channels,
242
- kernel_size=self.rbr_dense.conv.kernel_size, stride=self.rbr_dense.conv.stride,
243
- padding=self.rbr_dense.conv.padding, dilation=self.rbr_dense.conv.dilation, groups=self.rbr_dense.conv.groups, bias=True)
244
- self.rbr_reparam.weight.data = kernel
245
- self.rbr_reparam.bias.data = bias
246
- for para in self.parameters():
247
- para.detach_()
248
- self.__delattr__('rbr_dense')
249
- self.__delattr__('rbr_1x1')
250
- if hasattr(self, 'rbr_identity'):
251
- self.__delattr__('rbr_identity')
252
- if hasattr(self, 'id_tensor'):
253
- self.__delattr__('id_tensor')
254
- self.deploy = True
255
-
256
-
257
- def conv_bn_v2(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1,
258
- padding_mode='zeros'):
259
- conv_layer = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
260
- stride=stride, padding=padding, dilation=dilation, groups=groups,
261
- bias=False, padding_mode=padding_mode)
262
- bn_layer = nn.BatchNorm2d(num_features=out_channels, affine=True)
263
- se = nn.Sequential()
264
- se.add_module('conv', conv_layer)
265
- se.add_module('bn', bn_layer)
266
- return se
267
-
268
-
269
- class IdentityBasedConv1x1(nn.Conv2d):
270
-
271
- def __init__(self, channels, groups=1):
272
- super(IdentityBasedConv1x1, self).__init__(in_channels=channels, out_channels=channels, kernel_size=1, stride=1, padding=0, groups=groups, bias=False)
273
-
274
- assert channels % groups == 0
275
- input_dim = channels // groups
276
- id_value = np.zeros((channels, input_dim, 1, 1))
277
- for i in range(channels):
278
- id_value[i, i % input_dim, 0, 0] = 1
279
- self.id_tensor = torch.from_numpy(id_value).type_as(self.weight)
280
- nn.init.zeros_(self.weight)
281
-
282
- def forward(self, input):
283
- kernel = self.weight + self.id_tensor.to(self.weight.device)
284
- result = F.conv2d(input, kernel, None, stride=1, padding=0, dilation=self.dilation, groups=self.groups)
285
- return result
286
-
287
- def get_actual_kernel(self):
288
- return self.weight + self.id_tensor.to(self.weight.device)
289
-
290
-
291
- class BNAndPadLayer(nn.Module):
292
- def __init__(self,
293
- pad_pixels,
294
- num_features,
295
- eps=1e-5,
296
- momentum=0.1,
297
- affine=True,
298
- track_running_stats=True):
299
- super(BNAndPadLayer, self).__init__()
300
- self.bn = nn.BatchNorm2d(num_features, eps, momentum, affine, track_running_stats)
301
- self.pad_pixels = pad_pixels
302
-
303
- def forward(self, input):
304
- output = self.bn(input)
305
- if self.pad_pixels > 0:
306
- if self.bn.affine:
307
- pad_values = self.bn.bias.detach() - self.bn.running_mean * self.bn.weight.detach() / torch.sqrt(self.bn.running_var + self.bn.eps)
308
- else:
309
- pad_values = - self.bn.running_mean / torch.sqrt(self.bn.running_var + self.bn.eps)
310
- output = F.pad(output, [self.pad_pixels] * 4)
311
- pad_values = pad_values.view(1, -1, 1, 1)
312
- output[:, :, 0:self.pad_pixels, :] = pad_values
313
- output[:, :, -self.pad_pixels:, :] = pad_values
314
- output[:, :, :, 0:self.pad_pixels] = pad_values
315
- output[:, :, :, -self.pad_pixels:] = pad_values
316
- return output
317
-
318
- @property
319
- def bn_weight(self):
320
- return self.bn.weight
321
-
322
- @property
323
- def bn_bias(self):
324
- return self.bn.bias
325
-
326
- @property
327
- def running_mean(self):
328
- return self.bn.running_mean
329
-
330
- @property
331
- def running_var(self):
332
- return self.bn.running_var
333
-
334
- @property
335
- def eps(self):
336
- return self.bn.eps
337
-
338
-
339
- class DBBBlock(nn.Module):
340
- '''
341
- RepBlock is a stage block with rep-style basic block
342
- '''
343
- def __init__(self, in_channels, out_channels, n=1):
344
- super().__init__()
345
- self.conv1 = DiverseBranchBlock(in_channels, out_channels)
346
- self.block = nn.Sequential(*(DiverseBranchBlock(out_channels, out_channels) for _ in range(n - 1))) if n > 1 else None
347
-
348
- def forward(self, x):
349
- x = self.conv1(x)
350
- if self.block is not None:
351
- x = self.block(x)
352
- return x
353
-
354
-
355
- class DiverseBranchBlock(nn.Module):
356
-
357
- def __init__(self, in_channels, out_channels, kernel_size=3,
358
- stride=1, padding=1, dilation=1, groups=1,
359
- internal_channels_1x1_3x3=None,
360
- deploy=False, nonlinear=nn.ReLU(), single_init=False):
361
- super(DiverseBranchBlock, self).__init__()
362
- self.deploy = deploy
363
-
364
- if nonlinear is None:
365
- self.nonlinear = nn.Identity()
366
- else:
367
- self.nonlinear = nonlinear
368
-
369
- self.kernel_size = kernel_size
370
- self.out_channels = out_channels
371
- self.groups = groups
372
- assert padding == kernel_size // 2
373
-
374
- if deploy:
375
- self.dbb_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride,
376
- padding=padding, dilation=dilation, groups=groups, bias=True)
377
-
378
- else:
379
-
380
- self.dbb_origin = conv_bn_v2(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups)
381
-
382
- self.dbb_avg = nn.Sequential()
383
- if groups < out_channels:
384
- self.dbb_avg.add_module('conv',
385
- nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1,
386
- stride=1, padding=0, groups=groups, bias=False))
387
- self.dbb_avg.add_module('bn', BNAndPadLayer(pad_pixels=padding, num_features=out_channels))
388
- self.dbb_avg.add_module('avg', nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=0))
389
- self.dbb_1x1 = conv_bn_v2(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride,
390
- padding=0, groups=groups)
391
- else:
392
- self.dbb_avg.add_module('avg', nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=padding))
393
-
394
- self.dbb_avg.add_module('avgbn', nn.BatchNorm2d(out_channels))
395
-
396
- if internal_channels_1x1_3x3 is None:
397
- internal_channels_1x1_3x3 = in_channels if groups < out_channels else 2 * in_channels # For mobilenet, it is better to have 2X internal channels
398
-
399
- self.dbb_1x1_kxk = nn.Sequential()
400
- if internal_channels_1x1_3x3 == in_channels:
401
- self.dbb_1x1_kxk.add_module('idconv1', IdentityBasedConv1x1(channels=in_channels, groups=groups))
402
- else:
403
- self.dbb_1x1_kxk.add_module('conv1', nn.Conv2d(in_channels=in_channels, out_channels=internal_channels_1x1_3x3,
404
- kernel_size=1, stride=1, padding=0, groups=groups, bias=False))
405
- self.dbb_1x1_kxk.add_module('bn1', BNAndPadLayer(pad_pixels=padding, num_features=internal_channels_1x1_3x3, affine=True))
406
- self.dbb_1x1_kxk.add_module('conv2', nn.Conv2d(in_channels=internal_channels_1x1_3x3, out_channels=out_channels,
407
- kernel_size=kernel_size, stride=stride, padding=0, groups=groups, bias=False))
408
- self.dbb_1x1_kxk.add_module('bn2', nn.BatchNorm2d(out_channels))
409
-
410
- # The experiments reported in the paper used the default initialization of bn.weight (all as 1). But changing the initialization may be useful in some cases.
411
- if single_init:
412
- # Initialize the bn.weight of dbb_origin as 1 and others as 0. This is not the default setting.
413
- self.single_init()
414
-
415
- def get_equivalent_kernel_bias(self):
416
- k_origin, b_origin = transI_fusebn(self.dbb_origin.conv.weight, self.dbb_origin.bn)
417
-
418
- if hasattr(self, 'dbb_1x1'):
419
- k_1x1, b_1x1 = transI_fusebn(self.dbb_1x1.conv.weight, self.dbb_1x1.bn)
420
- k_1x1 = transVI_multiscale(k_1x1, self.kernel_size)
421
- else:
422
- k_1x1, b_1x1 = 0, 0
423
-
424
- if hasattr(self.dbb_1x1_kxk, 'idconv1'):
425
- k_1x1_kxk_first = self.dbb_1x1_kxk.idconv1.get_actual_kernel()
426
- else:
427
- k_1x1_kxk_first = self.dbb_1x1_kxk.conv1.weight
428
- k_1x1_kxk_first, b_1x1_kxk_first = transI_fusebn(k_1x1_kxk_first, self.dbb_1x1_kxk.bn1)
429
- k_1x1_kxk_second, b_1x1_kxk_second = transI_fusebn(self.dbb_1x1_kxk.conv2.weight, self.dbb_1x1_kxk.bn2)
430
- k_1x1_kxk_merged, b_1x1_kxk_merged = transIII_1x1_kxk(k_1x1_kxk_first, b_1x1_kxk_first, k_1x1_kxk_second, b_1x1_kxk_second, groups=self.groups)
431
-
432
- k_avg = transV_avg(self.out_channels, self.kernel_size, self.groups)
433
- k_1x1_avg_second, b_1x1_avg_second = transI_fusebn(k_avg.to(self.dbb_avg.avgbn.weight.device), self.dbb_avg.avgbn)
434
- if hasattr(self.dbb_avg, 'conv'):
435
- k_1x1_avg_first, b_1x1_avg_first = transI_fusebn(self.dbb_avg.conv.weight, self.dbb_avg.bn)
436
- k_1x1_avg_merged, b_1x1_avg_merged = transIII_1x1_kxk(k_1x1_avg_first, b_1x1_avg_first, k_1x1_avg_second, b_1x1_avg_second, groups=self.groups)
437
- else:
438
- k_1x1_avg_merged, b_1x1_avg_merged = k_1x1_avg_second, b_1x1_avg_second
439
-
440
- return transII_addbranch((k_origin, k_1x1, k_1x1_kxk_merged, k_1x1_avg_merged), (b_origin, b_1x1, b_1x1_kxk_merged, b_1x1_avg_merged))
441
-
442
- def switch_to_deploy(self):
443
- if hasattr(self, 'dbb_reparam'):
444
- return
445
- kernel, bias = self.get_equivalent_kernel_bias()
446
- self.dbb_reparam = nn.Conv2d(in_channels=self.dbb_origin.conv.in_channels, out_channels=self.dbb_origin.conv.out_channels,
447
- kernel_size=self.dbb_origin.conv.kernel_size, stride=self.dbb_origin.conv.stride,
448
- padding=self.dbb_origin.conv.padding, dilation=self.dbb_origin.conv.dilation, groups=self.dbb_origin.conv.groups, bias=True)
449
- self.dbb_reparam.weight.data = kernel
450
- self.dbb_reparam.bias.data = bias
451
- for para in self.parameters():
452
- para.detach_()
453
- self.__delattr__('dbb_origin')
454
- self.__delattr__('dbb_avg')
455
- if hasattr(self, 'dbb_1x1'):
456
- self.__delattr__('dbb_1x1')
457
- self.__delattr__('dbb_1x1_kxk')
458
-
459
- def forward(self, inputs):
460
-
461
- if hasattr(self, 'dbb_reparam'):
462
- return self.nonlinear(self.dbb_reparam(inputs))
463
-
464
- out = self.dbb_origin(inputs)
465
- if hasattr(self, 'dbb_1x1'):
466
- out += self.dbb_1x1(inputs)
467
- out += self.dbb_avg(inputs)
468
- out += self.dbb_1x1_kxk(inputs)
469
- return self.nonlinear(out)
470
-
471
- def init_gamma(self, gamma_value):
472
- if hasattr(self, "dbb_origin"):
473
- torch.nn.init.constant_(self.dbb_origin.bn.weight, gamma_value)
474
- if hasattr(self, "dbb_1x1"):
475
- torch.nn.init.constant_(self.dbb_1x1.bn.weight, gamma_value)
476
- if hasattr(self, "dbb_avg"):
477
- torch.nn.init.constant_(self.dbb_avg.avgbn.weight, gamma_value)
478
- if hasattr(self, "dbb_1x1_kxk"):
479
- torch.nn.init.constant_(self.dbb_1x1_kxk.bn2.weight, gamma_value)
480
-
481
- def single_init(self):
482
- self.init_gamma(0.0)
483
- if hasattr(self, "dbb_origin"):
484
- torch.nn.init.constant_(self.dbb_origin.bn.weight, 1.0)
485
-
486
-
487
- class DetectBackend(nn.Module):
488
- def __init__(self, weights='yolov6s.pt', device=None, dnn=True):
489
-
490
- super().__init__()
491
- assert isinstance(weights, str) and Path(weights).suffix == '.pt', f'{Path(weights).suffix} format is not supported.'
492
- from yolov6.utils.checkpoint import load_checkpoint
493
- model = load_checkpoint(weights, map_location=device)
494
- stride = int(model.stride.max())
495
- self.__dict__.update(locals()) # assign all variables to self
496
-
497
- def forward(self, im, val=False):
498
- y = self.model(im)
499
- if isinstance(y, np.ndarray):
500
- y = torch.tensor(y, device=self.device)
501
- return y
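
The reparameterisation in RepVGGBlock (_fuse_bn_tensor / get_equivalent_kernel_bias / switch_to_deploy) rests on folding a BatchNorm into the preceding convolution. A self-contained numerical check of that folding, with arbitrary shapes and random running statistics:

    import torch
    import torch.nn as nn

    # A conv (no bias) followed by a BatchNorm in eval mode, as in a rep-style branch.
    conv = nn.Conv2d(8, 16, 3, padding=1, bias=False)
    bn = nn.BatchNorm2d(16).eval()
    bn.running_mean.uniform_(-1.0, 1.0)   # pretend these were learned statistics
    bn.running_var.uniform_(0.5, 2.0)

    fused = nn.Conv2d(8, 16, 3, padding=1, bias=True)
    with torch.no_grad():
        std = (bn.running_var + bn.eps).sqrt()
        fused.weight.copy_(conv.weight * (bn.weight / std).reshape(-1, 1, 1, 1))
        fused.bias.copy_(bn.bias - bn.running_mean * bn.weight / std)
        x = torch.randn(1, 8, 32, 32)
        print(torch.allclose(bn(conv(x)), fused(x), atol=1e-6))   # True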
yolov6/layers/dbb_transforms.py DELETED
@@ -1,50 +0,0 @@
1
- import torch
2
- import numpy as np
3
- import torch.nn.functional as F
4
-
5
-
6
- def transI_fusebn(kernel, bn):
7
- gamma = bn.weight
8
- std = (bn.running_var + bn.eps).sqrt()
9
- return kernel * ((gamma / std).reshape(-1, 1, 1, 1)), bn.bias - bn.running_mean * gamma / std
10
-
11
-
12
- def transII_addbranch(kernels, biases):
13
- return sum(kernels), sum(biases)
14
-
15
-
16
- def transIII_1x1_kxk(k1, b1, k2, b2, groups):
17
- if groups == 1:
18
- k = F.conv2d(k2, k1.permute(1, 0, 2, 3)) #
19
- b_hat = (k2 * b1.reshape(1, -1, 1, 1)).sum((1, 2, 3))
20
- else:
21
- k_slices = []
22
- b_slices = []
23
- k1_T = k1.permute(1, 0, 2, 3)
24
- k1_group_width = k1.size(0) // groups
25
- k2_group_width = k2.size(0) // groups
26
- for g in range(groups):
27
- k1_T_slice = k1_T[:, g*k1_group_width:(g+1)*k1_group_width, :, :]
28
- k2_slice = k2[g*k2_group_width:(g+1)*k2_group_width, :, :, :]
29
- k_slices.append(F.conv2d(k2_slice, k1_T_slice))
30
- b_slices.append((k2_slice * b1[g*k1_group_width:(g+1)*k1_group_width].reshape(1, -1, 1, 1)).sum((1, 2, 3)))
31
- k, b_hat = transIV_depthconcat(k_slices, b_slices)
32
- return k, b_hat + b2
33
-
34
-
35
- def transIV_depthconcat(kernels, biases):
36
- return torch.cat(kernels, dim=0), torch.cat(biases)
37
-
38
-
39
- def transV_avg(channels, kernel_size, groups):
40
- input_dim = channels // groups
41
- k = torch.zeros((channels, input_dim, kernel_size, kernel_size))
42
- k[np.arange(channels), np.tile(np.arange(input_dim), groups), :, :] = 1.0 / kernel_size ** 2
43
- return k
44
-
45
-
46
- # This has not been tested with non-square kernels (kernel.size(2) != kernel.size(3)) nor even-size kernels
47
- def transVI_multiscale(kernel, target_kernel_size):
48
- H_pixels_to_pad = (target_kernel_size - kernel.size(2)) // 2
49
- W_pixels_to_pad = (target_kernel_size - kernel.size(3)) // 2
50
- return F.pad(kernel, [H_pixels_to_pad, H_pixels_to_pad, W_pixels_to_pad, W_pixels_to_pad])
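
transV_avg above expresses average pooling as a convolution whose kernel is uniformly 1/k^2. A self-contained check of that equivalence on random data (the sizes are arbitrary):

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 3, 8, 8)
    k = 3
    kernel = torch.full((3, 1, k, k), 1.0 / k ** 2)             # one uniform kernel per channel
    avg = F.avg_pool2d(x, kernel_size=k, stride=1, padding=0)
    conv = F.conv2d(x, kernel, stride=1, padding=0, groups=3)   # depthwise convolution
    print(torch.allclose(avg, conv, atol=1e-6))                 # True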
yolov6/models/efficientrep.py DELETED
@@ -1,102 +0,0 @@
1
- from torch import nn
2
- from yolov6.layers.common import RepVGGBlock, RepBlock, SimSPPF
3
-
4
-
5
- class EfficientRep(nn.Module):
6
- '''EfficientRep Backbone
7
- EfficientRep is handcrafted by hardware-aware neural network design.
8
-     With rep-style structure, EfficientRep is friendly to high-computation hardware (e.g. GPU).
9
- '''
10
-
11
- def __init__(
12
- self,
13
- in_channels=3,
14
- channels_list=None,
15
- num_repeats=None,
16
- ):
17
- super().__init__()
18
-
19
- assert channels_list is not None
20
- assert num_repeats is not None
21
-
22
- self.stem = RepVGGBlock(
23
- in_channels=in_channels,
24
- out_channels=channels_list[0],
25
- kernel_size=3,
26
- stride=2
27
- )
28
-
29
- self.ERBlock_2 = nn.Sequential(
30
- RepVGGBlock(
31
- in_channels=channels_list[0],
32
- out_channels=channels_list[1],
33
- kernel_size=3,
34
- stride=2
35
- ),
36
- RepBlock(
37
- in_channels=channels_list[1],
38
- out_channels=channels_list[1],
39
- n=num_repeats[1]
40
- )
41
- )
42
-
43
- self.ERBlock_3 = nn.Sequential(
44
- RepVGGBlock(
45
- in_channels=channels_list[1],
46
- out_channels=channels_list[2],
47
- kernel_size=3,
48
- stride=2
49
- ),
50
- RepBlock(
51
- in_channels=channels_list[2],
52
- out_channels=channels_list[2],
53
- n=num_repeats[2]
54
- )
55
- )
56
-
57
- self.ERBlock_4 = nn.Sequential(
58
- RepVGGBlock(
59
- in_channels=channels_list[2],
60
- out_channels=channels_list[3],
61
- kernel_size=3,
62
- stride=2
63
- ),
64
- RepBlock(
65
- in_channels=channels_list[3],
66
- out_channels=channels_list[3],
67
- n=num_repeats[3]
68
- )
69
- )
70
-
71
- self.ERBlock_5 = nn.Sequential(
72
- RepVGGBlock(
73
- in_channels=channels_list[3],
74
- out_channels=channels_list[4],
75
- kernel_size=3,
76
- stride=2,
77
- ),
78
- RepBlock(
79
- in_channels=channels_list[4],
80
- out_channels=channels_list[4],
81
- n=num_repeats[4]
82
- ),
83
- SimSPPF(
84
- in_channels=channels_list[4],
85
- out_channels=channels_list[4],
86
- kernel_size=5
87
- )
88
- )
89
-
90
- def forward(self, x):
91
-
92
- outputs = []
93
- x = self.stem(x)
94
- x = self.ERBlock_2(x)
95
- x = self.ERBlock_3(x)
96
- outputs.append(x)
97
- x = self.ERBlock_4(x)
98
- outputs.append(x)
99
- x = self.ERBlock_5(x)
100
- outputs.append(x)
101
-
102
- return tuple(outputs)
 
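A hedged forward-pass sketch for the backbone deleted above. It assumes a YOLOv6 checkout matching this code is still on the import path; the width and depth lists below are illustrative values, not necessarily the ones this Space used:

```python
import torch
from yolov6.models.efficientrep import EfficientRep   # assumes a matching YOLOv6 checkout is installed

channels_list = [64, 128, 256, 512, 1024]   # illustrative widths for the stem and ERBlock_2..5
num_repeats = [1, 6, 12, 18, 6]             # illustrative depths
backbone = EfficientRep(in_channels=3, channels_list=channels_list, num_repeats=num_repeats)

x = torch.randn(1, 3, 640, 640)
p3, p4, p5 = backbone(x)                    # stride-8, stride-16 and stride-32 feature maps
print(p3.shape, p4.shape, p5.shape)         # (1, 256, 80, 80) (1, 512, 40, 40) (1, 1024, 20, 20)
```
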
 
yolov6/models/effidehead.py DELETED
@@ -1,211 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
- import math
4
- from yolov6.layers.common import *
5
-
6
-
7
- class Detect(nn.Module):
8
- '''Efficient Decoupled Head
9
- With hardware-aware design, the decoupled head is optimized with
10
- hybrid-channel methods.
11
- '''
12
- def __init__(self, num_classes=80, anchors=1, num_layers=3, inplace=True, head_layers=None): # detection layer
13
- super().__init__()
14
- assert head_layers is not None
15
- self.nc = num_classes # number of classes
16
- self.no = num_classes + 5 # number of outputs per anchor
17
- self.nl = num_layers # number of detection layers
18
- if isinstance(anchors, (list, tuple)):
19
- self.na = len(anchors[0]) // 2
20
- else:
21
- self.na = anchors
22
- self.anchors = anchors
23
- self.grid = [torch.zeros(1)] * num_layers
24
- self.prior_prob = 1e-2
25
- self.inplace = inplace
26
- stride = [8, 16, 32] # strides computed during build
27
- self.stride = torch.tensor(stride)
28
-
29
- # Init decouple head
30
- self.cls_convs = nn.ModuleList()
31
- self.reg_convs = nn.ModuleList()
32
- self.cls_preds = nn.ModuleList()
33
- self.reg_preds = nn.ModuleList()
34
- self.obj_preds = nn.ModuleList()
35
- self.stems = nn.ModuleList()
36
-
37
- # Efficient decoupled head layers
38
- for i in range(num_layers):
39
- idx = i*6
40
- self.stems.append(head_layers[idx])
41
- self.cls_convs.append(head_layers[idx+1])
42
- self.reg_convs.append(head_layers[idx+2])
43
- self.cls_preds.append(head_layers[idx+3])
44
- self.reg_preds.append(head_layers[idx+4])
45
- self.obj_preds.append(head_layers[idx+5])
46
-
47
- def initialize_biases(self):
48
- for conv in self.cls_preds:
49
- b = conv.bias.view(self.na, -1)
50
- b.data.fill_(-math.log((1 - self.prior_prob) / self.prior_prob))
51
- conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
52
- for conv in self.obj_preds:
53
- b = conv.bias.view(self.na, -1)
54
- b.data.fill_(-math.log((1 - self.prior_prob) / self.prior_prob))
55
- conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
56
-
57
- def forward(self, x):
58
- z = []
59
- for i in range(self.nl):
60
- x[i] = self.stems[i](x[i])
61
- cls_x = x[i]
62
- reg_x = x[i]
63
- cls_feat = self.cls_convs[i](cls_x)
64
- cls_output = self.cls_preds[i](cls_feat)
65
- reg_feat = self.reg_convs[i](reg_x)
66
- reg_output = self.reg_preds[i](reg_feat)
67
- obj_output = self.obj_preds[i](reg_feat)
68
- if self.training:
69
- x[i] = torch.cat([reg_output, obj_output, cls_output], 1)
70
- bs, _, ny, nx = x[i].shape
71
- x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
72
- else:
73
- y = torch.cat([reg_output, obj_output.sigmoid(), cls_output.sigmoid()], 1)
74
- bs, _, ny, nx = y.shape
75
- y = y.view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
76
- if self.grid[i].shape[2:4] != y.shape[2:4]:
77
- d = self.stride.device
78
- yv, xv = torch.meshgrid([torch.arange(ny).to(d), torch.arange(nx).to(d)])
79
- self.grid[i] = torch.stack((xv, yv), 2).view(1, self.na, ny, nx, 2).float()
80
- if self.inplace:
81
- y[..., 0:2] = (y[..., 0:2] + self.grid[i]) * self.stride[i] # xy
82
- y[..., 2:4] = torch.exp(y[..., 2:4]) * self.stride[i] # wh
83
- else:
84
- xy = (y[..., 0:2] + self.grid[i]) * self.stride[i] # xy
85
- wh = torch.exp(y[..., 2:4]) * self.stride[i] # wh
86
- y = torch.cat((xy, wh, y[..., 4:]), -1)
87
- z.append(y.view(bs, -1, self.no))
88
- return x if self.training else torch.cat(z, 1)
89
-
90
-
91
- def build_effidehead_layer(channels_list, num_anchors, num_classes):
92
- head_layers = nn.Sequential(
93
- # stem0
94
- Conv(
95
- in_channels=channels_list[6],
96
- out_channels=channels_list[6],
97
- kernel_size=1,
98
- stride=1
99
- ),
100
- # cls_conv0
101
- Conv(
102
- in_channels=channels_list[6],
103
- out_channels=channels_list[6],
104
- kernel_size=3,
105
- stride=1
106
- ),
107
- # reg_conv0
108
- Conv(
109
- in_channels=channels_list[6],
110
- out_channels=channels_list[6],
111
- kernel_size=3,
112
- stride=1
113
- ),
114
- # cls_pred0
115
- nn.Conv2d(
116
- in_channels=channels_list[6],
117
- out_channels=num_classes * num_anchors,
118
- kernel_size=1
119
- ),
120
- # reg_pred0
121
- nn.Conv2d(
122
- in_channels=channels_list[6],
123
- out_channels=4 * num_anchors,
124
- kernel_size=1
125
- ),
126
- # obj_pred0
127
- nn.Conv2d(
128
- in_channels=channels_list[6],
129
- out_channels=1 * num_anchors,
130
- kernel_size=1
131
- ),
132
- # stem1
133
- Conv(
134
- in_channels=channels_list[8],
135
- out_channels=channels_list[8],
136
- kernel_size=1,
137
- stride=1
138
- ),
139
- # cls_conv1
140
- Conv(
141
- in_channels=channels_list[8],
142
- out_channels=channels_list[8],
143
- kernel_size=3,
144
- stride=1
145
- ),
146
- # reg_conv1
147
- Conv(
148
- in_channels=channels_list[8],
149
- out_channels=channels_list[8],
150
- kernel_size=3,
151
- stride=1
152
- ),
153
- # cls_pred1
154
- nn.Conv2d(
155
- in_channels=channels_list[8],
156
- out_channels=num_classes * num_anchors,
157
- kernel_size=1
158
- ),
159
- # reg_pred1
160
- nn.Conv2d(
161
- in_channels=channels_list[8],
162
- out_channels=4 * num_anchors,
163
- kernel_size=1
164
- ),
165
- # obj_pred1
166
- nn.Conv2d(
167
- in_channels=channels_list[8],
168
- out_channels=1 * num_anchors,
169
- kernel_size=1
170
- ),
171
- # stem2
172
- Conv(
173
- in_channels=channels_list[10],
174
- out_channels=channels_list[10],
175
- kernel_size=1,
176
- stride=1
177
- ),
178
- # cls_conv2
179
- Conv(
180
- in_channels=channels_list[10],
181
- out_channels=channels_list[10],
182
- kernel_size=3,
183
- stride=1
184
- ),
185
- # reg_conv2
186
- Conv(
187
- in_channels=channels_list[10],
188
- out_channels=channels_list[10],
189
- kernel_size=3,
190
- stride=1
191
- ),
192
- # cls_pred2
193
- nn.Conv2d(
194
- in_channels=channels_list[10],
195
- out_channels=num_classes * num_anchors,
196
- kernel_size=1
197
- ),
198
- # reg_pred2
199
- nn.Conv2d(
200
- in_channels=channels_list[10],
201
- out_channels=4 * num_anchors,
202
- kernel_size=1
203
- ),
204
- # obj_pred2
205
- nn.Conv2d(
206
- in_channels=channels_list[10],
207
- out_channels=1 * num_anchors,
208
- kernel_size=1
209
- )
210
- )
211
- return head_layers
 
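A hedged sketch of assembling and running the decoupled head deleted above, again assuming a matching YOLOv6 checkout is importable. The channel list is a hypothetical fused backbone+neck width list; build_effidehead_layer only reads indices 6, 8 and 10 from it:

```python
import torch
from yolov6.models.effidehead import Detect, build_effidehead_layer

channels_list = [64, 128, 256, 512, 1024, 256, 128, 128, 256, 256, 512]   # hypothetical widths
head_layers = build_effidehead_layer(channels_list, num_anchors=1, num_classes=80)
head = Detect(num_classes=80, anchors=1, num_layers=3, head_layers=head_layers)
head.initialize_biases()
head.eval()                                   # inference branch: decoded, concatenated predictions

feats = [torch.randn(1, 128, 80, 80),         # P3, width channels_list[6]
         torch.randn(1, 256, 40, 40),         # P4, width channels_list[8]
         torch.randn(1, 512, 20, 20)]         # P5, width channels_list[10]
with torch.no_grad():
    preds = head(feats)
print(preds.shape)                            # (1, 8400, 85): 80*80 + 40*40 + 20*20 anchors, 4 + 1 + 80 outputs
```
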
 
yolov6/models/end2end.py DELETED
@@ -1,147 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
- import random
4
-
5
- class ORT_NMS(torch.autograd.Function):
6
-
7
- @staticmethod
8
- def forward(ctx,
9
- boxes,
10
- scores,
11
- max_output_boxes_per_class=torch.tensor([100]),
12
- iou_threshold=torch.tensor([0.45]),
13
- score_threshold=torch.tensor([0.25])):
14
- device = boxes.device
15
- batch = scores.shape[0]
16
- num_det = random.randint(0, 100)
17
- batches = torch.randint(0, batch, (num_det,)).sort()[0].to(device)
18
- idxs = torch.arange(100, 100 + num_det).to(device)
19
- zeros = torch.zeros((num_det,), dtype=torch.int64).to(device)
20
- selected_indices = torch.cat([batches[None], zeros[None], idxs[None]], 0).T.contiguous()
21
- selected_indices = selected_indices.to(torch.int64)
22
- return selected_indices
23
-
24
- @staticmethod
25
- def symbolic(g, boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold):
26
- return g.op("NonMaxSuppression", boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold)
27
-
28
- class TRT_NMS(torch.autograd.Function):
29
- @staticmethod
30
- def forward(
31
- ctx,
32
- boxes,
33
- scores,
34
- background_class=-1,
35
- box_coding=1,
36
- iou_threshold=0.45,
37
- max_output_boxes=100,
38
- plugin_version="1",
39
- score_activation=0,
40
- score_threshold=0.25,
41
- ):
42
- batch_size, num_boxes, num_classes = scores.shape
43
- num_det = torch.randint(0, max_output_boxes, (batch_size, 1), dtype=torch.int32)
44
- det_boxes = torch.randn(batch_size, max_output_boxes, 4)
45
- det_scores = torch.randn(batch_size, max_output_boxes)
46
- det_classes = torch.randint(0, num_classes, (batch_size, max_output_boxes), dtype=torch.int32)
47
-
48
- return num_det, det_boxes, det_scores, det_classes
49
-
50
- @staticmethod
51
- def symbolic(g,
52
- boxes,
53
- scores,
54
- background_class=-1,
55
- box_coding=1,
56
- iou_threshold=0.45,
57
- max_output_boxes=100,
58
- plugin_version="1",
59
- score_activation=0,
60
- score_threshold=0.25):
61
- out = g.op("TRT::EfficientNMS_TRT",
62
- boxes,
63
- scores,
64
- background_class_i=background_class,
65
- box_coding_i=box_coding,
66
- iou_threshold_f=iou_threshold,
67
- max_output_boxes_i=max_output_boxes,
68
- plugin_version_s=plugin_version,
69
- score_activation_i=score_activation,
70
- score_threshold_f=score_threshold,
71
- outputs=4)
72
- nums, boxes, scores, classes = out
73
- return nums,boxes,scores,classes
74
-
75
-
76
-
77
- class ONNX_ORT(nn.Module):
78
-
79
- def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=640, device=None):
80
- super().__init__()
81
- self.device = device if device else torch.device("cpu")
82
- self.max_obj = torch.tensor([max_obj]).to(device)
83
- self.iou_threshold = torch.tensor([iou_thres]).to(device)
84
- self.score_threshold = torch.tensor([score_thres]).to(device)
85
- self.max_wh = max_wh
86
- self.convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]],
87
- dtype=torch.float32,
88
- device=self.device)
89
-
90
- def forward(self, x):
91
- box = x[:, :, :4]
92
- conf = x[:, :, 4:5]
93
- score = x[:, :, 5:]
94
- score *= conf
95
- box @= self.convert_matrix
96
- objScore, objCls = score.max(2, keepdim=True)
97
- dis = objCls.float() * self.max_wh
98
- nmsbox = box + dis
99
- objScore1 = objScore.transpose(1, 2).contiguous()
100
- selected_indices = ORT_NMS.apply(nmsbox, objScore1, self.max_obj, self.iou_threshold, self.score_threshold)
101
- X, Y = selected_indices[:, 0], selected_indices[:, 2]
102
- resBoxes = box[X, Y, :]
103
- resClasses = objCls[X, Y, :].float()
104
- resScores = objScore[X, Y, :]
105
- X = X.unsqueeze(1).float()
106
- return torch.cat([X, resBoxes, resClasses, resScores], 1)
107
-
108
- class ONNX_TRT(nn.Module):
109
-
110
- def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None ,device=None):
111
- super().__init__()
112
- assert max_wh is None
113
- self.device = device if device else torch.device('cpu')
114
- self.background_class = -1,
115
- self.box_coding = 1,
116
- self.iou_threshold = iou_thres
117
- self.max_obj = max_obj
118
- self.plugin_version = '1'
119
- self.score_activation = 0
120
- self.score_threshold = score_thres
121
-
122
- def forward(self, x):
123
- box = x[:, :, :4]
124
- conf = x[:, :, 4:5]
125
- score = x[:, :, 5:]
126
- score *= conf
127
- num_det, det_boxes, det_scores, det_classes = TRT_NMS.apply(box, score, self.background_class, self.box_coding,
128
- self.iou_threshold, self.max_obj,
129
- self.plugin_version, self.score_activation,
130
- self.score_threshold)
131
- return num_det, det_boxes, det_scores, det_classes
132
-
133
-
134
- class End2End(nn.Module):
135
-
136
- def __init__(self, model, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None):
137
- super().__init__()
138
- device = device if device else torch.device('cpu')
139
- self.model = model.to(device)
140
- self.patch_model = ONNX_TRT if max_wh is None else ONNX_ORT
141
- self.end2end = self.patch_model(max_obj, iou_thres, score_thres, max_wh, device)
142
- self.end2end.eval()
143
-
144
- def forward(self, x):
145
- x = self.model(x)
146
- x = self.end2end(x)
147
- return x
 
 
yolov6/models/loss.py DELETED
@@ -1,411 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
-
4
- # The code is based on
5
- # https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/models/yolo_head.py
6
- # Copyright (c) Megvii, Inc. and its affiliates.
7
-
8
- import torch
9
- import torch.nn as nn
10
- import numpy as np
11
- import torch.nn.functional as F
12
- from yolov6.utils.figure_iou import IOUloss, pairwise_bbox_iou
13
-
14
-
15
- class ComputeLoss:
16
- '''Loss computation func.
17
- This func contains SimOTA and siou loss.
18
- '''
19
- def __init__(self,
20
- reg_weight=5.0,
21
- iou_weight=3.0,
22
- cls_weight=1.0,
23
- center_radius=2.5,
24
- eps=1e-7,
25
- in_channels=[256, 512, 1024],
26
- strides=[8, 16, 32],
27
- n_anchors=1,
28
- iou_type='ciou'
29
- ):
30
-
31
- self.reg_weight = reg_weight
32
- self.iou_weight = iou_weight
33
- self.cls_weight = cls_weight
34
-
35
- self.center_radius = center_radius
36
- self.eps = eps
37
- self.n_anchors = n_anchors
38
- self.strides = strides
39
- self.grids = [torch.zeros(1)] * len(in_channels)
40
-
41
- # Define criteria
42
- self.l1_loss = nn.L1Loss(reduction="none")
43
- self.bcewithlog_loss = nn.BCEWithLogitsLoss(reduction="none")
44
- self.iou_loss = IOUloss(iou_type=iou_type, reduction="none")
45
-
46
- def __call__(
47
- self,
48
- outputs,
49
- targets
50
- ):
51
- dtype = outputs[0].type()
52
- device = targets.device
53
- loss_cls, loss_obj, loss_iou, loss_l1 = torch.zeros(1, device=device), torch.zeros(1, device=device), \
54
- torch.zeros(1, device=device), torch.zeros(1, device=device)
55
- num_classes = outputs[0].shape[-1] - 5
56
-
57
- outputs, outputs_origin, gt_bboxes_scale, xy_shifts, expanded_strides = self.get_outputs_and_grids(
58
- outputs, self.strides, dtype, device)
59
-
60
- total_num_anchors = outputs.shape[1]
61
- bbox_preds = outputs[:, :, :4] # [batch, n_anchors_all, 4]
62
- bbox_preds_org = outputs_origin[:, :, :4] # [batch, n_anchors_all, 4]
63
- obj_preds = outputs[:, :, 4].unsqueeze(-1) # [batch, n_anchors_all, 1]
64
- cls_preds = outputs[:, :, 5:] # [batch, n_anchors_all, n_cls]
65
-
66
- # targets
67
- batch_size = bbox_preds.shape[0]
68
- targets_list = np.zeros((batch_size, 1, 5)).tolist()
69
- for i, item in enumerate(targets.cpu().numpy().tolist()):
70
- targets_list[int(item[0])].append(item[1:])
71
- max_len = max((len(l) for l in targets_list))
72
-
73
- targets = torch.from_numpy(np.array(list(map(lambda l:l + [[-1,0,0,0,0]]*(max_len - len(l)), targets_list)))[:,1:,:]).to(targets.device)
74
- num_targets_list = (targets.sum(dim=2) > 0).sum(dim=1) # number of objects
75
-
76
- num_fg, num_gts = 0, 0
77
- cls_targets, reg_targets, l1_targets, obj_targets, fg_masks = [], [], [], [], []
78
-
79
- for batch_idx in range(batch_size):
80
- num_gt = int(num_targets_list[batch_idx])
81
- num_gts += num_gt
82
- if num_gt == 0:
83
- cls_target = outputs.new_zeros((0, num_classes))
84
- reg_target = outputs.new_zeros((0, 4))
85
- l1_target = outputs.new_zeros((0, 4))
86
- obj_target = outputs.new_zeros((total_num_anchors, 1))
87
- fg_mask = outputs.new_zeros(total_num_anchors).bool()
88
- else:
89
-
90
- gt_bboxes_per_image = targets[batch_idx, :num_gt, 1:5].mul_(gt_bboxes_scale)
91
- gt_classes = targets[batch_idx, :num_gt, 0]
92
- bboxes_preds_per_image = bbox_preds[batch_idx]
93
- cls_preds_per_image = cls_preds[batch_idx]
94
- obj_preds_per_image = obj_preds[batch_idx]
95
-
96
- try:
97
- (
98
- gt_matched_classes,
99
- fg_mask,
100
- pred_ious_this_matching,
101
- matched_gt_inds,
102
- num_fg_img,
103
- ) = self.get_assignments(
104
- batch_idx,
105
- num_gt,
106
- total_num_anchors,
107
- gt_bboxes_per_image,
108
- gt_classes,
109
- bboxes_preds_per_image,
110
- cls_preds_per_image,
111
- obj_preds_per_image,
112
- expanded_strides,
113
- xy_shifts,
114
- num_classes
115
- )
116
-
117
- except RuntimeError:
118
- print(
119
- "OOM RuntimeError is raised due to the huge memory cost during label assignment. \
120
- CPU mode is applied in this batch. If you want to avoid this issue, \
121
- try to reduce the batch size or image size."
122
- )
123
- torch.cuda.empty_cache()
124
- print("------------CPU Mode for This Batch-------------")
125
-
126
- _gt_bboxes_per_image = gt_bboxes_per_image.cpu().float()
127
- _gt_classes = gt_classes.cpu().float()
128
- _bboxes_preds_per_image = bboxes_preds_per_image.cpu().float()
129
- _cls_preds_per_image = cls_preds_per_image.cpu().float()
130
- _obj_preds_per_image = obj_preds_per_image.cpu().float()
131
-
132
- _expanded_strides = expanded_strides.cpu().float()
133
- _xy_shifts = xy_shifts.cpu()
134
-
135
- (
136
- gt_matched_classes,
137
- fg_mask,
138
- pred_ious_this_matching,
139
- matched_gt_inds,
140
- num_fg_img,
141
- ) = self.get_assignments(
142
- batch_idx,
143
- num_gt,
144
- total_num_anchors,
145
- _gt_bboxes_per_image,
146
- _gt_classes,
147
- _bboxes_preds_per_image,
148
- _cls_preds_per_image,
149
- _obj_preds_per_image,
150
- _expanded_strides,
151
- _xy_shifts,
152
- num_classes
153
- )
154
-
155
- gt_matched_classes = gt_matched_classes.cuda()
156
- fg_mask = fg_mask.cuda()
157
- pred_ious_this_matching = pred_ious_this_matching.cuda()
158
- matched_gt_inds = matched_gt_inds.cuda()
159
-
160
- torch.cuda.empty_cache()
161
- num_fg += num_fg_img
162
- if num_fg_img > 0:
163
- cls_target = F.one_hot(
164
- gt_matched_classes.to(torch.int64), num_classes
165
- ) * pred_ious_this_matching.unsqueeze(-1)
166
- obj_target = fg_mask.unsqueeze(-1)
167
- reg_target = gt_bboxes_per_image[matched_gt_inds]
168
-
169
- l1_target = self.get_l1_target(
170
- outputs.new_zeros((num_fg_img, 4)),
171
- gt_bboxes_per_image[matched_gt_inds],
172
- expanded_strides[0][fg_mask],
173
- xy_shifts=xy_shifts[0][fg_mask],
174
- )
175
-
176
- cls_targets.append(cls_target)
177
- reg_targets.append(reg_target)
178
- obj_targets.append(obj_target)
179
- l1_targets.append(l1_target)
180
- fg_masks.append(fg_mask)
181
-
182
- cls_targets = torch.cat(cls_targets, 0)
183
- reg_targets = torch.cat(reg_targets, 0)
184
- obj_targets = torch.cat(obj_targets, 0)
185
- l1_targets = torch.cat(l1_targets, 0)
186
- fg_masks = torch.cat(fg_masks, 0)
187
-
188
- num_fg = max(num_fg, 1)
189
- # loss
190
- loss_iou += (self.iou_loss(bbox_preds.view(-1, 4)[fg_masks].T, reg_targets)).sum() / num_fg
191
- loss_l1 += (self.l1_loss(bbox_preds_org.view(-1, 4)[fg_masks], l1_targets)).sum() / num_fg
192
-
193
- loss_obj += (self.bcewithlog_loss(obj_preds.view(-1, 1), obj_targets*1.0)).sum() / num_fg
194
- loss_cls += (self.bcewithlog_loss(cls_preds.view(-1, num_classes)[fg_masks], cls_targets)).sum() / num_fg
195
-
196
- total_losses = self.reg_weight * loss_iou + loss_l1 + loss_obj + loss_cls
197
- return total_losses, torch.cat((self.reg_weight * loss_iou, loss_l1, loss_obj, loss_cls)).detach()
198
-
199
- def decode_output(self, output, k, stride, dtype, device):
200
- grid = self.grids[k].to(device)
201
- batch_size = output.shape[0]
202
- hsize, wsize = output.shape[2:4]
203
- if grid.shape[2:4] != output.shape[2:4]:
204
- yv, xv = torch.meshgrid([torch.arange(hsize), torch.arange(wsize)])
205
- grid = torch.stack((xv, yv), 2).view(1, 1, hsize, wsize, 2).type(dtype).to(device)
206
- self.grids[k] = grid
207
-
208
- output = output.reshape(batch_size, self.n_anchors * hsize * wsize, -1)
209
- output_origin = output.clone()
210
- grid = grid.view(1, -1, 2)
211
-
212
- output[..., :2] = (output[..., :2] + grid) * stride
213
- output[..., 2:4] = torch.exp(output[..., 2:4]) * stride
214
-
215
- return output, output_origin, grid, hsize, wsize
216
-
217
- def get_outputs_and_grids(self, outputs, strides, dtype, device):
218
- xy_shifts = []
219
- expanded_strides = []
220
- outputs_new = []
221
- outputs_origin = []
222
-
223
- for k, output in enumerate(outputs):
224
- output, output_origin, grid, feat_h, feat_w = self.decode_output(
225
- output, k, strides[k], dtype, device)
226
-
227
- xy_shift = grid
228
- expanded_stride = torch.full((1, grid.shape[1], 1), strides[k], dtype=grid.dtype, device=grid.device)
229
-
230
- xy_shifts.append(xy_shift)
231
- expanded_strides.append(expanded_stride)
232
- outputs_new.append(output)
233
- outputs_origin.append(output_origin)
234
-
235
- xy_shifts = torch.cat(xy_shifts, 1) # [1, n_anchors_all, 2]
236
- expanded_strides = torch.cat(expanded_strides, 1) # [1, n_anchors_all, 1]
237
- outputs_origin = torch.cat(outputs_origin, 1)
238
- outputs = torch.cat(outputs_new, 1)
239
-
240
- feat_h *= strides[-1]
241
- feat_w *= strides[-1]
242
- gt_bboxes_scale = torch.Tensor([[feat_w, feat_h, feat_w, feat_h]]).type_as(outputs)
243
-
244
- return outputs, outputs_origin, gt_bboxes_scale, xy_shifts, expanded_strides
245
-
246
- def get_l1_target(self, l1_target, gt, stride, xy_shifts, eps=1e-8):
247
-
248
- l1_target[:, 0:2] = gt[:, 0:2] / stride - xy_shifts
249
- l1_target[:, 2:4] = torch.log(gt[:, 2:4] / stride + eps)
250
- return l1_target
251
-
252
- @torch.no_grad()
253
- def get_assignments(
254
- self,
255
- batch_idx,
256
- num_gt,
257
- total_num_anchors,
258
- gt_bboxes_per_image,
259
- gt_classes,
260
- bboxes_preds_per_image,
261
- cls_preds_per_image,
262
- obj_preds_per_image,
263
- expanded_strides,
264
- xy_shifts,
265
- num_classes
266
- ):
267
-
268
- fg_mask, is_in_boxes_and_center = self.get_in_boxes_info(
269
- gt_bboxes_per_image,
270
- expanded_strides,
271
- xy_shifts,
272
- total_num_anchors,
273
- num_gt,
274
- )
275
-
276
- bboxes_preds_per_image = bboxes_preds_per_image[fg_mask]
277
- cls_preds_ = cls_preds_per_image[fg_mask]
278
- obj_preds_ = obj_preds_per_image[fg_mask]
279
- num_in_boxes_anchor = bboxes_preds_per_image.shape[0]
280
-
281
- # cost
282
- pair_wise_ious = pairwise_bbox_iou(gt_bboxes_per_image, bboxes_preds_per_image, box_format='xywh')
283
- pair_wise_ious_loss = -torch.log(pair_wise_ious + 1e-8)
284
-
285
- gt_cls_per_image = (
286
- F.one_hot(gt_classes.to(torch.int64), num_classes)
287
- .float()
288
- .unsqueeze(1)
289
- .repeat(1, num_in_boxes_anchor, 1)
290
- )
291
-
292
- with torch.cuda.amp.autocast(enabled=False):
293
- cls_preds_ = (
294
- cls_preds_.float().sigmoid_().unsqueeze(0).repeat(num_gt, 1, 1)
295
- * obj_preds_.float().sigmoid_().unsqueeze(0).repeat(num_gt, 1, 1)
296
- )
297
- pair_wise_cls_loss = F.binary_cross_entropy(
298
- cls_preds_.sqrt_(), gt_cls_per_image, reduction="none"
299
- ).sum(-1)
300
- del cls_preds_, obj_preds_
301
-
302
- cost = (
303
- self.cls_weight * pair_wise_cls_loss
304
- + self.iou_weight * pair_wise_ious_loss
305
- + 100000.0 * (~is_in_boxes_and_center)
306
- )
307
-
308
- (
309
- num_fg,
310
- gt_matched_classes,
311
- pred_ious_this_matching,
312
- matched_gt_inds,
313
- ) = self.dynamic_k_matching(cost, pair_wise_ious, gt_classes, num_gt, fg_mask)
314
-
315
- del pair_wise_cls_loss, cost, pair_wise_ious, pair_wise_ious_loss
316
-
317
- return (
318
- gt_matched_classes,
319
- fg_mask,
320
- pred_ious_this_matching,
321
- matched_gt_inds,
322
- num_fg,
323
- )
324
-
325
- def get_in_boxes_info(
326
- self,
327
- gt_bboxes_per_image,
328
- expanded_strides,
329
- xy_shifts,
330
- total_num_anchors,
331
- num_gt,
332
- ):
333
- expanded_strides_per_image = expanded_strides[0]
334
- xy_shifts_per_image = xy_shifts[0] * expanded_strides_per_image
335
- xy_centers_per_image = (
336
- (xy_shifts_per_image + 0.5 * expanded_strides_per_image)
337
- .unsqueeze(0)
338
- .repeat(num_gt, 1, 1)
339
- ) # [n_anchor, 2] -> [n_gt, n_anchor, 2]
340
-
341
- gt_bboxes_per_image_lt = (
342
- (gt_bboxes_per_image[:, 0:2] - 0.5 * gt_bboxes_per_image[:, 2:4])
343
- .unsqueeze(1)
344
- .repeat(1, total_num_anchors, 1)
345
- )
346
- gt_bboxes_per_image_rb = (
347
- (gt_bboxes_per_image[:, 0:2] + 0.5 * gt_bboxes_per_image[:, 2:4])
348
- .unsqueeze(1)
349
- .repeat(1, total_num_anchors, 1)
350
- ) # [n_gt, 2] -> [n_gt, n_anchor, 2]
351
-
352
- b_lt = xy_centers_per_image - gt_bboxes_per_image_lt
353
- b_rb = gt_bboxes_per_image_rb - xy_centers_per_image
354
- bbox_deltas = torch.cat([b_lt, b_rb], 2)
355
-
356
- is_in_boxes = bbox_deltas.min(dim=-1).values > 0.0
357
- is_in_boxes_all = is_in_boxes.sum(dim=0) > 0
358
-
359
- # in fixed center
360
- gt_bboxes_per_image_lt = (gt_bboxes_per_image[:, 0:2]).unsqueeze(1).repeat(
361
- 1, total_num_anchors, 1
362
- ) - self.center_radius * expanded_strides_per_image.unsqueeze(0)
363
- gt_bboxes_per_image_rb = (gt_bboxes_per_image[:, 0:2]).unsqueeze(1).repeat(
364
- 1, total_num_anchors, 1
365
- ) + self.center_radius * expanded_strides_per_image.unsqueeze(0)
366
-
367
- c_lt = xy_centers_per_image - gt_bboxes_per_image_lt
368
- c_rb = gt_bboxes_per_image_rb - xy_centers_per_image
369
- center_deltas = torch.cat([c_lt, c_rb], 2)
370
- is_in_centers = center_deltas.min(dim=-1).values > 0.0
371
- is_in_centers_all = is_in_centers.sum(dim=0) > 0
372
-
373
- # in boxes and in centers
374
- is_in_boxes_anchor = is_in_boxes_all | is_in_centers_all
375
-
376
- is_in_boxes_and_center = (
377
- is_in_boxes[:, is_in_boxes_anchor] & is_in_centers[:, is_in_boxes_anchor]
378
- )
379
- return is_in_boxes_anchor, is_in_boxes_and_center
380
-
381
- def dynamic_k_matching(self, cost, pair_wise_ious, gt_classes, num_gt, fg_mask):
382
- matching_matrix = torch.zeros_like(cost, dtype=torch.uint8)
383
- ious_in_boxes_matrix = pair_wise_ious
384
- n_candidate_k = min(10, ious_in_boxes_matrix.size(1))
385
- topk_ious, _ = torch.topk(ious_in_boxes_matrix, n_candidate_k, dim=1)
386
- dynamic_ks = torch.clamp(topk_ious.sum(1).int(), min=1)
387
- dynamic_ks = dynamic_ks.tolist()
388
-
389
- for gt_idx in range(num_gt):
390
- _, pos_idx = torch.topk(
391
- cost[gt_idx], k=dynamic_ks[gt_idx], largest=False
392
- )
393
- matching_matrix[gt_idx][pos_idx] = 1
394
- del topk_ious, dynamic_ks, pos_idx
395
-
396
- anchor_matching_gt = matching_matrix.sum(0)
397
- if (anchor_matching_gt > 1).sum() > 0:
398
- _, cost_argmin = torch.min(cost[:, anchor_matching_gt > 1], dim=0)
399
- matching_matrix[:, anchor_matching_gt > 1] *= 0
400
- matching_matrix[cost_argmin, anchor_matching_gt > 1] = 1
401
- fg_mask_inboxes = matching_matrix.sum(0) > 0
402
- num_fg = fg_mask_inboxes.sum().item()
403
- fg_mask[fg_mask.clone()] = fg_mask_inboxes
404
- matched_gt_inds = matching_matrix[:, fg_mask_inboxes].argmax(0)
405
- gt_matched_classes = gt_classes[matched_gt_inds]
406
-
407
- pred_ious_this_matching = (matching_matrix * pair_wise_ious).sum(0)[
408
- fg_mask_inboxes
409
- ]
410
-
411
- return num_fg, gt_matched_classes, pred_ious_this_matching, matched_gt_inds
 
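The loss above assigns targets with SimOTA. A self-contained numeric sketch of the dynamic-k rule used in dynamic_k_matching: each ground truth gets k = clamp(int(sum of its top-10 candidate IoUs), min=1) anchors. The IoU values here are made up:

```python
import torch

pair_wise_ious = torch.tensor([[0.62, 0.55, 0.10, 0.05],    # IoUs of GT 0 with four candidate anchors
                               [0.20, 0.15, 0.70, 0.65]])   # IoUs of GT 1
n_candidate_k = min(10, pair_wise_ious.size(1))
topk_ious, _ = torch.topk(pair_wise_ious, n_candidate_k, dim=1)
dynamic_ks = torch.clamp(topk_ious.sum(1).int(), min=1)
print(dynamic_ks.tolist())   # [1, 1]: each GT keeps only its single lowest-cost anchor in this toy case
```
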
 
yolov6/models/reppan.py DELETED
@@ -1,108 +0,0 @@
1
- import torch
2
- from torch import nn
3
- from yolov6.layers.common import RepBlock, SimConv, Transpose
4
-
5
-
6
- class RepPANNeck(nn.Module):
7
- """RepPANNeck Module
8
- EfficientRep is the default backbone of this model.
9
- RepPANNeck balances feature fusion ability and hardware efficiency.
10
- """
11
-
12
- def __init__(
13
- self,
14
- channels_list=None,
15
- num_repeats=None
16
- ):
17
- super().__init__()
18
-
19
- assert channels_list is not None
20
- assert num_repeats is not None
21
-
22
- self.Rep_p4 = RepBlock(
23
- in_channels=channels_list[3] + channels_list[5],
24
- out_channels=channels_list[5],
25
- n=num_repeats[5],
26
- )
27
-
28
- self.Rep_p3 = RepBlock(
29
- in_channels=channels_list[2] + channels_list[6],
30
- out_channels=channels_list[6],
31
- n=num_repeats[6]
32
- )
33
-
34
- self.Rep_n3 = RepBlock(
35
- in_channels=channels_list[6] + channels_list[7],
36
- out_channels=channels_list[8],
37
- n=num_repeats[7],
38
- )
39
-
40
- self.Rep_n4 = RepBlock(
41
- in_channels=channels_list[5] + channels_list[9],
42
- out_channels=channels_list[10],
43
- n=num_repeats[8]
44
- )
45
-
46
- self.reduce_layer0 = SimConv(
47
- in_channels=channels_list[4],
48
- out_channels=channels_list[5],
49
- kernel_size=1,
50
- stride=1
51
- )
52
-
53
- self.upsample0 = Transpose(
54
- in_channels=channels_list[5],
55
- out_channels=channels_list[5],
56
- )
57
-
58
- self.reduce_layer1 = SimConv(
59
- in_channels=channels_list[5],
60
- out_channels=channels_list[6],
61
- kernel_size=1,
62
- stride=1
63
- )
64
-
65
- self.upsample1 = Transpose(
66
- in_channels=channels_list[6],
67
- out_channels=channels_list[6]
68
- )
69
-
70
- self.downsample2 = SimConv(
71
- in_channels=channels_list[6],
72
- out_channels=channels_list[7],
73
- kernel_size=3,
74
- stride=2
75
- )
76
-
77
- self.downsample1 = SimConv(
78
- in_channels=channels_list[8],
79
- out_channels=channels_list[9],
80
- kernel_size=3,
81
- stride=2
82
- )
83
-
84
- def forward(self, input):
85
-
86
- (x2, x1, x0) = input
87
-
88
- fpn_out0 = self.reduce_layer0(x0)
89
- upsample_feat0 = self.upsample0(fpn_out0)
90
- f_concat_layer0 = torch.cat([upsample_feat0, x1], 1)
91
- f_out0 = self.Rep_p4(f_concat_layer0)
92
-
93
- fpn_out1 = self.reduce_layer1(f_out0)
94
- upsample_feat1 = self.upsample1(fpn_out1)
95
- f_concat_layer1 = torch.cat([upsample_feat1, x2], 1)
96
- pan_out2 = self.Rep_p3(f_concat_layer1)
97
-
98
- down_feat1 = self.downsample2(pan_out2)
99
- p_concat_layer1 = torch.cat([down_feat1, fpn_out1], 1)
100
- pan_out1 = self.Rep_n3(p_concat_layer1)
101
-
102
- down_feat0 = self.downsample1(pan_out1)
103
- p_concat_layer2 = torch.cat([down_feat0, fpn_out0], 1)
104
- pan_out0 = self.Rep_n4(p_concat_layer2)
105
-
106
- outputs = [pan_out2, pan_out1, pan_out0]
107
-
108
- return outputs
 
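A hedged forward-pass sketch for the neck deleted above, assuming a matching YOLOv6 checkout is importable. The width and depth lists are illustrative and consistent with the head sketch earlier; the inputs are the three backbone features, shallowest first:

```python
import torch
from yolov6.models.reppan import RepPANNeck

channels_list = [64, 128, 256, 512, 1024, 256, 128, 128, 256, 256, 512]   # illustrative widths
num_repeats = [1, 6, 12, 18, 6, 12, 12, 12, 12]                           # illustrative depths
neck = RepPANNeck(channels_list=channels_list, num_repeats=num_repeats)

x2 = torch.randn(1, 256, 80, 80)      # stride-8 backbone feature  (channels_list[2])
x1 = torch.randn(1, 512, 40, 40)      # stride-16 backbone feature (channels_list[3])
x0 = torch.randn(1, 1024, 20, 20)     # stride-32 backbone feature (channels_list[4])
p3, p4, p5 = neck((x2, x1, x0))
print(p3.shape, p4.shape, p5.shape)   # (1, 128, 80, 80) (1, 256, 40, 40) (1, 512, 20, 20)
```
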
 
yolov6/models/yolo.py DELETED
@@ -1,83 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import math
4
- import torch.nn as nn
5
- from yolov6.layers.common import *
6
- from yolov6.utils.torch_utils import initialize_weights
7
- from yolov6.models.efficientrep import EfficientRep
8
- from yolov6.models.reppan import RepPANNeck
9
- from yolov6.models.effidehead import Detect, build_effidehead_layer
10
-
11
-
12
- class Model(nn.Module):
13
- '''YOLOv6 model with backbone, neck and head.
14
- The default parts are EfficientRep Backbone, Rep-PAN and
15
- Efficient Decoupled Head.
16
- '''
17
- def __init__(self, config, channels=3, num_classes=None, anchors=None): # model, input channels, number of classes
18
- super().__init__()
19
- # Build network
20
- num_layers = config.model.head.num_layers
21
- self.backbone, self.neck, self.detect = build_network(config, channels, num_classes, anchors, num_layers)
22
-
23
- # Init Detect head
24
- begin_indices = config.model.head.begin_indices
25
- out_indices_head = config.model.head.out_indices
26
- self.stride = self.detect.stride
27
- self.detect.i = begin_indices
28
- self.detect.f = out_indices_head
29
- self.detect.initialize_biases()
30
-
31
- # Init weights
32
- initialize_weights(self)
33
-
34
- def forward(self, x):
35
- x = self.backbone(x)
36
- x = self.neck(x)
37
- x = self.detect(x)
38
- return x
39
-
40
- def _apply(self, fn):
41
- self = super()._apply(fn)
42
- self.detect.stride = fn(self.detect.stride)
43
- self.detect.grid = list(map(fn, self.detect.grid))
44
- return self
45
-
46
-
47
- def make_divisible(x, divisor):
48
- # Round the value x up so that it is evenly divisible by the divisor.
49
- return math.ceil(x / divisor) * divisor
50
-
51
-
52
- def build_network(config, channels, num_classes, anchors, num_layers):
53
- depth_mul = config.model.depth_multiple
54
- width_mul = config.model.width_multiple
55
- num_repeat_backbone = config.model.backbone.num_repeats
56
- channels_list_backbone = config.model.backbone.out_channels
57
- num_repeat_neck = config.model.neck.num_repeats
58
- channels_list_neck = config.model.neck.out_channels
59
- num_anchors = config.model.head.anchors
60
- num_repeat = [(max(round(i * depth_mul), 1) if i > 1 else i) for i in (num_repeat_backbone + num_repeat_neck)]
61
- channels_list = [make_divisible(i * width_mul, 8) for i in (channels_list_backbone + channels_list_neck)]
62
-
63
- backbone = EfficientRep(
64
- in_channels=channels,
65
- channels_list=channels_list,
66
- num_repeats=num_repeat
67
- )
68
-
69
- neck = RepPANNeck(
70
- channels_list=channels_list,
71
- num_repeats=num_repeat
72
- )
73
-
74
- head_layers = build_effidehead_layer(channels_list, num_anchors, num_classes)
75
-
76
- head = Detect(num_classes, anchors, num_layers, head_layers=head_layers)
77
-
78
- return backbone, neck, head
79
-
80
-
81
- def build_model(cfg, num_classes, device):
82
- model = Model(cfg, channels=3, num_classes=num_classes, anchors=cfg.model.head.anchors).to(device)
83
- return model
 
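A hedged end-to-end sketch using build_model from the file above. It assumes a matching YOLOv6 checkout is importable and that a YOLOv6-style python config exists at the hypothetical path below:

```python
import torch
from yolov6.utils.config import Config
from yolov6.models.yolo import build_model

cfg = Config.fromfile('configs/yolov6s.py')                      # hypothetical config path
model = build_model(cfg, num_classes=80, device=torch.device('cpu'))
model.eval()
with torch.no_grad():
    preds = model(torch.randn(1, 3, 640, 640))
print(preds.shape)                                               # (1, n_anchors_total, 85)
```
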
 
yolov6/solver/build.py DELETED
@@ -1,42 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import os
4
- import math
5
-
6
- import torch
7
- import torch.nn as nn
8
-
9
-
10
- def build_optimizer(cfg, model):
11
- """ Build optimizer from cfg file."""
12
- g_bnw, g_w, g_b = [], [], []
13
- for v in model.modules():
14
- if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
15
- g_b.append(v.bias)
16
- if isinstance(v, nn.BatchNorm2d):
17
- g_bnw.append(v.weight)
18
- elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
19
- g_w.append(v.weight)
20
-
21
- assert cfg.solver.optim in ('SGD', 'Adam'), 'ERROR: unknown optimizer, use SGD defaulted'
22
- if cfg.solver.optim == 'SGD':
23
- optimizer = torch.optim.SGD(g_bnw, lr=cfg.solver.lr0, momentum=cfg.solver.momentum, nesterov=True)
24
- elif cfg.solver.optim == 'Adam':
25
- optimizer = torch.optim.Adam(g_bnw, lr=cfg.solver.lr0, betas=(cfg.solver.momentum, 0.999))
26
-
27
- optimizer.add_param_group({'params': g_w, 'weight_decay': cfg.solver.weight_decay})
28
- optimizer.add_param_group({'params': g_b})
29
-
30
- del g_bnw, g_w, g_b
31
- return optimizer
32
-
33
-
34
- def build_lr_scheduler(cfg, optimizer, epochs):
35
- """Build learning rate scheduler from cfg file."""
36
- if cfg.solver.lr_scheduler == 'Cosine':
37
- lf = lambda x: ((1 - math.cos(x * math.pi / epochs)) / 2) * (cfg.solver.lrf - 1) + 1
38
- else:
39
- LOGGER.error('unknown lr scheduler, use Cosine defaulted')
40
-
41
- scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
42
- return scheduler, lf
 
 
yolov6/utils/Arial.ttf DELETED
Binary file (773 kB)
 
yolov6/utils/checkpoint.py DELETED
@@ -1,60 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import os
4
- import shutil
5
- import torch
6
- import os.path as osp
7
- from yolov6.utils.events import LOGGER
8
- from yolov6.utils.torch_utils import fuse_model
9
-
10
-
11
- def load_state_dict(weights, model, map_location=None):
12
- """Load weights from a checkpoint file, only assigning weights to layers whose name and shape match."""
13
- ckpt = torch.load(weights, map_location=map_location)
14
- state_dict = ckpt['model'].float().state_dict()
15
- model_state_dict = model.state_dict()
16
- state_dict = {k: v for k, v in state_dict.items() if k in model_state_dict and v.shape == model_state_dict[k].shape}
17
- model.load_state_dict(state_dict, strict=False)
18
- del ckpt, state_dict, model_state_dict
19
- return model
20
-
21
-
22
- def load_checkpoint(weights, map_location=None, inplace=True, fuse=True):
23
- """Load model from checkpoint file."""
24
- LOGGER.info("Loading checkpoint from {}".format(weights))
25
- ckpt = torch.load(weights, map_location=map_location) # load
26
- model = ckpt['ema' if ckpt.get('ema') else 'model'].float()
27
- if fuse:
28
- LOGGER.info("\nFusing model...")
29
- model = fuse_model(model).eval()
30
- else:
31
- model = model.eval()
32
- return model
33
-
34
-
35
- def save_checkpoint(ckpt, is_best, save_dir, model_name=""):
36
- """ Save checkpoint to the disk."""
37
- if not osp.exists(save_dir):
38
- os.makedirs(save_dir)
39
- filename = osp.join(save_dir, model_name + '.pt')
40
- torch.save(ckpt, filename)
41
- if is_best:
42
- best_filename = osp.join(save_dir, 'best_ckpt.pt')
43
- shutil.copyfile(filename, best_filename)
44
-
45
-
46
- def strip_optimizer(ckpt_dir, epoch):
47
- for s in ['best', 'last']:
48
- ckpt_path = osp.join(ckpt_dir, '{}_ckpt.pt'.format(s))
49
- if not osp.exists(ckpt_path):
50
- continue
51
- ckpt = torch.load(ckpt_path, map_location=torch.device('cpu'))
52
- if ckpt.get('ema'):
53
- ckpt['model'] = ckpt['ema'] # replace model with ema
54
- for k in ['optimizer', 'ema', 'updates']: # keys
55
- ckpt[k] = None
56
- ckpt['epoch'] = epoch
57
- ckpt['model'].half() # to FP16
58
- for p in ckpt['model'].parameters():
59
- p.requires_grad = False
60
- torch.save(ckpt, ckpt_path)
 
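A hedged loading sketch for the helpers above, assuming a matching YOLOv6 checkout is importable and that a checkpoint exists at the hypothetical path below:

```python
from yolov6.utils.checkpoint import load_checkpoint

# Prefers the EMA weights if present, optionally fuses conv+BN, and returns the model in eval mode.
model = load_checkpoint('weights/yolov6s.pt', map_location='cpu', fuse=True)   # hypothetical path
```
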
 
yolov6/utils/config.py DELETED
@@ -1,101 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
- # The code is based on
4
- # https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/config.py
5
- # Copyright (c) OpenMMLab.
6
-
7
- import os.path as osp
8
- import shutil
9
- import sys
10
- import tempfile
11
- from importlib import import_module
12
- from addict import Dict
13
-
14
-
15
- class ConfigDict(Dict):
16
-
17
- def __missing__(self, name):
18
- raise KeyError(name)
19
-
20
- def __getattr__(self, name):
21
- try:
22
- value = super(ConfigDict, self).__getattr__(name)
23
- except KeyError:
24
- ex = AttributeError("'{}' object has no attribute '{}'".format(
25
- self.__class__.__name__, name))
26
- except Exception as e:
27
- ex = e
28
- else:
29
- return value
30
- raise ex
31
-
32
-
33
- class Config(object):
34
-
35
- @staticmethod
36
- def _file2dict(filename):
37
- filename = str(filename)
38
- if filename.endswith('.py'):
39
- with tempfile.TemporaryDirectory() as temp_config_dir:
40
- shutil.copyfile(filename,
41
- osp.join(temp_config_dir, '_tempconfig.py'))
42
- sys.path.insert(0, temp_config_dir)
43
- mod = import_module('_tempconfig')
44
- sys.path.pop(0)
45
- cfg_dict = {
46
- name: value
47
- for name, value in mod.__dict__.items()
48
- if not name.startswith('__')
49
- }
50
- # delete imported module
51
- del sys.modules['_tempconfig']
52
- else:
53
- raise IOError('Only .py type are supported now!')
54
- cfg_text = filename + '\n'
55
- with open(filename, 'r') as f:
56
- cfg_text += f.read()
57
-
58
- return cfg_dict, cfg_text
59
-
60
- @staticmethod
61
- def fromfile(filename):
62
- cfg_dict, cfg_text = Config._file2dict(filename)
63
- return Config(cfg_dict, cfg_text=cfg_text, filename=filename)
64
-
65
- def __init__(self, cfg_dict=None, cfg_text=None, filename=None):
66
- if cfg_dict is None:
67
- cfg_dict = dict()
68
- elif not isinstance(cfg_dict, dict):
69
- raise TypeError('cfg_dict must be a dict, but got {}'.format(
70
- type(cfg_dict)))
71
-
72
- super(Config, self).__setattr__('_cfg_dict', ConfigDict(cfg_dict))
73
- super(Config, self).__setattr__('_filename', filename)
74
- if cfg_text:
75
- text = cfg_text
76
- elif filename:
77
- with open(filename, 'r') as f:
78
- text = f.read()
79
- else:
80
- text = ''
81
- super(Config, self).__setattr__('_text', text)
82
-
83
- @property
84
- def filename(self):
85
- return self._filename
86
-
87
- @property
88
- def text(self):
89
- return self._text
90
-
91
- def __repr__(self):
92
- return 'Config (path: {}): {}'.format(self.filename,
93
- self._cfg_dict.__repr__())
94
-
95
- def __getattr__(self, name):
96
- return getattr(self._cfg_dict, name)
97
-
98
- def __setattr__(self, name, value):
99
- if isinstance(value, dict):
100
- value = ConfigDict(value)
101
- self._cfg_dict.__setattr__(name, value)
 
 
yolov6/utils/ema.py DELETED
@@ -1,59 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- # The code is based on
4
- # https://github.com/ultralytics/yolov5/blob/master/utils/torch_utils.py
5
- import math
6
- from copy import deepcopy
7
- import torch
8
- import torch.nn as nn
9
-
10
-
11
- class ModelEMA:
12
- """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
13
- Keep a moving average of everything in the model state_dict (parameters and buffers).
14
- This is intended to allow functionality like
15
- https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
16
- A smoothed version of the weights is necessary for some training schemes to perform well.
17
- This class is sensitive where it is initialized in the sequence of model init,
18
- GPU assignment and distributed training wrappers.
19
- """
20
-
21
- def __init__(self, model, decay=0.9999, updates=0):
22
- self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA
23
- self.updates = updates
24
- self.decay = lambda x: decay * (1 - math.exp(-x / 2000))
25
- for param in self.ema.parameters():
26
- param.requires_grad_(False)
27
-
28
- def update(self, model):
29
- with torch.no_grad():
30
- self.updates += 1
31
- decay = self.decay(self.updates)
32
-
33
- state_dict = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict
34
- for k, item in self.ema.state_dict().items():
35
- if item.dtype.is_floating_point:
36
- item *= decay
37
- item += (1 - decay) * state_dict[k].detach()
38
-
39
- def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
40
- copy_attr(self.ema, model, include, exclude)
41
-
42
-
43
- def copy_attr(a, b, include=(), exclude=()):
44
- """Copy attributes from one instance and set them to another instance."""
45
- for k, item in b.__dict__.items():
46
- if (len(include) and k not in include) or k.startswith('_') or k in exclude:
47
- continue
48
- else:
49
- setattr(a, k, item)
50
-
51
-
52
- def is_parallel(model):
53
- # Return True if model's type is DP or DDP, else False.
54
- return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
55
-
56
-
57
- def de_parallel(model):
58
- # De-parallelize a model. Return single-GPU model if model's type is DP or DDP.
59
- return model.module if is_parallel(model) else model
 
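A hedged training-loop sketch for ModelEMA, assuming a matching YOLOv6 checkout is importable; the tiny Sequential stands in for the real detector:

```python
import torch.nn as nn
from yolov6.utils.ema import ModelEMA

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())   # toy stand-in model
ema = ModelEMA(model, decay=0.9999)

# Inside the training loop, after every optimizer step:
ema.update(model)            # move the shadow weights toward the live model
# At validation or checkpoint time, evaluate ema.ema instead of model.
```
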
 
yolov6/utils/envs.py DELETED
@@ -1,54 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
- import os
4
- import random
5
- import numpy as np
6
-
7
- import torch
8
- import torch.backends.cudnn as cudnn
9
- from yolov6.utils.events import LOGGER
10
-
11
-
12
- def get_envs():
13
- """Get the PyTorch distributed-training settings from the system environment variables."""
14
- local_rank = int(os.getenv('LOCAL_RANK', -1))
15
- rank = int(os.getenv('RANK', -1))
16
- world_size = int(os.getenv('WORLD_SIZE', 1))
17
- return local_rank, rank, world_size
18
-
19
-
20
- def select_device(device):
21
- """Set devices' information to the program.
22
- Args:
23
- device: a string, like 'cpu' or '1,2,3,4'
24
- Returns:
25
- torch.device
26
- """
27
- if device == 'cpu':
28
- os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
29
- LOGGER.info('Using CPU for training... ')
30
- elif device:
31
- os.environ['CUDA_VISIBLE_DEVICES'] = device
32
- assert torch.cuda.is_available()
33
- nd = len(device.strip().split(','))
34
- LOGGER.info(f'Using {nd} GPU for training... ')
35
- cuda = device != 'cpu' and torch.cuda.is_available()
36
- device = torch.device('cuda:0' if cuda else 'cpu')
37
- return device
38
-
39
-
40
- def set_random_seed(seed, deterministic=False):
41
- """ Set the random seed for the random library, numpy, torch and cudnn.
42
- Args:
43
- seed: int value.
44
- deterministic: bool value.
45
- """
46
- random.seed(seed)
47
- np.random.seed(seed)
48
- torch.manual_seed(seed)
49
- if deterministic:
50
- cudnn.deterministic = True
51
- cudnn.benchmark = False
52
- else:
53
- cudnn.deterministic = False
54
- cudnn.benchmark = True
 
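A hedged sketch of the environment helpers above (importable from a matching YOLOv6 checkout); 'cpu' keeps the example runnable without a GPU:

```python
from yolov6.utils.envs import get_envs, select_device, set_random_seed

local_rank, rank, world_size = get_envs()   # -1, -1, 1 outside of torch.distributed launches
device = select_device('cpu')               # or e.g. '0' / '0,1' when CUDA is available
set_random_seed(1, deterministic=True)      # reproducible runs, at some speed cost
print(local_rank, rank, world_size, device)
```
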
 
yolov6/utils/events.py DELETED
@@ -1,41 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
- import os
4
- import yaml
5
- import logging
6
- import shutil
7
-
8
-
9
- def set_logging(name=None):
10
- rank = int(os.getenv('RANK', -1))
11
- logging.basicConfig(format="%(message)s", level=logging.INFO if (rank in (-1, 0)) else logging.WARNING)
12
- return logging.getLogger(name)
13
-
14
-
15
- LOGGER = set_logging(__name__)
16
- NCOLS = shutil.get_terminal_size().columns
17
-
18
-
19
- def load_yaml(file_path):
20
- """Load data from yaml file."""
21
- if isinstance(file_path, str):
22
- with open(file_path, errors='ignore') as f:
23
- data_dict = yaml.safe_load(f)
24
- return data_dict
25
-
26
-
27
- def save_yaml(data_dict, save_path):
28
- """Save data to yaml file"""
29
- with open(save_path, 'w') as f:
30
- yaml.safe_dump(data_dict, f, sort_keys=False)
31
-
32
-
33
- def write_tblog(tblogger, epoch, results, losses):
34
- """Display mAP and loss information to log."""
35
- tblogger.add_scalar("val/mAP@0.5", results[0], epoch + 1)
36
- tblogger.add_scalar("val/mAP@0.50:0.95", results[1], epoch + 1)
37
-
38
- tblogger.add_scalar("train/iou_loss", losses[0], epoch + 1)
39
- tblogger.add_scalar("train/l1_loss", losses[1], epoch + 1)
40
- tblogger.add_scalar("train/obj_loss", losses[2], epoch + 1)
41
- tblogger.add_scalar("train/cls_loss", losses[3], epoch + 1)
 
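A round-trip sketch for the YAML helpers above, assuming they are importable from a matching YOLOv6 checkout; the file name is hypothetical:

```python
from yolov6.utils.events import save_yaml, load_yaml

data = {'nc': 2, 'names': ['person', 'car']}
save_yaml(data, 'data_example.yaml')        # hypothetical output path
print(load_yaml('data_example.yaml'))       # {'nc': 2, 'names': ['person', 'car']}
```
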
 
yolov6/utils/figure_iou.py DELETED
@@ -1,114 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import math
4
- import torch
5
-
6
-
7
- class IOUloss:
8
- """ Calculate IoU loss.
9
- """
10
- def __init__(self, box_format='xywh', iou_type='ciou', reduction='none', eps=1e-7):
11
- """ Setting of the class.
12
- Args:
13
- box_format: (string), must be one of 'xywh' or 'xyxy'.
14
- iou_type: (string), can be one of 'ciou', 'diou', 'giou' or 'siou'
15
- reduction: (string), specifies the reduction to apply to the output, must be one of 'none', 'mean','sum'.
16
- eps: (float), a value to avoid divide by zero error.
17
- """
18
- self.box_format = box_format
19
- self.iou_type = iou_type.lower()
20
- self.reduction = reduction
21
- self.eps = eps
22
-
23
- def __call__(self, box1, box2):
24
- """ Calculate IoU. box1 and box2 are torch tensors with shape [M, 4] and [N, 4].
25
- """
26
- box2 = box2.T
27
- if self.box_format == 'xyxy':
28
- b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
29
- b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
30
- elif self.box_format == 'xywh':
31
- b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
32
- b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
33
- b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
34
- b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
35
-
36
- # Intersection area
37
- inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
38
- (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
39
-
40
- # Union Area
41
- w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + self.eps
42
- w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + self.eps
43
- union = w1 * h1 + w2 * h2 - inter + self.eps
44
- iou = inter / union
45
-
46
- cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex width
47
- ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height
48
- if self.iou_type == 'giou':
49
- c_area = cw * ch + self.eps # convex area
50
- iou = iou - (c_area - union) / c_area
51
- elif self.iou_type in ['diou', 'ciou']:
52
- c2 = cw ** 2 + ch ** 2 + self.eps # convex diagonal squared
53
- rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
54
- (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared
55
- if self.iou_type == 'diou':
56
- iou = iou - rho2 / c2
57
- elif self.iou_type == 'ciou':
58
- v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
59
- with torch.no_grad():
60
- alpha = v / (v - iou + (1 + self.eps))
61
- iou = iou - (rho2 / c2 + v * alpha)
62
- elif self.iou_type == 'siou':
63
- # SIoU Loss https://arxiv.org/pdf/2205.12740.pdf
64
- s_cw = (b2_x1 + b2_x2 - b1_x1 - b1_x2) * 0.5
65
- s_ch = (b2_y1 + b2_y2 - b1_y1 - b1_y2) * 0.5
66
- sigma = torch.pow(s_cw ** 2 + s_ch ** 2, 0.5)
67
- sin_alpha_1 = torch.abs(s_cw) / sigma
68
- sin_alpha_2 = torch.abs(s_ch) / sigma
69
- threshold = pow(2, 0.5) / 2
70
- sin_alpha = torch.where(sin_alpha_1 > threshold, sin_alpha_2, sin_alpha_1)
71
- angle_cost = torch.cos(torch.arcsin(sin_alpha) * 2 - math.pi / 2)
72
- rho_x = (s_cw / cw) ** 2
73
- rho_y = (s_ch / ch) ** 2
74
- gamma = angle_cost - 2
75
- distance_cost = 2 - torch.exp(gamma * rho_x) - torch.exp(gamma * rho_y)
76
- omiga_w = torch.abs(w1 - w2) / torch.max(w1, w2)
77
- omiga_h = torch.abs(h1 - h2) / torch.max(h1, h2)
78
- shape_cost = torch.pow(1 - torch.exp(-1 * omiga_w), 4) + torch.pow(1 - torch.exp(-1 * omiga_h), 4)
79
- iou = iou - 0.5 * (distance_cost + shape_cost)
80
- loss = 1.0 - iou
81
-
82
- if self.reduction == 'sum':
83
- loss = loss.sum()
84
- elif self.reduction == 'mean':
85
- loss = loss.mean()
86
-
87
- return loss
88
-
89
-
90
- def pairwise_bbox_iou(box1, box2, box_format='xywh'):
91
- """Calculate iou.
92
- This code is based on https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/utils/boxes.py
93
- """
94
- if box_format == 'xyxy':
95
- lt = torch.max(box1[:, None, :2], box2[:, :2])
96
- rb = torch.min(box1[:, None, 2:], box2[:, 2:])
97
- area_1 = torch.prod(box1[:, 2:] - box1[:, :2], 1)
98
- area_2 = torch.prod(box2[:, 2:] - box2[:, :2], 1)
99
-
100
- elif box_format == 'xywh':
101
- lt = torch.max(
102
- (box1[:, None, :2] - box1[:, None, 2:] / 2),
103
- (box2[:, :2] - box2[:, 2:] / 2),
104
- )
105
- rb = torch.min(
106
- (box1[:, None, :2] + box1[:, None, 2:] / 2),
107
- (box2[:, :2] + box2[:, 2:] / 2),
108
- )
109
-
110
- area_1 = torch.prod(box1[:, 2:], 1)
111
- area_2 = torch.prod(box2[:, 2:], 1)
112
- valid = (lt < rb).type(lt.type()).prod(dim=2)
113
- inter = torch.prod(rb - lt, 2) * valid
114
- return inter / (area_1[:, None] + area_2 - inter)
 
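A hedged sketch of calling IOUloss directly, assuming it is importable from a matching YOLOv6 checkout. Note the shape convention of the deleted code: box1 is passed already transposed ([4, M]) while box2 is [M, 4] and is transposed internally; the boxes are made up:

```python
import torch
from yolov6.utils.figure_iou import IOUloss

loss_fn = IOUloss(box_format='xyxy', iou_type='siou', reduction='mean')
pred = torch.tensor([[0., 0., 10., 10.],
                     [5., 5., 15., 15.]]).T      # shape [4, M]
target = torch.tensor([[1., 1., 11., 11.],
                       [6., 4., 16., 14.]])      # shape [M, 4]
print(loss_fn(pred, target))                     # scalar SIoU loss averaged over the two pairs
```
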
 
yolov6/utils/general.py DELETED
@@ -1,24 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import os
4
- import glob
5
- from pathlib import Path
6
-
7
- def increment_name(path):
8
- "increase save directory's id"
9
- path = Path(path)
10
- sep = ''
11
- if path.exists():
12
- path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '')
13
- for n in range(1, 9999):
14
- p = f'{path}{sep}{n}{suffix}'
15
- if not os.path.exists(p):
16
- break
17
- path = Path(p)
18
- return path
19
-
20
-
21
- def find_latest_checkpoint(search_dir='.'):
22
- # Find the most recent saved checkpoint in search_dir
23
- checkpoint_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
24
- return max(checkpoint_list, key=os.path.getctime) if checkpoint_list else ''
 
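A short sketch of the run-directory helper above, assuming a matching YOLOv6 checkout is importable; it creates a dummy directory just to trigger the increment:

```python
from pathlib import Path
from yolov6.utils.general import increment_name

Path('runs/train/exp').mkdir(parents=True, exist_ok=True)   # simulate an existing run directory
print(increment_name('runs/train/exp'))                     # runs/train/exp1 (then exp2, ...)
```
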
 
yolov6/utils/nms.py DELETED
@@ -1,106 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- # The code is based on
4
- # https://github.com/ultralytics/yolov5/blob/master/utils/general.py
5
-
6
- import os
7
- import time
8
- import numpy as np
9
- import cv2
10
- import torch
11
- import torchvision
12
-
13
-
14
- # Settings
15
- torch.set_printoptions(linewidth=320, precision=5, profile='long')
16
- np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5
17
- cv2.setNumThreads(0) # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
18
- os.environ['NUMEXPR_MAX_THREADS'] = str(min(os.cpu_count(), 8)) # NumExpr max threads
19
-
20
-
21
- def xywh2xyxy(x):
22
- # Convert boxes with shape [n, 4] from [x, y, w, h] to [x1, y1, x2, y2] where x1y1 is top-left, x2y2=bottom-right
23
- y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
24
- y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
25
- y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
26
- y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
27
- y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
28
- return y
29
-
30
-
31
- def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, max_det=300):
32
- """Runs Non-Maximum Suppression (NMS) on inference results.
33
- This code is borrowed from: https://github.com/ultralytics/yolov5/blob/47233e1698b89fc437a4fb9463c815e9171be955/utils/general.py#L775
34
- Args:
35
- prediction: (tensor), with shape [N, 5 + num_classes], N is the number of bboxes.
36
- conf_thres: (float) confidence threshold.
37
- iou_thres: (float) iou threshold.
38
- classes: (None or list[int]), if a list is provided, nms only keep the classes you provide.
39
- agnostic: (bool), when it is set to True, class-agnostic NMS is performed; otherwise NMS is done separately per class.
40
- multi_label: (bool), when it is set to True, one box can have multiple labels; otherwise, each box only has one label.
41
- max_det:(int), max number of output bboxes.
42
-
43
- Returns:
44
- list of detections, each item is one tensor with shape (num_boxes, 6), 6 is for [xyxy, conf, cls].
45
- """
46
-
47
- num_classes = prediction.shape[2] - 5 # number of classes
48
- pred_candidates = prediction[..., 4] > conf_thres # candidates
49
-
50
- # Check the parameters.
51
- assert 0 <= conf_thres <= 1, f'conf_thresh must be in 0.0 to 1.0, however {conf_thres} is provided.'
52
- assert 0 <= iou_thres <= 1, f'iou_thres must be in 0.0 to 1.0, however {iou_thres} is provided.'
53
-
54
- # Function settings.
55
- max_wh = 4096 # maximum box width and height
56
- max_nms = 30000 # maximum number of boxes put into torchvision.ops.nms()
57
- time_limit = 10.0 # quit the function when nms cost time exceed the limit time.
58
- multi_label &= num_classes > 1 # multiple labels per box
59
-
60
- tik = time.time()
61
- output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
62
- for img_idx, x in enumerate(prediction): # image index, image inference
63
- x = x[pred_candidates[img_idx]] # keep only boxes whose objectness exceeds conf_thres
64
-
65
- # If no boxes remain, skip this image.
66
- if not x.shape[0]:
67
- continue
68
-
69
- # scale class scores by objectness
70
- x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
71
-
72
- # (center x, center y, width, height) to (x1, y1, x2, y2)
73
- box = xywh2xyxy(x[:, :4])
74
-
75
- # Detections matrix's shape is (n,6), each row represents (xyxy, conf, cls)
76
- if multi_label:
77
- box_idx, class_idx = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
78
- x = torch.cat((box[box_idx], x[box_idx, class_idx + 5, None], class_idx[:, None].float()), 1)
79
- else: # Only keep the class with the highest score.
80
- conf, class_idx = x[:, 5:].max(1, keepdim=True)
81
- x = torch.cat((box, conf, class_idx.float()), 1)[conf.view(-1) > conf_thres]
82
-
83
- # Filter by class, only keep boxes whose category is in classes.
84
- if classes is not None:
85
- x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
86
-
87
- # Check shape
88
- num_box = x.shape[0] # number of boxes
89
- if not num_box: # no boxes kept.
90
- continue
91
- elif num_box > max_nms: # more boxes than NMS can handle; keep only the top max_nms by confidence.
92
- x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
93
-
94
- # Batched NMS
95
- class_offset = x[:, 5:6] * (0 if agnostic else max_wh) # classes
96
- boxes, scores = x[:, :4] + class_offset, x[:, 4] # boxes (offset by class), scores
97
- keep_box_idx = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
98
- if keep_box_idx.shape[0] > max_det: # limit detections
99
- keep_box_idx = keep_box_idx[:max_det]
100
-
101
- output[img_idx] = x[keep_box_idx]
102
- if (time.time() - tik) > time_limit:
103
- print(f'WARNING: NMS took longer than the {time_limit}s time limit.')
104
- break # time limit exceeded
105
-
106
- return output
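To illustrate the deleted NMS routine, here is a minimal sketch that runs non_max_suppression on a fabricated prediction tensor; it assumes a checkout from before this commit, where yolov6/utils/nms.py still exists. Two heavily overlapping boxes collapse into one, and a low-objectness box is dropped by the confidence threshold.

import torch
from yolov6.utils.nms import non_max_suppression  # module removed by this commit

batch, num_boxes, num_classes = 1, 4, 3
pred = torch.zeros((batch, num_boxes, 5 + num_classes))  # rows: [cx, cy, w, h, obj, cls0, cls1, cls2]
pred[..., :4] = torch.tensor([[100., 100., 50., 80.],    # box A
                              [102., 101., 52., 78.],    # box B, almost identical to A
                              [300., 200., 40., 40.],    # box C, far away
                              [500., 400., 30., 30.]])   # box D
pred[..., 4] = torch.tensor([0.9, 0.8, 0.7, 0.1])        # objectness; box D falls below conf_thres
pred[..., 5] = 1.0                                       # every box scores highest on class 0

dets = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)[0]
print(dets)  # expected: 2 rows of [x1, y1, x2, y2, conf, cls] -- B suppressed by A, D filtered out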
 
yolov6/utils/torch_utils.py DELETED
@@ -1,110 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
-
4
- import time
5
- from contextlib import contextmanager
6
- from copy import deepcopy
7
- import torch
8
- import torch.distributed as dist
9
- import torch.nn as nn
10
- import torch.nn.functional as F
11
- from yolov6.utils.events import LOGGER
12
-
13
- try:
14
- import thop # for FLOPs computation
15
- except ImportError:
16
- thop = None
17
-
18
-
19
- @contextmanager
20
- def torch_distributed_zero_first(local_rank: int):
21
- """
22
- Context manager that makes all other processes in distributed training wait until the local master has finished its task.
23
- """
24
- if local_rank not in [-1, 0]:
25
- dist.barrier(device_ids=[local_rank])
26
- yield
27
- if local_rank == 0:
28
- dist.barrier(device_ids=[0])
29
-
30
-
31
- def time_sync():
32
- # Wait for all kernels in all streams on the CUDA device to complete (if CUDA is available), then return the current time.
33
- if torch.cuda.is_available():
34
- torch.cuda.synchronize()
35
- return time.time()
36
-
37
-
38
- def initialize_weights(model):
39
- for m in model.modules():
40
- t = type(m)
41
- if t is nn.Conv2d:
42
- pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
43
- elif t is nn.BatchNorm2d:
44
- m.eps = 1e-3
45
- m.momentum = 0.03
46
- elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
47
- m.inplace = True
48
-
49
-
50
- def fuse_conv_and_bn(conv, bn):
51
- # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
52
- fusedconv = (
53
- nn.Conv2d(
54
- conv.in_channels,
55
- conv.out_channels,
56
- kernel_size=conv.kernel_size,
57
- stride=conv.stride,
58
- padding=conv.padding,
59
- groups=conv.groups,
60
- bias=True,
61
- )
62
- .requires_grad_(False)
63
- .to(conv.weight.device)
64
- )
65
-
66
- # prepare filters
67
- w_conv = conv.weight.clone().view(conv.out_channels, -1)
68
- w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
69
- fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))
70
-
71
- # prepare spatial bias
72
- b_conv = (
73
- torch.zeros(conv.weight.size(0), device=conv.weight.device)
74
- if conv.bias is None
75
- else conv.bias
76
- )
77
- b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(
78
- torch.sqrt(bn.running_var + bn.eps)
79
- )
80
- fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
81
-
82
- return fusedconv
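A quick numerical check of the fusion above, as a sketch assuming a pre-deletion checkout: with BatchNorm in eval mode, the fused convolution should reproduce conv followed by batchnorm to within floating-point tolerance.

import torch
import torch.nn as nn
from yolov6.utils.torch_utils import fuse_conv_and_bn  # module removed by this commit

conv = nn.Conv2d(8, 16, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(16).eval()       # fusion matches BN in inference mode only
bn.running_mean.uniform_(-1.0, 1.0)  # give BN non-trivial running statistics
bn.running_var.uniform_(0.5, 2.0)

fused = fuse_conv_and_bn(conv, bn)
x = torch.randn(2, 8, 32, 32)
with torch.no_grad():
    print(torch.allclose(bn(conv(x)), fused(x), atol=1e-5))  # expected: True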
83
-
84
-
85
- def fuse_model(model):
86
- from yolov6.layers.common import Conv
87
-
88
- for m in model.modules():
89
- if type(m) is Conv and hasattr(m, "bn"):
90
- m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
91
- delattr(m, "bn") # remove batchnorm
92
- m.forward = m.forward_fuse # update forward
93
- return model
94
-
95
-
96
- def get_model_info(model, img_size=640):
97
- """Get model Params and GFlops.
98
- Code based on https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/utils/model_utils.py
99
- """
100
- from thop import profile
101
- stride = 32
102
- img = torch.zeros((1, 3, stride, stride), device=next(model.parameters()).device)
103
-
104
- flops, params = profile(deepcopy(model), inputs=(img,), verbose=False)
105
- params /= 1e6
106
- flops /= 1e9
107
- img_size = img_size if isinstance(img_size, list) else [img_size, img_size]
108
- flops *= img_size[0] * img_size[1] / stride / stride * 2 # Gflops
109
- info = "Params: {:.2f}M, Gflops: {:.2f}".format(params, flops)
110
- return info
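Finally, a short sketch of how these utilities fit together when preparing a network for inference, again assuming a pre-deletion checkout; get_model_info additionally needs the optional thop package. Note that fuse_model only folds BatchNorm layers inside yolov6.layers.common.Conv blocks, so on the toy Sequential below it is a no-op and is shown purely for the call pattern.

import torch
import torch.nn as nn
from yolov6.utils.torch_utils import fuse_model, get_model_info, time_sync  # module removed by this commit

# Toy stand-in model; a real YOLOv6 model would have the BN of each Conv block folded away here.
model = nn.Sequential(nn.Conv2d(3, 16, 3, 2, 1), nn.BatchNorm2d(16), nn.SiLU(),
                      nn.Conv2d(16, 32, 3, 2, 1)).eval()
model = fuse_model(model)

print(get_model_info(model, img_size=640))  # prints "Params: ...M, Gflops: ..."

t0 = time_sync()
with torch.no_grad():
    model(torch.zeros(1, 3, 640, 640))
print(f'forward latency: {time_sync() - t0:.3f}s')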