Spaces: Runtime error
yourusername committed on
Commit • 2a27594
1 Parent(s): e23d478
:beers: cheers
Browse files
- .gitattributes +2 -0
- .gitignore +160 -0
- README.md +1 -1
- app.py +120 -0
- coco.yaml +20 -0
- data/coco.yaml +20 -0
- data/dataset.yaml +11 -0
- data/voc.yaml +11 -0
- example_1.jpg +3 -0
- example_1.mp4 +3 -0
- example_2.jpg +3 -0
- example_2.mp4 +3 -0
- example_3.jpg +3 -0
- example_3.mp4 +3 -0
- inferer.py +238 -0
- packages.txt +1 -0
- pyproject.toml +7 -0
- requirements.txt +15 -0
- yolov6/core/engine.py +273 -0
- yolov6/core/evaler.py +256 -0
- yolov6/core/inferer.py +231 -0
- yolov6/data/data_augment.py +193 -0
- yolov6/data/data_load.py +113 -0
- yolov6/data/datasets.py +550 -0
- yolov6/data/vis_dataset.py +57 -0
- yolov6/data/voc2yolo.py +99 -0
- yolov6/layers/common.py +501 -0
- yolov6/layers/dbb_transforms.py +50 -0
- yolov6/models/efficientrep.py +102 -0
- yolov6/models/effidehead.py +211 -0
- yolov6/models/end2end.py +147 -0
- yolov6/models/loss.py +411 -0
- yolov6/models/reppan.py +108 -0
- yolov6/models/yolo.py +83 -0
- yolov6/solver/build.py +42 -0
- yolov6/utils/Arial.ttf +0 -0
- yolov6/utils/checkpoint.py +60 -0
- yolov6/utils/config.py +101 -0
- yolov6/utils/ema.py +59 -0
- yolov6/utils/envs.py +54 -0
- yolov6/utils/events.py +41 -0
- yolov6/utils/figure_iou.py +114 -0
- yolov6/utils/general.py +24 -0
- yolov6/utils/nms.py +106 -0
- yolov6/utils/torch_utils.py +110 -0
.gitattributes
CHANGED
@@ -25,3 +25,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,160 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
README.md
CHANGED
@@ -1,6 +1,6 @@
 ---
 title: Yolov6
-emoji:
+emoji: π₯ππ₯
 colorFrom: blue
 colorTo: purple
 sdk: gradio
app.py
ADDED
@@ -0,0 +1,120 @@
+import subprocess
+import tempfile
+import time
+from pathlib import Path
+
+import cv2
+import gradio as gr
+
+from inferer import Inferer
+
+pipeline = Inferer("nateraw/yolov6s")
+
+
+def fn_image(image, conf_thres, iou_thres):
+    return pipeline(image, conf_thres, iou_thres)
+
+
+def fn_video(video_file, conf_thres, iou_thres, start_sec, duration):
+    start_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec))
+    end_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec + duration))
+
+    suffix = Path(video_file).suffix
+
+    clip_temp_file = tempfile.NamedTemporaryFile(suffix=suffix)
+    subprocess.call(
+        f"ffmpeg -y -ss {start_timestamp} -i {video_file} -to {end_timestamp} -c copy {clip_temp_file.name}".split()
+    )
+
+    # Reader of clip file
+    cap = cv2.VideoCapture(clip_temp_file.name)
+
+    # This is an intermediary temp file where we'll write the video to
+    # Unfortunately, gradio doesn't play too nice with videos rn so we have to do some hackiness
+    # with ffmpeg at the end of the function here.
+    with tempfile.NamedTemporaryFile(suffix=".mp4") as temp_file:
+        out = cv2.VideoWriter(temp_file.name, cv2.VideoWriter_fourcc(*"MP4V"), 30, (1280, 720))
+
+        num_frames = 0
+        max_frames = duration * 30
+        while cap.isOpened():
+            try:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+            except Exception as e:
+                print(e)
+                continue
+
+            out.write(pipeline(frame, conf_thres, iou_thres))
+            num_frames += 1
+            print("Processed {} frames".format(num_frames))
+            if num_frames == max_frames:
+                break
+
+        out.release()
+
+        # Aforementioned hackiness
+        out_file = tempfile.NamedTemporaryFile(suffix="out.mp4", delete=False)
+        subprocess.run(f"ffmpeg -y -loglevel quiet -stats -i {temp_file.name} -c:v libx264 {out_file.name}".split())
+
+    return out_file.name
+
+
+image_interface = gr.Interface(
+    fn=fn_image,
+    inputs=[
+        "image",
+        gr.Slider(0, 1, value=0.5, label="Confidence Threshold"),
+        gr.Slider(0, 1, value=0.5, label="IOU Threshold"),
+    ],
+    outputs=gr.Image(type="file"),
+    examples=[["example_1.jpg", 0.5, 0.5], ["example_2.jpg", 0.25, 0.45], ["example_3.jpg", 0.25, 0.45]],
+    title="YOLOv6",
+    description=(
+        "Gradio demo for YOLOv6 for object detection on images. To use it, simply upload your image or click one of the"
+        " examples to load them. Read more at the links below."
+    ),
+    article=(
+        "<div style='text-align: center;'><a href='https://github.com/meituan/YOLOv6' target='_blank'>Github Repo</a> |"
+        " <center><img src='https://visitor-badge.glitch.me/badge?page_id=nateraw_yolov6' alt='visitor"
+        " badge'></center></div>"
+    ),
+    allow_flagging=False,
+    allow_screenshot=False,
+)
+
+video_interface = gr.Interface(
+    fn=fn_video,
+    inputs=[
+        gr.Video(type="file"),
+        gr.Slider(0, 1, value=0.25, label="Confidence Threshold"),
+        gr.Slider(0, 1, value=0.45, label="IOU Threshold"),
+        gr.Slider(0, 10, value=0, label="Start Second", step=1),
+        gr.Slider(0, 3, value=2, label="Duration", step=1),
+    ],
+    outputs=gr.Video(type="file", format="mp4"),
+    examples=[
+        ["example_1.mp4", 0.25, 0.45, 0, 2],
+        ["example_2.mp4", 0.25, 0.45, 5, 3],
+        ["example_3.mp4", 0.25, 0.45, 6, 3],
+    ],
+    title="YOLOv6",
+    description=(
+        "Gradio demo for YOLOv6 for object detection on videos. To use it, simply upload your video or click one of the"
+        " examples to load them. Read more at the links below."
+    ),
+    article=(
+        "<div style='text-align: center;'><a href='https://github.com/meituan/YOLOv6' target='_blank'>Github Repo</a> |"
+        " <center><img src='https://visitor-badge.glitch.me/badge?page_id=nateraw_yolov6' alt='visitor"
+        " badge'></center></div>"
+    ),
+    allow_flagging=False,
+    allow_screenshot=False,
+)
+
+if __name__ == "__main__":
+    gr.TabbedInterface(
+        [video_interface, image_interface],
+        ["Run on Videos!", "Run on Images!"],
+    ).launch()
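A quick orientation before the config files: fn_image runs the Inferer pipeline on a single frame, while fn_video clips the upload with ffmpeg, runs the pipeline frame by frame, and re-encodes the result with libx264. A minimal local smoke test of the image path — a sketch assuming the Space files are checked out, the nateraw/yolov6s weights download succeeds, and example_1.jpg is present — might look like:

    import cv2
    from app import fn_image  # importing app builds the interfaces but does not launch them

    img = cv2.imread("example_1.jpg")      # BGR ndarray, a format the pipeline accepts
    annotated = fn_image(img, 0.25, 0.45)  # image with boxes and labels drawn
    cv2.imwrite("annotated.jpg", annotated)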
coco.yaml
ADDED
@@ -0,0 +1,20 @@
+# COCO 2017 dataset http://cocodataset.org
+train: ../coco/images/train2017 # 118287 images
+val: ../coco/images/val2017 # 5000 images
+test: ../coco/images/test2017
+anno_path: ../coco/annotations/instances_val2017.json
+# number of classes
+nc: 80
+# whether it is the COCO dataset; only the COCO dataset should be set to True.
+is_coco: True
+
+# class names
+names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
+         'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+         'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
+         'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+         'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+         'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+         'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
+         'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+         'hair drier', 'toothbrush' ]
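The demo only consumes the names list from this file, via load_yaml in yolov6/utils/events. A plain-PyYAML sketch of the same lookup (assuming coco.yaml is in the working directory, which is Inferer's default):

    import yaml

    with open("coco.yaml", errors="ignore") as f:
        data_dict = yaml.safe_load(f)

    class_names = data_dict["names"]           # 80 entries, indexed by class id from NMS output
    assert len(class_names) == data_dict["nc"]
    print(class_names[0])                      # -> 'person'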
data/coco.yaml
ADDED
@@ -0,0 +1,20 @@
+# COCO 2017 dataset http://cocodataset.org
+train: ../coco/images/train2017 # 118287 images
+val: ../coco/images/val2017 # 5000 images
+test: ../coco/images/test2017
+anno_path: ../coco/annotations/instances_val2017.json
+# number of classes
+nc: 80
+# whether it is the COCO dataset; only the COCO dataset should be set to True.
+is_coco: True
+
+# class names
+names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
+         'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+         'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
+         'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+         'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+         'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+         'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
+         'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+         'hair drier', 'toothbrush' ]
data/dataset.yaml
ADDED
@@ -0,0 +1,11 @@
+# Please ensure that your custom_dataset is placed in the same parent dir as YOLOv6_DIR
+train: ../custom_dataset/images/train # train images
+val: ../custom_dataset/images/val # val images
+test: ../custom_dataset/images/test # test images (optional)
+
+# whether it is the COCO dataset; only the COCO dataset should be set to True.
+is_coco: False
+# Classes
+nc: 20  # number of classes
+names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
+        'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] # class names
data/voc.yaml
ADDED
@@ -0,0 +1,11 @@
+# Please ensure that your custom_dataset is placed in the same parent dir as YOLOv6_DIR
+train: VOCdevkit/voc_07_12/images/train # train images
+val: VOCdevkit/voc_07_12/images/val # val images
+test: VOCdevkit/voc_07_12/images/val # test images (optional)
+
+# whether it is the COCO dataset; only the COCO dataset should be set to True.
+is_coco: False
+# Classes
+nc: 20  # number of classes
+names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
+        'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] # class names
example_1.jpg
ADDED
Git LFS Details
example_1.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52e97530eb82cb036d6cd3dc6f141fbeaa15461b3346a11649a64bda9be7e828
+size 3890679
example_2.jpg
ADDED
Git LFS Details
example_2.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6223d01dcc060f8598d0a79da33baaa6d4049087d650224e25771a670aee0a6a
+size 4137103
example_3.jpg
ADDED
Git LFS Details
example_3.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3974097c49918132965c02a121ec45e525d53216f61ccdcdd4a5247a193468ff
+size 4991487
inferer.py
ADDED
@@ -0,0 +1,238 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+import math
+import os.path as osp
+
+import cv2
+import numpy as np
+import torch
+from huggingface_hub import hf_hub_download
+from PIL import Image, ImageFont
+
+from yolov6.data.data_augment import letterbox
+from yolov6.layers.common import DetectBackend
+from yolov6.utils.events import LOGGER, load_yaml
+from yolov6.utils.nms import non_max_suppression
+
+
+class Inferer:
+    def __init__(self, model_id, device="cpu", yaml="coco.yaml", img_size=640, half=False):
+        self.__dict__.update(locals())
+
+        # Init model
+        self.img_size = img_size
+        cuda = device != "cpu" and torch.cuda.is_available()
+        self.device = torch.device("cuda:0" if cuda else "cpu")
+        self.model = DetectBackend(hf_hub_download(model_id, "model.pt"), device=self.device)
+        self.stride = self.model.stride
+        self.class_names = load_yaml(yaml)["names"]
+        self.img_size = self.check_img_size(self.img_size, s=self.stride)  # check image size
+
+        # Half precision
+        if half & (self.device.type != "cpu"):
+            self.model.model.half()
+        else:
+            self.model.model.float()
+            half = False
+
+        if self.device.type != "cpu":
+            self.model(
+                torch.zeros(1, 3, *self.img_size).to(self.device).type_as(next(self.model.model.parameters()))
+            )  # warmup
+
+        # Switch model to deploy status
+        self.model_switch(self.model, self.img_size)
+
+    def model_switch(self, model, img_size):
+        """Model switch to deploy status"""
+        from yolov6.layers.common import RepVGGBlock
+
+        for layer in model.modules():
+            if isinstance(layer, RepVGGBlock):
+                layer.switch_to_deploy()
+
+        LOGGER.info("Switch model to deploy modality.")
+
+    def __call__(
+        self,
+        path_or_image,
+        conf_thres=0.25,
+        iou_thres=0.45,
+        classes=None,
+        agnostic_nms=False,
+        max_det=1000,
+        hide_labels=False,
+        hide_conf=False,
+    ):
+        """Model Inference and results visualization"""
+
+        img, img_src = self.precess_image(path_or_image, self.img_size, self.stride, self.half)
+        img = img.to(self.device)
+        if len(img.shape) == 3:
+            img = img[None]
+            # expand for batch dim
+        pred_results = self.model(img)
+        det = non_max_suppression(pred_results, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)[0]
+
+        gn = torch.tensor(img_src.shape)[[1, 0, 1, 0]]  # normalization gain whwh
+        img_ori = img_src
+
+        # check image and font
+        assert (
+            img_ori.data.contiguous
+        ), "Image needs to be contiguous. Please apply to input images with np.ascontiguousarray(im)."
+        self.font_check()
+
+        if len(det):
+            det[:, :4] = self.rescale(img.shape[2:], det[:, :4], img_src.shape).round()
+
+            for *xyxy, conf, cls in reversed(det):
+                class_num = int(cls)  # integer class
+                label = (
+                    None
+                    if hide_labels
+                    else (self.class_names[class_num] if hide_conf else f"{self.class_names[class_num]} {conf:.2f}")
+                )
+
+                self.plot_box_and_label(
+                    img_ori,
+                    max(round(sum(img_ori.shape) / 2 * 0.003), 2),
+                    xyxy,
+                    label,
+                    color=self.generate_colors(class_num, True),
+                )
+
+        img_src = np.asarray(img_ori)
+
+        return img_src
+
+    @staticmethod
+    def precess_image(path_or_image, img_size, stride, half):
+        """Process image before image inference."""
+        if isinstance(path_or_image, str):
+            try:
+                img_src = cv2.imread(path_or_image)
+                assert img_src is not None, f"Invalid image: {path_or_image}"
+            except Exception as e:
+                LOGGER.warning(e)
+        elif isinstance(path_or_image, np.ndarray):
+            img_src = path_or_image
+        elif isinstance(path_or_image, Image.Image):
+            img_src = np.array(path_or_image)
+
+        image = letterbox(img_src, img_size, stride=stride)[0]
+
+        # Convert
+        image = image.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
+        image = torch.from_numpy(np.ascontiguousarray(image))
+        image = image.half() if half else image.float()  # uint8 to fp16/32
+        image /= 255  # 0 - 255 to 0.0 - 1.0
+
+        return image, img_src
+
+    @staticmethod
+    def rescale(ori_shape, boxes, target_shape):
+        """Rescale the output to the original image shape"""
+        ratio = min(ori_shape[0] / target_shape[0], ori_shape[1] / target_shape[1])
+        padding = (ori_shape[1] - target_shape[1] * ratio) / 2, (ori_shape[0] - target_shape[0] * ratio) / 2
+
+        boxes[:, [0, 2]] -= padding[0]
+        boxes[:, [1, 3]] -= padding[1]
+        boxes[:, :4] /= ratio
+
+        boxes[:, 0].clamp_(0, target_shape[1])  # x1
+        boxes[:, 1].clamp_(0, target_shape[0])  # y1
+        boxes[:, 2].clamp_(0, target_shape[1])  # x2
+        boxes[:, 3].clamp_(0, target_shape[0])  # y2
+
+        return boxes
+
+    def check_img_size(self, img_size, s=32, floor=0):
+        """Make sure image size is a multiple of stride s in each dimension, and return a new shape list of image."""
+        if isinstance(img_size, int):  # integer i.e. img_size=640
+            new_size = max(self.make_divisible(img_size, int(s)), floor)
+        elif isinstance(img_size, list):  # list i.e. img_size=[640, 480]
+            new_size = [max(self.make_divisible(x, int(s)), floor) for x in img_size]
+        else:
+            raise Exception(f"Unsupported type of img_size: {type(img_size)}")
+
+        if new_size != img_size:
+            print(f"WARNING: --img-size {img_size} must be multiple of max stride {s}, updating to {new_size}")
+        return new_size if isinstance(img_size, list) else [new_size] * 2
+
+    def make_divisible(self, x, divisor):
+        # Round x up so that it is evenly divisible by the divisor.
+        return math.ceil(x / divisor) * divisor
+
+    @staticmethod
+    def plot_box_and_label(image, lw, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255)):
+        # Add one xyxy box to image with label
+        p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
+        cv2.rectangle(image, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
+        if label:
+            tf = max(lw - 1, 1)  # font thickness
+            w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0]  # text width, height
+            outside = p1[1] - h - 3 >= 0  # label fits outside box
+            p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
+            cv2.rectangle(image, p1, p2, color, -1, cv2.LINE_AA)  # filled
+            cv2.putText(
+                image,
+                label,
+                (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
+                0,
+                lw / 3,
+                txt_color,
+                thickness=tf,
+                lineType=cv2.LINE_AA,
+            )
+
+    @staticmethod
+    def font_check(font="./yolov6/utils/Arial.ttf", size=10):
+        # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary
+        assert osp.exists(font), f"font path not exists: {font}"
+        try:
+            return ImageFont.truetype(str(font) if font.exists() else font.name, size)
+        except Exception as e:  # download if missing
+            return ImageFont.truetype(str(font), size)
+
+    @staticmethod
+    def box_convert(x):
+        # Convert boxes with shape [n, 4] from [x1, y1, x2, y2] to [x, y, w, h] where x1y1=top-left, x2y2=bottom-right
+        y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+        y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
+        y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
+        y[:, 2] = x[:, 2] - x[:, 0]  # width
+        y[:, 3] = x[:, 3] - x[:, 1]  # height
+        return y
+
+    @staticmethod
+    def generate_colors(i, bgr=False):
+        hex = (
+            "FF3838",
+            "FF9D97",
+            "FF701F",
+            "FFB21D",
+            "CFD231",
+            "48F90A",
+            "92CC17",
+            "3DDB86",
+            "1A9334",
+            "00D4BB",
+            "2C99A8",
+            "00C2FF",
+            "344593",
+            "6473FF",
+            "0018EC",
+            "8438FF",
+            "520085",
+            "CB38FF",
+            "FF95C8",
+            "FF37C7",
+        )
+        palette = []
+        for iter in hex:
+            h = "#" + iter
+            palette.append(tuple(int(h[1 + i : 1 + i + 2], 16) for i in (0, 2, 4)))
+        num = len(palette)
+        color = palette[int(i) % num]
+        return (color[2], color[1], color[0]) if bgr else color
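Inferer is the piece app.py actually calls: the constructor pulls model.pt from the Hub via hf_hub_download, reparameterizes RepVGG blocks for deployment, and __call__ letterboxes the input, runs NMS, and draws boxes. Used standalone, a minimal sketch (same model id as app.py; file paths, PIL images, and BGR ndarrays are all accepted inputs):

    from inferer import Inferer

    inferer = Inferer("nateraw/yolov6s")  # downloads model.pt on first use
    result = inferer("example_1.jpg", conf_thres=0.25, iou_thres=0.45)
    # result is the annotated image as an ndarray; lower conf_thres to surface more detections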
packages.txt
ADDED
@@ -0,0 +1 @@
+ffmpeg
pyproject.toml
ADDED
@@ -0,0 +1,7 @@
+[tool.black]
+line-length = 120
+target_version = ['py37', 'py38', 'py39', 'py310']
+preview = true
+
+[tool.isort]
+profile = "black"
requirements.txt
ADDED
@@ -0,0 +1,15 @@
+huggingface_hub
+gradio
+torch>=1.8.0
+torchvision>=0.9.0
+numpy>=1.18.5
+opencv-python>=4.1.2
+PyYAML>=5.3.1
+scipy>=1.4.1
+# tqdm>=4.41.0
+# addict>=2.4.0
+# tensorboard>=2.7.0
+# pycocotools>=2.0
+# onnx>=1.10.0  # ONNX export
+# onnx-simplifier>=0.3.6  # ONNX simplifier
+# thop  # FLOPs computation
yolov6/core/engine.py
ADDED
@@ -0,0 +1,273 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+import os
+import time
+from copy import deepcopy
+import os.path as osp
+
+from tqdm import tqdm
+
+import numpy as np
+import torch
+from torch.cuda import amp
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.utils.tensorboard import SummaryWriter
+
+import tools.eval as eval
+from yolov6.data.data_load import create_dataloader
+from yolov6.models.yolo import build_model
+from yolov6.models.loss import ComputeLoss
+from yolov6.utils.events import LOGGER, NCOLS, load_yaml, write_tblog
+from yolov6.utils.ema import ModelEMA, de_parallel
+from yolov6.utils.checkpoint import load_state_dict, save_checkpoint, strip_optimizer
+from yolov6.solver.build import build_optimizer, build_lr_scheduler
+
+class Trainer:
+    def __init__(self, args, cfg, device):
+        self.args = args
+        self.cfg = cfg
+        self.device = device
+
+        if args.resume:
+            self.ckpt = torch.load(args.resume, map_location='cpu')
+
+        self.rank = args.rank
+        self.local_rank = args.local_rank
+        self.world_size = args.world_size
+        self.main_process = self.rank in [-1, 0]
+        self.save_dir = args.save_dir
+        # get data loader
+        self.data_dict = load_yaml(args.data_path)
+        self.num_classes = self.data_dict['nc']
+        self.train_loader, self.val_loader = self.get_data_loader(args, cfg, self.data_dict)
+        # get model and optimizer
+        model = self.get_model(args, cfg, self.num_classes, device)
+        self.optimizer = self.get_optimizer(args, cfg, model)
+        self.scheduler, self.lf = self.get_lr_scheduler(args, cfg, self.optimizer)
+        self.ema = ModelEMA(model) if self.main_process else None
+        # tensorboard
+        self.tblogger = SummaryWriter(self.save_dir) if self.main_process else None
+        self.start_epoch = 0
+        # resume
+        if hasattr(self, "ckpt"):
+            resume_state_dict = self.ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
+            model.load_state_dict(resume_state_dict, strict=True)  # load
+            self.start_epoch = self.ckpt['epoch'] + 1
+            self.optimizer.load_state_dict(self.ckpt['optimizer'])
+            if self.main_process:
+                self.ema.ema.load_state_dict(self.ckpt['ema'].float().state_dict())
+                self.ema.updates = self.ckpt['updates']
+        self.model = self.parallel_model(args, model, device)
+        self.model.nc, self.model.names = self.data_dict['nc'], self.data_dict['names']
+
+        self.max_epoch = args.epochs
+        self.max_stepnum = len(self.train_loader)
+        self.batch_size = args.batch_size
+        self.img_size = args.img_size
+
+
+    # Training Process
+    def train(self):
+        try:
+            self.train_before_loop()
+            for self.epoch in range(self.start_epoch, self.max_epoch):
+                self.train_in_loop()
+
+        except Exception as _:
+            LOGGER.error('ERROR in training loop or eval/save model.')
+            raise
+        finally:
+            self.train_after_loop()
+
+    # Training loop for each epoch
+    def train_in_loop(self):
+        try:
+            self.prepare_for_steps()
+            for self.step, self.batch_data in self.pbar:
+                self.train_in_steps()
+                self.print_details()
+        except Exception as _:
+            LOGGER.error('ERROR in training steps.')
+            raise
+        try:
+            self.eval_and_save()
+        except Exception as _:
+            LOGGER.error('ERROR in evaluate and save model.')
+            raise
+
+    # Training loop for each batch of data
+    def train_in_steps(self):
+        images, targets = self.prepro_data(self.batch_data, self.device)
+        # forward
+        with amp.autocast(enabled=self.device != 'cpu'):
+            preds = self.model(images)
+            total_loss, loss_items = self.compute_loss(preds, targets)
+            if self.rank != -1:
+                total_loss *= self.world_size
+        # backward
+        self.scaler.scale(total_loss).backward()
+        self.loss_items = loss_items
+        self.update_optimizer()
+
+    def eval_and_save(self):
+        remaining_epochs = self.max_epoch - self.epoch
+        eval_interval = self.args.eval_interval if remaining_epochs > self.args.heavy_eval_range else 1
+        is_val_epoch = (not self.args.eval_final_only or (remaining_epochs == 1)) and (self.epoch % eval_interval == 0)
+        if self.main_process:
+            self.ema.update_attr(self.model, include=['nc', 'names', 'stride'])  # update attributes for ema model
+            if is_val_epoch:
+                self.eval_model()
+                self.ap = self.evaluate_results[0] * 0.1 + self.evaluate_results[1] * 0.9
+                self.best_ap = max(self.ap, self.best_ap)
+            # save ckpt
+            ckpt = {
+                'model': deepcopy(de_parallel(self.model)).half(),
+                'ema': deepcopy(self.ema.ema).half(),
+                'updates': self.ema.updates,
+                'optimizer': self.optimizer.state_dict(),
+                'epoch': self.epoch,
+            }
+
+            save_ckpt_dir = osp.join(self.save_dir, 'weights')
+            save_checkpoint(ckpt, (is_val_epoch) and (self.ap == self.best_ap), save_ckpt_dir, model_name='last_ckpt')
+            del ckpt
+            # log for tensorboard
+            write_tblog(self.tblogger, self.epoch, self.evaluate_results, self.mean_loss)
+
+    def eval_model(self):
+        results = eval.run(self.data_dict,
+                           batch_size=self.batch_size // self.world_size * 2,
+                           img_size=self.img_size,
+                           model=self.ema.ema,
+                           dataloader=self.val_loader,
+                           save_dir=self.save_dir,
+                           task='train')
+
+        LOGGER.info(f"Epoch: {self.epoch} | mAP@0.5: {results[0]} | mAP@0.50:0.95: {results[1]}")
+        self.evaluate_results = results[:2]
+
+    def train_before_loop(self):
+        LOGGER.info('Training start...')
+        self.start_time = time.time()
+        self.warmup_stepnum = max(round(self.cfg.solver.warmup_epochs * self.max_stepnum), 1000)
+        self.scheduler.last_epoch = self.start_epoch - 1
+        self.last_opt_step = -1
+        self.scaler = amp.GradScaler(enabled=self.device != 'cpu')
+
+        self.best_ap, self.ap = 0.0, 0.0
+        self.evaluate_results = (0, 0)  # AP50, AP50_95
+        self.compute_loss = ComputeLoss(iou_type=self.cfg.model.head.iou_type)
+
+    def prepare_for_steps(self):
+        if self.epoch > self.start_epoch:
+            self.scheduler.step()
+        self.model.train()
+        if self.rank != -1:
+            self.train_loader.sampler.set_epoch(self.epoch)
+        self.mean_loss = torch.zeros(4, device=self.device)
+        self.optimizer.zero_grad()
+
+        LOGGER.info(('\n' + '%10s' * 5) % ('Epoch', 'iou_loss', 'l1_loss', 'obj_loss', 'cls_loss'))
+        self.pbar = enumerate(self.train_loader)
+        if self.main_process:
+            self.pbar = tqdm(self.pbar, total=self.max_stepnum, ncols=NCOLS, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')
+
+    # Print loss after each step
+    def print_details(self):
+        if self.main_process:
+            self.mean_loss = (self.mean_loss * self.step + self.loss_items) / (self.step + 1)
+            self.pbar.set_description(('%10s' + '%10.4g' * 4) % (f'{self.epoch}/{self.max_epoch - 1}', \
+                                                                 *(self.mean_loss)))
+
+    # Empty cache if training finished
+    def train_after_loop(self):
+        if self.main_process:
+            LOGGER.info(f'\nTraining completed in {(time.time() - self.start_time) / 3600:.3f} hours.')
+            save_ckpt_dir = osp.join(self.save_dir, 'weights')
+            strip_optimizer(save_ckpt_dir, self.epoch)  # strip optimizers for saved pt model
+        if self.device != 'cpu':
+            torch.cuda.empty_cache()
+
+    def update_optimizer(self):
+        curr_step = self.step + self.max_stepnum * self.epoch
+        self.accumulate = max(1, round(64 / self.batch_size))
+        if curr_step <= self.warmup_stepnum:
+            self.accumulate = max(1, np.interp(curr_step, [0, self.warmup_stepnum], [1, 64 / self.batch_size]).round())
+            for k, param in enumerate(self.optimizer.param_groups):
+                warmup_bias_lr = self.cfg.solver.warmup_bias_lr if k == 2 else 0.0
+                param['lr'] = np.interp(curr_step, [0, self.warmup_stepnum], [warmup_bias_lr, param['initial_lr'] * self.lf(self.epoch)])
+                if 'momentum' in param:
+                    param['momentum'] = np.interp(curr_step, [0, self.warmup_stepnum], [self.cfg.solver.warmup_momentum, self.cfg.solver.momentum])
+        if curr_step - self.last_opt_step >= self.accumulate:
+            self.scaler.step(self.optimizer)
+            self.scaler.update()
+            self.optimizer.zero_grad()
+            if self.ema:
+                self.ema.update(self.model)
+            self.last_opt_step = curr_step
+
+    @staticmethod
+    def get_data_loader(args, cfg, data_dict):
+        train_path, val_path = data_dict['train'], data_dict['val']
+        # check data
+        nc = int(data_dict['nc'])
+        class_names = data_dict['names']
+        assert len(class_names) == nc, f'the length of class names does not match the number of classes defined'
+        grid_size = max(int(max(cfg.model.head.strides)), 32)
+        # create train dataloader
+        train_loader = create_dataloader(train_path, args.img_size, args.batch_size // args.world_size, grid_size,
+                                         hyp=dict(cfg.data_aug), augment=True, rect=False, rank=args.local_rank,
+                                         workers=args.workers, shuffle=True, check_images=args.check_images,
+                                         check_labels=args.check_labels, data_dict=data_dict, task='train')[0]
+        # create val dataloader
+        val_loader = None
+        if args.rank in [-1, 0]:
+            val_loader = create_dataloader(val_path, args.img_size, args.batch_size // args.world_size * 2, grid_size,
+                                           hyp=dict(cfg.data_aug), rect=True, rank=-1, pad=0.5,
+                                           workers=args.workers, check_images=args.check_images,
+                                           check_labels=args.check_labels, data_dict=data_dict, task='val')[0]
+
+        return train_loader, val_loader
+
+    @staticmethod
+    def prepro_data(batch_data, device):
+        images = batch_data[0].to(device, non_blocking=True).float() / 255
+        targets = batch_data[1].to(device)
+        return images, targets
+
+    def get_model(self, args, cfg, nc, device):
+        model = build_model(cfg, nc, device)
+        weights = cfg.model.pretrained
+        if weights:  # finetune if pretrained model is set
+            LOGGER.info(f'Loading state_dict from {weights} for fine-tuning...')
+            model = load_state_dict(weights, model, map_location=device)
+        LOGGER.info('Model: {}'.format(model))
+        return model
+
+    @staticmethod
+    def parallel_model(args, model, device):
+        # If DP mode
+        dp_mode = device.type != 'cpu' and args.rank == -1
+        if dp_mode and torch.cuda.device_count() > 1:
+            LOGGER.warning('WARNING: DP not recommended, use DDP instead.\n')
+            model = torch.nn.DataParallel(model)
+
+        # If DDP mode
+        ddp_mode = device.type != 'cpu' and args.rank != -1
+        if ddp_mode:
+            model = DDP(model, device_ids=[args.local_rank], output_device=args.local_rank)
+
+        return model
+
+    def get_optimizer(self, args, cfg, model):
+        accumulate = max(1, round(64 / args.batch_size))
+        cfg.solver.weight_decay *= args.batch_size * accumulate / 64
+        optimizer = build_optimizer(cfg, model)
+        return optimizer
+
+    @staticmethod
+    def get_lr_scheduler(args, cfg, optimizer):
+        epochs = args.epochs
+        lr_scheduler, lf = build_lr_scheduler(cfg, optimizer, epochs)
+        return lr_scheduler, lf
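The subtle part of Trainer.update_optimizer is the warmup schedule: both the gradient-accumulation count and each param group's learning rate are linearly interpolated with np.interp over the first warmup_stepnum steps, so the effective batch size ramps toward 64. A standalone illustration of that arithmetic (warmup_stepnum=1000 and batch_size=16 are made-up values for the example; the real ones come from cfg and args):

    import numpy as np

    warmup_stepnum, batch_size = 1000, 16  # hypothetical values for illustration
    for curr_step in (0, 500, 1000):
        accumulate = max(1, np.interp(curr_step, [0, warmup_stepnum], [1, 64 / batch_size]).round())
        print(curr_step, accumulate)       # 0 -> 1.0, 500 -> 2.0, 1000 -> 4.0
        # (500 gives 2.0 rather than 3.0 because np.round rounds 2.5 half-to-even)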
yolov6/core/evaler.py
ADDED
@@ -0,0 +1,256 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+import os
+from tqdm import tqdm
+import numpy as np
+import json
+import torch
+import yaml
+from pathlib import Path
+
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+
+from yolov6.data.data_load import create_dataloader
+from yolov6.utils.events import LOGGER, NCOLS
+from yolov6.utils.nms import non_max_suppression
+from yolov6.utils.checkpoint import load_checkpoint
+from yolov6.utils.torch_utils import time_sync, get_model_info
+
+'''
+python tools/eval.py --task 'train'/'val'/'speed'
+'''
+
+
+class Evaler:
+    def __init__(self,
+                 data,
+                 batch_size=32,
+                 img_size=640,
+                 conf_thres=0.001,
+                 iou_thres=0.65,
+                 device='',
+                 half=True,
+                 save_dir=''):
+        self.data = data
+        self.batch_size = batch_size
+        self.img_size = img_size
+        self.conf_thres = conf_thres
+        self.iou_thres = iou_thres
+        self.device = device
+        self.half = half
+        self.save_dir = save_dir
+
+    def init_model(self, model, weights, task):
+        if task != 'train':
+            model = load_checkpoint(weights, map_location=self.device)
+            self.stride = int(model.stride.max())
+            if self.device.type != 'cpu':
+                model(torch.zeros(1, 3, self.img_size, self.img_size).to(self.device).type_as(next(model.parameters())))
+            # switch to deploy
+            from yolov6.layers.common import RepVGGBlock
+            for layer in model.modules():
+                if isinstance(layer, RepVGGBlock):
+                    layer.switch_to_deploy()
+            LOGGER.info("Switch model to deploy modality.")
+            LOGGER.info("Model Summary: {}".format(get_model_info(model, self.img_size)))
+        model.half() if self.half else model.float()
+        return model
+
+    def init_data(self, dataloader, task):
+        '''Initialize dataloader.
+        Returns a dataloader for task val or speed.
+        '''
+        self.is_coco = self.data.get("is_coco", False)
+        self.ids = self.coco80_to_coco91_class() if self.is_coco else list(range(1000))
+        if task != 'train':
+            pad = 0.0 if task == 'speed' else 0.5
+            dataloader = create_dataloader(self.data[task if task in ('train', 'val', 'test') else 'val'],
+                                           self.img_size, self.batch_size, self.stride, check_labels=True, pad=pad, rect=True,
+                                           data_dict=self.data, task=task)[0]
+        return dataloader
+
+    def predict_model(self, model, dataloader, task):
+        '''Model prediction
+        Predicts the whole dataset and gets the predicted results and inference time.
+        '''
+        self.speed_result = torch.zeros(4, device=self.device)
+        pred_results = []
+        pbar = tqdm(dataloader, desc="Inferencing model in val datasets.", ncols=NCOLS)
+        for imgs, targets, paths, shapes in pbar:
+            # pre-process
+            t1 = time_sync()
+            imgs = imgs.to(self.device, non_blocking=True)
+            imgs = imgs.half() if self.half else imgs.float()
+            imgs /= 255
+            self.speed_result[1] += time_sync() - t1  # pre-process time
+
+            # Inference
+            t2 = time_sync()
+            outputs = model(imgs)
+            self.speed_result[2] += time_sync() - t2  # inference time
+
+            # post-process
+            t3 = time_sync()
+            outputs = non_max_suppression(outputs, self.conf_thres, self.iou_thres, multi_label=True)
+            self.speed_result[3] += time_sync() - t3  # post-process time
+            self.speed_result[0] += len(outputs)
+
+            # save result
+            pred_results.extend(self.convert_to_coco_format(outputs, imgs, paths, shapes, self.ids))
+        return pred_results
+
+    def eval_model(self, pred_results, model, dataloader, task):
+        '''Evaluate models
+        For task speed, this function only evaluates the speed of model and outputs inference time.
+        For task val, this function evaluates the speed and mAP by pycocotools, and returns
+        inference time and mAP value.
+        '''
+        LOGGER.info(f'\nEvaluating speed.')
+        self.eval_speed(task)
+
+        LOGGER.info(f'\nEvaluating mAP by pycocotools.')
+        if task != 'speed' and len(pred_results):
+            if 'anno_path' in self.data:
+                anno_json = self.data['anno_path']
+            else:
+                # generated coco format labels in dataset initialization
+                dataset_root = os.path.dirname(os.path.dirname(self.data['val']))
+                base_name = os.path.basename(self.data['val'])
+                anno_json = os.path.join(dataset_root, 'annotations', f'instances_{base_name}.json')
+            pred_json = os.path.join(self.save_dir, "predictions.json")
+            LOGGER.info(f'Saving {pred_json}...')
+            with open(pred_json, 'w') as f:
+                json.dump(pred_results, f)
+
+            anno = COCO(anno_json)
+            pred = anno.loadRes(pred_json)
+            cocoEval = COCOeval(anno, pred, 'bbox')
+            if self.is_coco:
+                imgIds = [int(os.path.basename(x).split(".")[0])
+                          for x in dataloader.dataset.img_paths]
+                cocoEval.params.imgIds = imgIds
+            cocoEval.evaluate()
+            cocoEval.accumulate()
+            cocoEval.summarize()
+            map, map50 = cocoEval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
+            # Return results
+            model.float()  # for training
+            if task != 'train':
+                LOGGER.info(f"Results saved to {self.save_dir}")
+            return (map50, map)
+        return (0.0, 0.0)
+
+    def eval_speed(self, task):
+        '''Evaluate model inference speed.'''
+        if task != 'train':
+            n_samples = self.speed_result[0].item()
+            pre_time, inf_time, nms_time = 1000 * self.speed_result[1:].cpu().numpy() / n_samples
+            for n, v in zip(["pre-process", "inference", "NMS"], [pre_time, inf_time, nms_time]):
+                LOGGER.info("Average {} time: {:.2f} ms".format(n, v))
+
+    def box_convert(self, x):
+        # Convert boxes with shape [n, 4] from [x1, y1, x2, y2] to [x, y, w, h] where x1y1=top-left, x2y2=bottom-right
+        y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+        y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
+        y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
+        y[:, 2] = x[:, 2] - x[:, 0]  # width
+        y[:, 3] = x[:, 3] - x[:, 1]  # height
+        return y
+
+    def scale_coords(self, img1_shape, coords, img0_shape, ratio_pad=None):
+        # Rescale coords (xyxy) from img1_shape to img0_shape
+        if ratio_pad is None:  # calculate from img0_shape
+            gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
+            pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
+        else:
+            gain = ratio_pad[0][0]
+            pad = ratio_pad[1]
+
+        coords[:, [0, 2]] -= pad[0]  # x padding
+        coords[:, [1, 3]] -= pad[1]  # y padding
+        coords[:, :4] /= gain
+        if isinstance(coords, torch.Tensor):  # faster individually
+            coords[:, 0].clamp_(0, img0_shape[1])  # x1
+            coords[:, 1].clamp_(0, img0_shape[0])  # y1
+            coords[:, 2].clamp_(0, img0_shape[1])  # x2
+            coords[:, 3].clamp_(0, img0_shape[0])  # y2
+        else:  # np.array (faster grouped)
+            coords[:, [0, 2]] = coords[:, [0, 2]].clip(0, img0_shape[1])  # x1, x2
+            coords[:, [1, 3]] = coords[:, [1, 3]].clip(0, img0_shape[0])  # y1, y2
+        return coords
+
+    def convert_to_coco_format(self, outputs, imgs, paths, shapes, ids):
+        pred_results = []
+        for i, pred in enumerate(outputs):
+            if len(pred) == 0:
+                continue
+            path, shape = Path(paths[i]), shapes[i][0]
+            self.scale_coords(imgs[i].shape[1:], pred[:, :4], shape, shapes[i][1])
+            image_id = int(path.stem) if path.stem.isnumeric() else path.stem
+            bboxes = self.box_convert(pred[:, 0:4])
+            bboxes[:, :2] -= bboxes[:, 2:] / 2
+            cls = pred[:, 5]
+            scores = pred[:, 4]
+            for ind in range(pred.shape[0]):
+                category_id = ids[int(cls[ind])]
+                bbox = [round(x, 3) for x in bboxes[ind].tolist()]
+                score = round(scores[ind].item(), 5)
+                pred_data = {
+                    "image_id": image_id,
+                    "category_id": category_id,
+                    "bbox": bbox,
+                    "score": score
+                }
+                pred_results.append(pred_data)
+        return pred_results
+
+    @staticmethod
+    def check_task(task):
+        if task not in ['train', 'val', 'speed']:
+            raise Exception("task argument error: only support 'train' / 'val' / 'speed' task.")
+
+    @staticmethod
+    def reload_thres(conf_thres, iou_thres, task):
+        '''Sets conf and iou threshold for task val/speed'''
+        if task != 'train':
+            if task == 'val':
+                conf_thres = 0.001
+            if task == 'speed':
+                conf_thres = 0.25
+                iou_thres = 0.45
+        return conf_thres, iou_thres
+
+    @staticmethod
+    def reload_device(device, model, task):
+        # device = 'cpu' or '0' or '0,1,2,3'
+        if task == 'train':
+            device = next(model.parameters()).device
+        else:
+            if device == 'cpu':
+                os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
+            elif device:
+                os.environ['CUDA_VISIBLE_DEVICES'] = device
+                assert torch.cuda.is_available()
+            cuda = device != 'cpu' and torch.cuda.is_available()
+            device = torch.device('cuda:0' if cuda else 'cpu')
+        return device
+
+    @staticmethod
+    def reload_dataset(data):
+        with open(data, errors='ignore') as yaml_file:
+            data = yaml.safe_load(yaml_file)
+        val = data.get('val')
+        if not os.path.exists(val):
+            raise Exception('Dataset not found.')
+        return data
+
+    @staticmethod
+    def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)
+        # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
+        x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20,
+             21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+             41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
+             59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79,
+             80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
+        return x
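convert_to_coco_format leans on the box_convert plus top-left-shift pair above: NMS outputs xyxy corners, box_convert turns them into center-xywh, and subtracting half the size yields the top-left-xywh layout COCO's evaluator expects. The same arithmetic on one concrete box:

    import numpy as np

    xyxy = np.array([[10.0, 20.0, 110.0, 220.0]])  # x1, y1, x2, y2
    xywh = xyxy.copy()
    xywh[:, 0] = (xyxy[:, 0] + xyxy[:, 2]) / 2     # x center -> 60
    xywh[:, 1] = (xyxy[:, 1] + xyxy[:, 3]) / 2     # y center -> 120
    xywh[:, 2] = xyxy[:, 2] - xyxy[:, 0]           # width    -> 100
    xywh[:, 3] = xyxy[:, 3] - xyxy[:, 1]           # height   -> 200
    xywh[:, :2] -= xywh[:, 2:] / 2                 # back to top-left: [10, 20, 100, 200]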
yolov6/core/inferer.py
ADDED
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import os
import os.path as osp
import math
from tqdm import tqdm
import numpy as np
import cv2
import torch
from PIL import ImageFont

from yolov6.utils.events import LOGGER, load_yaml
from yolov6.layers.common import DetectBackend
from yolov6.data.data_augment import letterbox
from yolov6.utils.nms import non_max_suppression
from yolov6.utils.torch_utils import get_model_info


class Inferer:
    def __init__(self, source, weights, device, yaml, img_size, half):
        import glob
        from yolov6.data.datasets import IMG_FORMATS

        self.__dict__.update(locals())

        # Init model
        self.device = device
        self.img_size = img_size
        cuda = self.device != 'cpu' and torch.cuda.is_available()
        self.device = torch.device('cuda:0' if cuda else 'cpu')
        self.model = DetectBackend(weights, device=self.device)
        self.stride = self.model.stride
        self.class_names = load_yaml(yaml)['names']
        self.img_size = self.check_img_size(self.img_size, s=self.stride)  # check image size

        # Half precision
        if half and (self.device.type != 'cpu'):
            self.model.model.half()
        else:
            self.model.model.float()
            half = False

        if self.device.type != 'cpu':
            self.model(torch.zeros(1, 3, *self.img_size).to(self.device).type_as(next(self.model.model.parameters())))  # warmup

        # Load data
        if os.path.isdir(source):
            img_paths = sorted(glob.glob(os.path.join(source, '*.*')))  # dir
        elif os.path.isfile(source):
            img_paths = [source]  # files
        else:
            raise Exception(f'Invalid path: {source}')
        self.img_paths = [img_path for img_path in img_paths if img_path.split('.')[-1].lower() in IMG_FORMATS]

        # Switch model to deploy status
        self.model_switch(self.model, self.img_size)

    def model_switch(self, model, img_size):
        ''' Model switch to deploy status '''
        from yolov6.layers.common import RepVGGBlock
        for layer in model.modules():
            if isinstance(layer, RepVGGBlock):
                layer.switch_to_deploy()

        LOGGER.info("Switch model to deploy modality.")

    def infer(self, conf_thres, iou_thres, classes, agnostic_nms, max_det, save_dir, save_txt, save_img, hide_labels, hide_conf):
        ''' Model Inference and results visualization '''

        for img_path in tqdm(self.img_paths):
            img, img_src = self.precess_image(img_path, self.img_size, self.stride, self.half)
            img = img.to(self.device)
            if len(img.shape) == 3:
                img = img[None]  # expand for batch dim
            pred_results = self.model(img)
            det = non_max_suppression(pred_results, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)[0]

            save_path = osp.join(save_dir, osp.basename(img_path))  # im.jpg
            txt_path = osp.join(save_dir, 'labels', osp.splitext(osp.basename(img_path))[0])

            gn = torch.tensor(img_src.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            img_ori = img_src

            # check image and font
            assert img_ori.data.contiguous, 'Image needs to be contiguous. Please apply to input images with np.ascontiguousarray(im).'
            self.font_check()

            if len(det):
                det[:, :4] = self.rescale(img.shape[2:], det[:, :4], img_src.shape).round()

                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (self.box_convert(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf)
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img:
                        class_num = int(cls)  # integer class
                        label = None if hide_labels else (self.class_names[class_num] if hide_conf else f'{self.class_names[class_num]} {conf:.2f}')

                        self.plot_box_and_label(img_ori, max(round(sum(img_ori.shape) / 2 * 0.003), 2), xyxy, label, color=self.generate_colors(class_num, True))

                img_src = np.asarray(img_ori)

            # Save results (image with detections)
            if save_img:
                cv2.imwrite(save_path, img_src)

    @staticmethod
    def precess_image(path, img_size, stride, half):
        '''Process image before image inference.'''
        try:
            img_src = cv2.imread(path)
            assert img_src is not None, f'Invalid image: {path}'
        except Exception as e:
            LOGGER.warning(e)
        image = letterbox(img_src, img_size, stride=stride)[0]

        # Convert
        image = image.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        image = torch.from_numpy(np.ascontiguousarray(image))
        image = image.half() if half else image.float()  # uint8 to fp16/32
        image /= 255  # 0 - 255 to 0.0 - 1.0

        return image, img_src

    @staticmethod
    def rescale(ori_shape, boxes, target_shape):
        '''Rescale the output to the original image shape'''
        ratio = min(ori_shape[0] / target_shape[0], ori_shape[1] / target_shape[1])
        padding = (ori_shape[1] - target_shape[1] * ratio) / 2, (ori_shape[0] - target_shape[0] * ratio) / 2

        boxes[:, [0, 2]] -= padding[0]
        boxes[:, [1, 3]] -= padding[1]
        boxes[:, :4] /= ratio

        boxes[:, 0].clamp_(0, target_shape[1])  # x1
        boxes[:, 1].clamp_(0, target_shape[0])  # y1
        boxes[:, 2].clamp_(0, target_shape[1])  # x2
        boxes[:, 3].clamp_(0, target_shape[0])  # y2

        return boxes

    def check_img_size(self, img_size, s=32, floor=0):
        """Make sure image size is a multiple of stride s in each dimension, and return a new shape list of image."""
        if isinstance(img_size, int):  # integer i.e. img_size=640
            new_size = max(self.make_divisible(img_size, int(s)), floor)
        elif isinstance(img_size, list):  # list i.e. img_size=[640, 480]
            new_size = [max(self.make_divisible(x, int(s)), floor) for x in img_size]
        else:
            raise Exception(f"Unsupported type of img_size: {type(img_size)}")

        if new_size != img_size:
            print(f'WARNING: --img-size {img_size} must be multiple of max stride {s}, updating to {new_size}')
        return new_size if isinstance(img_size, list) else [new_size] * 2

    def make_divisible(self, x, divisor):
        # Upward revision the value x to make it evenly divisible by the divisor.
        return math.ceil(x / divisor) * divisor

    @staticmethod
    def plot_box_and_label(image, lw, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
        # Add one xyxy box to image with label
        p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
        cv2.rectangle(image, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
        if label:
            tf = max(lw - 1, 1)  # font thickness
            w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0]  # text width, height
            outside = p1[1] - h - 3 >= 0  # label fits outside box
            p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
            cv2.rectangle(image, p1, p2, color, -1, cv2.LINE_AA)  # filled
            cv2.putText(image, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, lw / 3, txt_color,
                        thickness=tf, lineType=cv2.LINE_AA)

    @staticmethod
    def font_check(font='./yolov6/utils/Arial.ttf', size=10):
        # Return a PIL TrueType Font loaded from the local repo path
        assert osp.exists(font), f'font path not exists: {font}'
        return ImageFont.truetype(str(font), size)

    @staticmethod
    def box_convert(x):
        # Convert boxes with shape [n, 4] from [x1, y1, x2, y2] to [x, y, w, h] where x1y1=top-left, x2y2=bottom-right
        y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
        y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
        y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
        y[:, 2] = x[:, 2] - x[:, 0]  # width
        y[:, 3] = x[:, 3] - x[:, 1]  # height
        return y

    @staticmethod
    def generate_colors(i, bgr=False):
        hex = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
               '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
        palette = []
        for iter in hex:
            h = '#' + iter
            palette.append(tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)))
        num = len(palette)
        color = palette[int(i) % num]
        return (color[2], color[1], color[0]) if bgr else color


class VideoInferer(Inferer):

    def setup_source(self, source):
        # Load data
        if os.path.isfile(source):
            self.vid_path = source
            self.vid_name = '.'.join(os.path.basename(source).split('.')[:-1])
        else:
            raise Exception(f'Invalid path: {source}')

        self.cap = cv2.VideoCapture(self.vid_path)

    def iterator_length(self):
        return int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def img_iterator(self):
        cur_fid = 0
        ret, frame = self.cap.read()

        while ret:
            yield frame, f'{self.vid_name}_frame_{cur_fid:06}.jpg'
            ret, frame = self.cap.read()
            cur_fid += 1
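For context, a minimal usage sketch of the Inferer defined above (illustrative only; the weights path, image folder, and output directory are placeholders, and save_dir must already exist since cv2.imwrite does not create directories):

from yolov6.core.inferer import Inferer

# Placeholder paths: 'yolov6s.pt' weights and an 'imgs/' folder of test images.
inferer = Inferer(source='imgs/', weights='yolov6s.pt', device='cpu',
                  yaml='data/coco.yaml', img_size=640, half=False)
inferer.infer(conf_thres=0.25, iou_thres=0.45, classes=None, agnostic_nms=False,
              max_det=1000, save_dir='runs', save_txt=False, save_img=True,
              hide_labels=False, hide_conf=False)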
yolov6/data/data_augment.py
ADDED
@@ -0,0 +1,193 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# This code is based on
# https://github.com/ultralytics/yolov5/blob/master/utils/dataloaders.py

import math
import random

import cv2
import numpy as np


def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
    # HSV color-space augmentation
    if hgain or sgain or vgain:
        r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
        hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
        dtype = im.dtype  # uint8

        x = np.arange(0, 256, dtype=r.dtype)
        lut_hue = ((x * r[0]) % 180).astype(dtype)
        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

        im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
        cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im)  # no return needed


def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding

    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, r, (dw, dh)


def mixup(im, labels, im2, labels2):
    # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
    r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
    im = (im * r + im2 * (1 - r)).astype(np.uint8)
    labels = np.concatenate((labels, labels2), 0)
    return im, labels


def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
    # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
    ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)  # candidates


def random_affine(img, labels=(), degrees=10, translate=.1, scale=.1, shear=10,
                  new_shape=(640, 640)):

    n = len(labels)
    height, width = new_shape

    M, s = get_transform_matrix(img.shape[:2], (height, width), degrees, scale, shear, translate)
    if (M != np.eye(3)).any():  # image changed
        img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

    # Transform label coordinates
    if n:
        new = np.zeros((n, 4))

        xy = np.ones((n * 4, 3))
        xy[:, :2] = labels[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        xy = xy @ M.T  # transform
        xy = xy[:, :2].reshape(n, 8)  # perspective rescale or affine

        # create new boxes
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # clip
        new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
        new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)

        # filter candidates
        i = box_candidates(box1=labels[:, 1:5].T * s, box2=new.T, area_thr=0.1)
        labels = labels[i]
        labels[:, 1:5] = new[i]

    return img, labels


def get_transform_matrix(img_shape, new_shape, degrees, scale, shear, translate):
    new_height, new_width = new_shape
    # Center
    C = np.eye(3)
    C[0, 2] = -img_shape[1] / 2  # x translation (pixels)
    C[1, 2] = -img_shape[0] / 2  # y translation (pixels)

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1 + scale)
    # s = 2 ** random.uniform(-scale, scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * new_width  # x translation (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * new_height  # y translation (pixels)

    # Combined rotation matrix
    M = T @ S @ R @ C  # order of operations (right to left) is IMPORTANT
    return M, s


def mosaic_augmentation(img_size, imgs, hs, ws, labels, hyp):

    assert len(imgs) == 4, "Mosaic augmentation of current version only supports 4 images."

    labels4 = []
    s = img_size
    yc, xc = (int(random.uniform(s//2, 3*s//2)) for _ in range(2))  # mosaic center x, y
    for i in range(len(imgs)):
        # Load image
        img, h, w = imgs[i], hs[i], ws[i]
        # place img in img4
        if i == 0:  # top left
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw = x1a - x1b
        padh = y1a - y1b

        # Labels
        labels_per_img = labels[i].copy()
        if labels_per_img.size:
            boxes = np.copy(labels_per_img[:, 1:])
            boxes[:, 0] = w * (labels_per_img[:, 1] - labels_per_img[:, 3] / 2) + padw  # top left x
            boxes[:, 1] = h * (labels_per_img[:, 2] - labels_per_img[:, 4] / 2) + padh  # top left y
            boxes[:, 2] = w * (labels_per_img[:, 1] + labels_per_img[:, 3] / 2) + padw  # bottom right x
            boxes[:, 3] = h * (labels_per_img[:, 2] + labels_per_img[:, 4] / 2) + padh  # bottom right y
            labels_per_img[:, 1:] = boxes

        labels4.append(labels_per_img)

    # Concat/clip labels
    labels4 = np.concatenate(labels4, 0)
    for x in (labels4[:, 1:]):
        np.clip(x, 0, 2 * s, out=x)

    # Augment
    img4, labels4 = random_affine(img4, labels4,
                                  degrees=hyp['degrees'],
                                  translate=hyp['translate'],
                                  scale=hyp['scale'],
                                  shear=hyp['shear'])

    return img4, labels4
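A short standalone check of what letterbox returns, since its (image, ratio, padding) triple is consumed by both the inferer and the dataset code (illustrative sketch with a synthetic frame):

import numpy as np

frame = np.zeros((480, 640, 3), dtype=np.uint8)  # HWC, BGR frame
out, ratio, (dw, dh) = letterbox(frame, new_shape=(640, 640), auto=False)
assert out.shape[:2] == (640, 640)  # auto=False pads to the full target shape
assert ratio == 1.0                 # min(640/480, 640/640) capped at 1.0 by width
assert (dw, dh) == (0.0, 80.0)      # 160 px of height padding split over top/bottom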
yolov6/data/data_load.py
ADDED
@@ -0,0 +1,113 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# This code is based on
# https://github.com/ultralytics/yolov5/blob/master/utils/dataloaders.py

import os
from torch.utils.data import dataloader, distributed

from .datasets import TrainValDataset
from yolov6.utils.events import LOGGER
from yolov6.utils.torch_utils import torch_distributed_zero_first


def create_dataloader(
    path,
    img_size,
    batch_size,
    stride,
    hyp=None,
    augment=False,
    check_images=False,
    check_labels=False,
    pad=0.0,
    rect=False,
    rank=-1,
    workers=8,
    shuffle=False,
    data_dict=None,
    task="Train",
):
    """Create general dataloader.

    Returns dataloader and dataset
    """
    if rect and shuffle:
        LOGGER.warning(
            "WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False"
        )
        shuffle = False
    with torch_distributed_zero_first(rank):
        dataset = TrainValDataset(
            path,
            img_size,
            batch_size,
            augment=augment,
            hyp=hyp,
            rect=rect,
            check_images=check_images,
            check_labels=check_labels,
            stride=int(stride),
            pad=pad,
            rank=rank,
            data_dict=data_dict,
            task=task,
        )

    batch_size = min(batch_size, len(dataset))
    workers = min(
        [
            os.cpu_count() // int(os.getenv("WORLD_SIZE", 1)),
            batch_size if batch_size > 1 else 0,
            workers,
        ]
    )  # number of workers
    sampler = (
        None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
    )
    return (
        TrainValDataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle and sampler is None,
            num_workers=workers,
            sampler=sampler,
            pin_memory=True,
            collate_fn=TrainValDataset.collate_fn,
        ),
        dataset,
    )


class TrainValDataLoader(dataloader.DataLoader):
    """Dataloader that reuses workers

    Uses same syntax as vanilla DataLoader
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler))
        self.iterator = super().__iter__()

    def __len__(self):
        return len(self.batch_sampler.sampler)

    def __iter__(self):
        for i in range(len(self)):
            yield next(self.iterator)


class _RepeatSampler:
    """Sampler that repeats forever

    Args:
        sampler (Sampler)
    """

    def __init__(self, sampler):
        self.sampler = sampler

    def __iter__(self):
        while True:
            yield from iter(self.sampler)
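For context, a hedged sketch of driving create_dataloader above; the dataset path, hyperparameter dict, and class names are placeholders, not values fixed by this file:

from yolov6.data.data_load import create_dataloader

hyp = {'mosaic': 1.0, 'mixup': 0.0, 'hsv_h': 0.015, 'hsv_s': 0.7, 'hsv_v': 0.4,
       'degrees': 0.0, 'translate': 0.1, 'scale': 0.5, 'shear': 0.0,
       'flipud': 0.0, 'fliplr': 0.5}  # placeholder augmentation hyperparameters
data_dict = {'names': ['person', 'car']}  # placeholder class names

loader, dataset = create_dataloader('custom/images/train', img_size=640, batch_size=16,
                                    stride=32, hyp=hyp, augment=True, shuffle=True,
                                    data_dict=data_dict, task='train')
imgs, targets, paths, shapes = next(iter(loader))
# imgs: uint8 tensor of shape (16, 3, 640, 640); targets: (n, 6) rows of
# [batch_idx, cls, x_center, y_center, w, h], filled in by collate_fn above.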
yolov6/data/datasets.py
ADDED
@@ -0,0 +1,550 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

import glob
import os
import os.path as osp
import random
import json
import time
import hashlib

from multiprocessing.pool import Pool

import cv2
import numpy as np
import torch
from PIL import ExifTags, Image, ImageOps
from torch.utils.data import Dataset
from tqdm import tqdm

from .data_augment import (
    augment_hsv,
    letterbox,
    mixup,
    random_affine,
    mosaic_augmentation,
)
from yolov6.utils.events import LOGGER

# Parameters
IMG_FORMATS = ["bmp", "jpg", "jpeg", "png", "tif", "tiff", "dng", "webp", "mpo"]
# Get orientation exif tag
for k, v in ExifTags.TAGS.items():
    if v == "Orientation":
        ORIENTATION = k
        break


class TrainValDataset(Dataset):
    # YOLOv6 train_loader/val_loader, loads images and labels for training and validation
    def __init__(
        self,
        img_dir,
        img_size=640,
        batch_size=16,
        augment=False,
        hyp=None,
        rect=False,
        check_images=False,
        check_labels=False,
        stride=32,
        pad=0.0,
        rank=-1,
        data_dict=None,
        task="train",
    ):
        assert task.lower() in ("train", "val", "speed"), f"Not supported task: {task}"
        t1 = time.time()
        self.__dict__.update(locals())
        self.main_process = self.rank in (-1, 0)
        self.task = self.task.capitalize()
        self.class_names = data_dict["names"]
        self.img_paths, self.labels = self.get_imgs_labels(self.img_dir)
        if self.rect:
            shapes = [self.img_info[p]["shape"] for p in self.img_paths]
            self.shapes = np.array(shapes, dtype=np.float64)
            self.batch_indices = np.floor(
                np.arange(len(shapes)) / self.batch_size
            ).astype(int)  # batch indices of each image (builtin int; np.int is deprecated)
            self.sort_files_shapes()
        t2 = time.time()
        if self.main_process:
            LOGGER.info("%.1fs for dataset initialization." % (t2 - t1))

    def __len__(self):
        """Get the length of dataset"""
        return len(self.img_paths)

    def __getitem__(self, index):
        """Fetching a data sample for a given key.
        This function applies mosaic and mixup augments during training.
        During validation, letterbox augment is applied.
        """
        # Mosaic Augmentation
        if self.augment and random.random() < self.hyp["mosaic"]:
            img, labels = self.get_mosaic(index)
            shapes = None

            # MixUp augmentation
            if random.random() < self.hyp["mixup"]:
                img_other, labels_other = self.get_mosaic(
                    random.randint(0, len(self.img_paths) - 1)
                )
                img, labels = mixup(img, labels, img_other, labels_other)

        else:
            # Load image
            img, (h0, w0), (h, w) = self.load_image(index)

            # Letterbox
            shape = (
                self.batch_shapes[self.batch_indices[index]]
                if self.rect
                else self.img_size
            )  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            labels = self.labels[index].copy()
            if labels.size:
                w *= ratio
                h *= ratio
                # new boxes
                boxes = np.copy(labels[:, 1:])
                boxes[:, 0] = (
                    w * (labels[:, 1] - labels[:, 3] / 2) + pad[0]
                )  # top left x
                boxes[:, 1] = (
                    h * (labels[:, 2] - labels[:, 4] / 2) + pad[1]
                )  # top left y
                boxes[:, 2] = (
                    w * (labels[:, 1] + labels[:, 3] / 2) + pad[0]
                )  # bottom right x
                boxes[:, 3] = (
                    h * (labels[:, 2] + labels[:, 4] / 2) + pad[1]
                )  # bottom right y
                labels[:, 1:] = boxes

            if self.augment:
                img, labels = random_affine(
                    img,
                    labels,
                    degrees=self.hyp["degrees"],
                    translate=self.hyp["translate"],
                    scale=self.hyp["scale"],
                    shear=self.hyp["shear"],
                    new_shape=(self.img_size, self.img_size),
                )

        if len(labels):
            h, w = img.shape[:2]

            labels[:, [1, 3]] = labels[:, [1, 3]].clip(0, w - 1e-3)  # x1, x2
            labels[:, [2, 4]] = labels[:, [2, 4]].clip(0, h - 1e-3)  # y1, y2

            boxes = np.copy(labels[:, 1:])
            boxes[:, 0] = ((labels[:, 1] + labels[:, 3]) / 2) / w  # x center
            boxes[:, 1] = ((labels[:, 2] + labels[:, 4]) / 2) / h  # y center
            boxes[:, 2] = (labels[:, 3] - labels[:, 1]) / w  # width
            boxes[:, 3] = (labels[:, 4] - labels[:, 2]) / h  # height
            labels[:, 1:] = boxes

        if self.augment:
            img, labels = self.general_augment(img, labels)

        labels_out = torch.zeros((len(labels), 6))
        if len(labels):
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_paths[index], shapes

    def load_image(self, index):
        """Load image.
        This function loads image by cv2, resize original image to target shape(img_size) with keeping ratio.

        Returns:
            Image, original shape of image, resized image shape
        """
        path = self.img_paths[index]
        im = cv2.imread(path)
        assert im is not None, f"Image Not Found {path}, workdir: {os.getcwd()}"

        h0, w0 = im.shape[:2]  # origin shape
        r = self.img_size / max(h0, w0)
        if r != 1:
            im = cv2.resize(
                im,
                (int(w0 * r), int(h0 * r)),
                interpolation=cv2.INTER_AREA
                if r < 1 and not self.augment
                else cv2.INTER_LINEAR,
            )
        return im, (h0, w0), im.shape[:2]

    @staticmethod
    def collate_fn(batch):
        """Merges a list of samples to form a mini-batch of Tensor(s)"""
        img, label, path, shapes = zip(*batch)
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes

    def get_imgs_labels(self, img_dir):

        assert osp.exists(img_dir), f"{img_dir} is an invalid directory path!"
        valid_img_record = osp.join(
            osp.dirname(img_dir), "." + osp.basename(img_dir) + ".json"
        )
        NUM_THREADS = min(8, os.cpu_count())

        img_paths = glob.glob(osp.join(img_dir, "*"), recursive=True)
        img_paths = sorted(
            p for p in img_paths if p.split(".")[-1].lower() in IMG_FORMATS
        )
        assert img_paths, f"No images found in {img_dir}."

        img_hash = self.get_hash(img_paths)
        if osp.exists(valid_img_record):
            with open(valid_img_record, "r") as f:
                cache_info = json.load(f)
                if "image_hash" in cache_info and cache_info["image_hash"] == img_hash:
                    img_info = cache_info["information"]
                else:
                    self.check_images = True
        else:
            self.check_images = True

        # check images
        if self.check_images and self.main_process:
            img_info = {}
            nc, msgs = 0, []  # number corrupt, messages
            LOGGER.info(
                f"{self.task}: Checking formats of images with {NUM_THREADS} process(es): "
            )
            with Pool(NUM_THREADS) as pool:
                pbar = tqdm(
                    pool.imap(TrainValDataset.check_image, img_paths),
                    total=len(img_paths),
                )
                for img_path, shape_per_img, nc_per_img, msg in pbar:
                    if nc_per_img == 0:  # not corrupted
                        img_info[img_path] = {"shape": shape_per_img}
                    nc += nc_per_img
                    if msg:
                        msgs.append(msg)
                    pbar.desc = f"{nc} image(s) corrupted"
            pbar.close()
            if msgs:
                LOGGER.info("\n".join(msgs))

            cache_info = {"information": img_info, "image_hash": img_hash}
            # save valid image paths.
            with open(valid_img_record, "w") as f:
                json.dump(cache_info, f)

        # check and load anns
        label_dir = osp.join(
            osp.dirname(osp.dirname(img_dir)), "labels", osp.basename(img_dir)
        )
        assert osp.exists(label_dir), f"{label_dir} is an invalid directory path!"

        img_paths = list(img_info.keys())
        label_paths = sorted(
            osp.join(label_dir, osp.splitext(osp.basename(p))[0] + ".txt")
            for p in img_paths
        )
        label_hash = self.get_hash(label_paths)
        if "label_hash" not in cache_info or cache_info["label_hash"] != label_hash:
            self.check_labels = True

        if self.check_labels:
            cache_info["label_hash"] = label_hash
            nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
            LOGGER.info(
                f"{self.task}: Checking formats of labels with {NUM_THREADS} process(es): "
            )
            with Pool(NUM_THREADS) as pool:
                pbar = pool.imap(
                    TrainValDataset.check_label_files, zip(img_paths, label_paths)
                )
                pbar = tqdm(pbar, total=len(label_paths)) if self.main_process else pbar
                for (
                    img_path,
                    labels_per_file,
                    nc_per_file,
                    nm_per_file,
                    nf_per_file,
                    ne_per_file,
                    msg,
                ) in pbar:
                    if nc_per_file == 0:
                        img_info[img_path]["labels"] = labels_per_file
                    else:
                        img_info.pop(img_path)
                    nc += nc_per_file
                    nm += nm_per_file
                    nf += nf_per_file
                    ne += ne_per_file
                    if msg:
                        msgs.append(msg)
                    if self.main_process:
                        pbar.desc = f"{nf} label(s) found, {nm} label(s) missing, {ne} label(s) empty, {nc} invalid label files"
            if self.main_process:
                pbar.close()
                with open(valid_img_record, "w") as f:
                    json.dump(cache_info, f)
            if msgs:
                LOGGER.info("\n".join(msgs))
            if nf == 0:
                LOGGER.warning(
                    f"WARNING: No labels found in {osp.dirname(self.img_paths[0])}. "
                )

        if self.task.lower() == "val":
            if self.data_dict.get("is_coco", False):  # use original json file when evaluating on coco dataset.
                assert osp.exists(self.data_dict["anno_path"]), "Eval on coco dataset must provide valid path of the annotation file in config file: data/coco.yaml"
            else:
                assert (
                    self.class_names
                ), "Class names is required when converting labels to coco format for evaluating."
                save_dir = osp.join(osp.dirname(osp.dirname(img_dir)), "annotations")
                if not osp.exists(save_dir):
                    os.mkdir(save_dir)
                save_path = osp.join(
                    save_dir, "instances_" + osp.basename(img_dir) + ".json"
                )
                TrainValDataset.generate_coco_format_labels(
                    img_info, self.class_names, save_path
                )

        img_paths, labels = list(
            zip(
                *[
                    (
                        img_path,
                        np.array(info["labels"], dtype=np.float32)
                        if info["labels"]
                        else np.zeros((0, 5), dtype=np.float32),
                    )
                    for img_path, info in img_info.items()
                ]
            )
        )
        self.img_info = img_info
        LOGGER.info(
            f"{self.task}: Final numbers of valid images: {len(img_paths)}/ labels: {len(labels)}. "
        )
        return img_paths, labels

    def get_mosaic(self, index):
        """Gets images and labels after mosaic augments"""
        indices = [index] + random.choices(
            range(0, len(self.img_paths)), k=3
        )  # 3 additional image indices
        random.shuffle(indices)
        imgs, hs, ws, labels = [], [], [], []
        for index in indices:
            img, _, (h, w) = self.load_image(index)
            labels_per_img = self.labels[index]
            imgs.append(img)
            hs.append(h)
            ws.append(w)
            labels.append(labels_per_img)
        img, labels = mosaic_augmentation(self.img_size, imgs, hs, ws, labels, self.hyp)
        return img, labels

    def general_augment(self, img, labels):
        """Gets images and labels after general augment
        This function applies hsv, random ud-flip and random lr-flips augments.
        """
        nl = len(labels)

        # HSV color-space
        augment_hsv(
            img,
            hgain=self.hyp["hsv_h"],
            sgain=self.hyp["hsv_s"],
            vgain=self.hyp["hsv_v"],
        )

        # Flip up-down
        if random.random() < self.hyp["flipud"]:
            img = np.flipud(img)
            if nl:
                labels[:, 2] = 1 - labels[:, 2]

        # Flip left-right
        if random.random() < self.hyp["fliplr"]:
            img = np.fliplr(img)
            if nl:
                labels[:, 1] = 1 - labels[:, 1]

        return img, labels

    def sort_files_shapes(self):
        # Sort by aspect ratio
        batch_num = self.batch_indices[-1] + 1
        s = self.shapes  # wh
        ar = s[:, 1] / s[:, 0]  # aspect ratio
        irect = ar.argsort()
        self.img_paths = [self.img_paths[i] for i in irect]
        self.labels = [self.labels[i] for i in irect]
        self.shapes = s[irect]  # wh
        ar = ar[irect]

        # Set training image shapes
        shapes = [[1, 1]] * batch_num
        for i in range(batch_num):
            ari = ar[self.batch_indices == i]
            mini, maxi = ari.min(), ari.max()
            if maxi < 1:
                shapes[i] = [maxi, 1]
            elif mini > 1:
                shapes[i] = [1, 1 / mini]
        self.batch_shapes = (
            np.ceil(np.array(shapes) * self.img_size / self.stride + self.pad).astype(int)
            * self.stride
        )  # builtin int; np.int is deprecated

    @staticmethod
    def check_image(im_file):
        # verify an image.
        nc, msg = 0, ""
        try:
            im = Image.open(im_file)
            im.verify()  # PIL verify
            shape = im.size  # (width, height)
            im_exif = im._getexif()
            if im_exif and ORIENTATION in im_exif:
                rotation = im_exif[ORIENTATION]
                if rotation in (6, 8):
                    shape = (shape[1], shape[0])

            assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels"
            assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}"
            if im.format.lower() in ("jpg", "jpeg"):
                with open(im_file, "rb") as f:
                    f.seek(-2, 2)
                    if f.read() != b"\xff\xd9":  # corrupt JPEG
                        ImageOps.exif_transpose(Image.open(im_file)).save(
                            im_file, "JPEG", subsampling=0, quality=100
                        )
                        msg += f"WARNING: {im_file}: corrupt JPEG restored and saved"
            return im_file, shape, nc, msg
        except Exception as e:
            nc = 1
            msg = f"WARNING: {im_file}: ignoring corrupt image: {e}"
            return im_file, None, nc, msg

    @staticmethod
    def check_label_files(args):
        img_path, lb_path = args
        nm, nf, ne, nc, msg = 0, 0, 0, 0, ""  # number (missing, found, empty, corrupt), message
        try:
            if osp.exists(lb_path):
                nf = 1  # label found
                with open(lb_path, "r") as f:
                    labels = [
                        x.split() for x in f.read().strip().splitlines() if len(x)
                    ]
                    labels = np.array(labels, dtype=np.float32)
                if len(labels):
                    assert all(
                        len(l) == 5 for l in labels
                    ), f"{lb_path}: wrong label format."
                    assert (
                        labels >= 0
                    ).all(), f"{lb_path}: Label values error: all values in label file must > 0"
                    assert (
                        labels[:, 1:] <= 1
                    ).all(), f"{lb_path}: Label values error: all coordinates must be normalized"

                    _, indices = np.unique(labels, axis=0, return_index=True)
                    if len(indices) < len(labels):  # duplicate row check
                        labels = labels[indices]  # remove duplicates
                        msg += f"WARNING: {lb_path}: {len(labels) - len(indices)} duplicate labels removed"
                    labels = labels.tolist()
                else:
                    ne = 1  # label empty
                    labels = []
            else:
                nm = 1  # label missing
                labels = []

            return img_path, labels, nc, nm, nf, ne, msg
        except Exception as e:
            nc = 1
            msg = f"WARNING: {lb_path}: ignoring invalid labels: {e}"
            return img_path, None, nc, nm, nf, ne, msg

    @staticmethod
    def generate_coco_format_labels(img_info, class_names, save_path):
        # for evaluation with pycocotools
        dataset = {"categories": [], "annotations": [], "images": []}
        for i, class_name in enumerate(class_names):
            dataset["categories"].append(
                {"id": i, "name": class_name, "supercategory": ""}
            )

        ann_id = 0
        LOGGER.info("Convert to COCO format")
        for i, (img_path, info) in enumerate(tqdm(img_info.items())):
            labels = info["labels"] if info["labels"] else []
            img_id = osp.splitext(osp.basename(img_path))[0]
            img_id = int(img_id) if img_id.isnumeric() else img_id
            img_w, img_h = info["shape"]
            dataset["images"].append(
                {
                    "file_name": os.path.basename(img_path),
                    "id": img_id,
                    "width": img_w,
                    "height": img_h,
                }
            )
            if labels:
                for label in labels:
                    c, x, y, w, h = label[:5]
                    # convert x,y,w,h to x1,y1,x2,y2
                    x1 = (x - w / 2) * img_w
                    y1 = (y - h / 2) * img_h
                    x2 = (x + w / 2) * img_w
                    y2 = (y + h / 2) * img_h
                    # cls_id starts from 0
                    cls_id = int(c)
                    w = max(0, x2 - x1)
                    h = max(0, y2 - y1)
                    dataset["annotations"].append(
                        {
                            "area": h * w,
                            "bbox": [x1, y1, w, h],
                            "category_id": cls_id,
                            "id": ann_id,
                            "image_id": img_id,
                            "iscrowd": 0,
                            # mask
                            "segmentation": [],
                        }
                    )
                    ann_id += 1

        with open(save_path, "w") as f:
            json.dump(dataset, f)
        LOGGER.info(
            f"Convert to COCO format finished. Results saved in {save_path}"
        )

    @staticmethod
    def get_hash(paths):
        """Get the hash value of paths"""
        assert isinstance(paths, list), "Only support list currently."
        h = hashlib.md5("".join(paths).encode())
        return h.hexdigest()
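One convention worth spelling out: get_imgs_labels derives the label directory and the validity cache from img_dir alone, so datasets must follow the mirrored layout sketched below (illustrative paths, not fixed by the code):

import os.path as osp

# dataset_root/images/train/xxx.jpg  <- img_dir handed to TrainValDataset
# dataset_root/labels/train/xxx.txt  <- label dir derived by get_imgs_labels
# dataset_root/images/.train.json    <- per-split validity cache written beside the split
img_dir = 'dataset_root/images/train'
label_dir = osp.join(osp.dirname(osp.dirname(img_dir)), 'labels', osp.basename(img_dir))
assert label_dir == osp.join('dataset_root', 'labels', 'train')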
yolov6/data/vis_dataset.py
ADDED
@@ -0,0 +1,57 @@
# coding=utf-8
# Description: visualize yolo label image.

import argparse
import os
import cv2
import numpy as np

IMG_FORMATS = ["bmp", "jpg", "jpeg", "png", "tif", "tiff", "dng", "webp", "mpo"]

def main(args):
    img_dir, label_dir, class_names = args.img_dir, args.label_dir, args.class_names

    label_map = dict()
    for class_id, classname in enumerate(class_names):
        label_map[class_id] = classname

    for file in os.listdir(img_dir):
        if file.split('.')[-1] not in IMG_FORMATS:
            print(f'[Warning]: Non-image file {file}')
            continue
        img_path = os.path.join(img_dir, file)
        label_path = os.path.join(label_dir, file[: file.rindex('.')] + '.txt')

        try:
            img_data = cv2.imread(img_path)
            height, width, _ = img_data.shape
            color = [tuple(np.random.choice(range(256), size=3)) for i in class_names]
            thickness = 2

            with open(label_path, 'r') as f:
                for bbox in f:
                    cls, x_c, y_c, w, h = [float(v) if i > 0 else int(v) for i, v in enumerate(bbox.split('\n')[0].split(' '))]

                    x_tl = int((x_c - w / 2) * width)
                    y_tl = int((y_c - h / 2) * height)
                    cv2.rectangle(img_data, (x_tl, y_tl), (x_tl + int(w * width), y_tl + int(h * height)), tuple([int(x) for x in color[cls]]), thickness)
                    cv2.putText(img_data, label_map[cls], (x_tl, y_tl - 10), cv2.FONT_HERSHEY_COMPLEX, 1, tuple([int(x) for x in color[cls]]), thickness)

            cv2.imshow('image', img_data)
            cv2.waitKey(0)
        except Exception as e:
            print(f'[Error]: {e} {img_path}')
    print('======All Done!======')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--img_dir', default='VOCdevkit/voc_07_12/images')
    parser.add_argument('--label_dir', default='VOCdevkit/voc_07_12/labels')
    parser.add_argument('--class_names', default=['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
                                                  'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'])

    args = parser.parse_args()
    print(args)

    main(args)
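For reference, a tiny sketch of the one-object-per-line label format the visualizer parses (class id followed by normalized x-center, y-center, width, height; the sample values are arbitrary):

line = '14 0.5 0.5 0.25 0.25'  # class 14 is 'person' in the VOC name list above
cls, x_c, y_c, w, h = [float(v) if i > 0 else int(v) for i, v in enumerate(line.split(' '))]
width, height = 640, 480  # example image size
x_tl = int((x_c - w / 2) * width)   # same top-left math as in main()
y_tl = int((y_c - h / 2) * height)
assert (cls, x_tl, y_tl) == (14, 240, 180)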
yolov6/data/voc2yolo.py
ADDED
@@ -0,0 +1,99 @@
import xml.etree.ElementTree as ET
from tqdm import tqdm
import os
import shutil
import argparse

# VOC dataset (refer https://github.com/ultralytics/yolov5/blob/master/data/VOC.yaml)
# VOC2007 trainval: 446MB, 5012 images
# VOC2007 test: 438MB, 4953 images
# VOC2012 trainval: 1.95GB, 17126 images

VOC_NAMES = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
             'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']

def convert_label(path, lb_path, year, image_id):
    def convert_box(size, box):
        dw, dh = 1. / size[0], 1. / size[1]
        x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
        return x * dw, y * dh, w * dw, h * dh
    in_file = open(os.path.join(path, f'VOC{year}/Annotations/{image_id}.xml'))
    out_file = open(lb_path, 'w')
    tree = ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls in VOC_NAMES and not int(obj.find('difficult').text) == 1:
            xmlbox = obj.find('bndbox')
            bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
            cls_id = VOC_NAMES.index(cls)  # class id
            out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')


def gen_voc07_12(voc_path):
    '''
    Generate voc07+12 setting dataset:
    train: # train images 16551 images
        - images/train2012
        - images/train2007
        - images/val2012
        - images/val2007
    val: # val images (relative to 'path') 4952 images
        - images/test2007
    '''
    dataset_root = os.path.join(voc_path, 'voc_07_12')
    if not os.path.exists(dataset_root):
        os.makedirs(dataset_root)

    dataset_settings = {'train': ['train2007', 'val2007', 'train2012', 'val2012'], 'val': ['test2007']}
    for item in ['images', 'labels']:
        for data_type, data_list in dataset_settings.items():
            for data_name in data_list:
                ori_path = os.path.join(voc_path, item, data_name)
                new_path = os.path.join(dataset_root, item, data_type)
                if not os.path.exists(new_path):
                    os.makedirs(new_path)

                print(f'[INFO]: Copying {ori_path} to {new_path}')
                for file in os.listdir(ori_path):
                    shutil.copy(os.path.join(ori_path, file), new_path)


def main(args):
    voc_path = args.voc_path
    for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
        imgs_path = os.path.join(voc_path, 'images', f'{image_set}{year}')  # per-year split dir, e.g. images/train2007, as expected by gen_voc07_12
        lbs_path = os.path.join(voc_path, 'labels', f'{image_set}{year}')

        try:
            with open(os.path.join(voc_path, f'VOC{year}/ImageSets/Main/{image_set}.txt'), 'r') as f:
                image_ids = f.read().strip().split()
            if not os.path.exists(imgs_path):
                os.makedirs(imgs_path)
            if not os.path.exists(lbs_path):
                os.makedirs(lbs_path)

            for id in tqdm(image_ids, desc=f'{image_set}{year}'):
                f = os.path.join(voc_path, f'VOC{year}/JPEGImages/{id}.jpg')  # old img path
                lb_path = os.path.join(lbs_path, f'{id}.txt')  # new label path
                convert_label(voc_path, lb_path, year, id)  # convert labels to YOLO format
                if os.path.exists(f):
                    shutil.move(f, imgs_path)  # move image
        except Exception as e:
            print(f'[Warning]: {e} {year}{image_set} convert fail!')

    gen_voc07_12(voc_path)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--voc_path', default='VOCdevkit')

    args = parser.parse_args()
    print(args)

    main(args)
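A quick worked check of the convert_box helper above, which turns VOC's 1-based corner pixels into YOLO's normalized center/size format (standalone sketch; the box values are arbitrary examples):

def convert_box(size, box):  # same math as in convert_label: size=(w, h), box=(xmin, xmax, ymin, ymax)
    dw, dh = 1. / size[0], 1. / size[1]
    x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
    return x * dw, y * dh, w * dw, h * dh

x, y, w, h = convert_box((500, 400), (100, 200, 100, 200))
assert abs(w - 0.2) < 1e-9 and abs(h - 0.25) < 1e-9            # 100/500 and 100/400
assert abs(x - 149 / 500) < 1e-9 and abs(y - 149 / 400) < 1e-9  # center 150, shifted by the 1-based offset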
yolov6/layers/common.py
ADDED
@@ -0,0 +1,501 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

import warnings
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from yolov6.layers.dbb_transforms import *


class SiLU(nn.Module):
    '''Activation of SiLU'''
    @staticmethod
    def forward(x):
        return x * torch.sigmoid(x)


class Conv(nn.Module):
    '''Normal Conv with SiLU activation'''
    def __init__(self, in_channels, out_channels, kernel_size, stride, groups=1, bias=False):
        super().__init__()
        padding = kernel_size // 2
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=bias,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = nn.SiLU()

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        return self.act(self.conv(x))


class SimConv(nn.Module):
    '''Normal Conv with ReLU activation'''
    def __init__(self, in_channels, out_channels, kernel_size, stride, groups=1, bias=False):
        super().__init__()
        padding = kernel_size // 2
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=bias,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = nn.ReLU()

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        return self.act(self.conv(x))


class SimSPPF(nn.Module):
    '''Simplified SPPF with ReLU activation'''
    def __init__(self, in_channels, out_channels, kernel_size=5):
        super().__init__()
        c_ = in_channels // 2  # hidden channels
        self.cv1 = SimConv(in_channels, c_, 1, 1)
        self.cv2 = SimConv(c_ * 4, out_channels, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=kernel_size, stride=1, padding=kernel_size // 2)

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            y1 = self.m(x)
            y2 = self.m(y1)
            return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))


class Transpose(nn.Module):
    '''Normal Transpose, default for upsampling'''
    def __init__(self, in_channels, out_channels, kernel_size=2, stride=2):
        super().__init__()
        self.upsample_transpose = torch.nn.ConvTranspose2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            bias=True
        )

    def forward(self, x):
        return self.upsample_transpose(x)


class Concat(nn.Module):
    def __init__(self, dimension=1):
        super().__init__()
        self.d = dimension

    def forward(self, x):
        return torch.cat(x, self.d)


def conv_bn(in_channels, out_channels, kernel_size, stride, padding, groups=1):
    '''Basic cell for rep-style block, including conv and bn'''
    result = nn.Sequential()
    result.add_module('conv', nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                                        kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=False))
    result.add_module('bn', nn.BatchNorm2d(num_features=out_channels))
    return result


class RepBlock(nn.Module):
    '''
    RepBlock is a stage block with rep-style basic block
    '''
    def __init__(self, in_channels, out_channels, n=1):
        super().__init__()
        self.conv1 = RepVGGBlock(in_channels, out_channels)
        self.block = nn.Sequential(*(RepVGGBlock(out_channels, out_channels) for _ in range(n - 1))) if n > 1 else None

    def forward(self, x):
        x = self.conv1(x)
        if self.block is not None:
            x = self.block(x)
        return x


class RepVGGBlock(nn.Module):
    '''RepVGGBlock is a basic rep-style block, including training and deploy status
    This code is based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
    '''
    def __init__(self, in_channels, out_channels, kernel_size=3,
                 stride=1, padding=1, dilation=1, groups=1, padding_mode='zeros', deploy=False, use_se=False):
        super(RepVGGBlock, self).__init__()
        """ Initialization of the class.
        Args:
            in_channels (int): Number of channels in the input image
            out_channels (int): Number of channels produced by the convolution
            kernel_size (int or tuple): Size of the convolving kernel
            stride (int or tuple, optional): Stride of the convolution. Default: 1
            padding (int or tuple, optional): Zero-padding added to both sides of
                the input. Default: 1
            dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
            groups (int, optional): Number of blocked connections from input
                channels to output channels. Default: 1
            padding_mode (string, optional): Default: 'zeros'
            deploy: Whether to be deploy status or training status. Default: False
            use_se: Whether to use se. Default: False
        """
        self.deploy = deploy
        self.groups = groups
        self.in_channels = in_channels
        self.out_channels = out_channels

        assert kernel_size == 3
        assert padding == 1

        padding_11 = padding - kernel_size // 2

        self.nonlinearity = nn.ReLU()

        if use_se:
            raise NotImplementedError("se block not supported yet")
        else:
            self.se = nn.Identity()

        if deploy:
            self.rbr_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride,
                                         padding=padding, dilation=dilation, groups=groups, bias=True, padding_mode=padding_mode)

        else:
            self.rbr_identity = nn.BatchNorm2d(num_features=in_channels) if out_channels == in_channels and stride == 1 else None
            self.rbr_dense = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups)
            self.rbr_1x1 = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride, padding=padding_11, groups=groups)

    def forward(self, inputs):
        '''Forward process'''
        if hasattr(self, 'rbr_reparam'):
            return self.nonlinearity(self.se(self.rbr_reparam(inputs)))

        if self.rbr_identity is None:
            id_out = 0
        else:
            id_out = self.rbr_identity(inputs)
|
194 |
+
|
195 |
+
return self.nonlinearity(self.se(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out))
|
196 |
+
|
197 |
+
def get_equivalent_kernel_bias(self):
|
198 |
+
kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense)
|
199 |
+
kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1)
|
200 |
+
kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity)
|
201 |
+
return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
|
202 |
+
|
203 |
+
def _pad_1x1_to_3x3_tensor(self, kernel1x1):
|
204 |
+
if kernel1x1 is None:
|
205 |
+
return 0
|
206 |
+
else:
|
207 |
+
return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])
|
208 |
+
|
209 |
+
def _fuse_bn_tensor(self, branch):
|
210 |
+
if branch is None:
|
211 |
+
return 0, 0
|
212 |
+
if isinstance(branch, nn.Sequential):
|
213 |
+
kernel = branch.conv.weight
|
214 |
+
running_mean = branch.bn.running_mean
|
215 |
+
running_var = branch.bn.running_var
|
216 |
+
gamma = branch.bn.weight
|
217 |
+
beta = branch.bn.bias
|
218 |
+
eps = branch.bn.eps
|
219 |
+
else:
|
220 |
+
assert isinstance(branch, nn.BatchNorm2d)
|
221 |
+
if not hasattr(self, 'id_tensor'):
|
222 |
+
input_dim = self.in_channels // self.groups
|
223 |
+
kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), dtype=np.float32)
|
224 |
+
for i in range(self.in_channels):
|
225 |
+
kernel_value[i, i % input_dim, 1, 1] = 1
|
226 |
+
self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device)
|
227 |
+
kernel = self.id_tensor
|
228 |
+
running_mean = branch.running_mean
|
229 |
+
running_var = branch.running_var
|
230 |
+
gamma = branch.weight
|
231 |
+
beta = branch.bias
|
232 |
+
eps = branch.eps
|
233 |
+
std = (running_var + eps).sqrt()
|
234 |
+
t = (gamma / std).reshape(-1, 1, 1, 1)
|
235 |
+
return kernel * t, beta - running_mean * gamma / std
|
236 |
+
|
237 |
+
def switch_to_deploy(self):
|
238 |
+
if hasattr(self, 'rbr_reparam'):
|
239 |
+
return
|
240 |
+
kernel, bias = self.get_equivalent_kernel_bias()
|
241 |
+
self.rbr_reparam = nn.Conv2d(in_channels=self.rbr_dense.conv.in_channels, out_channels=self.rbr_dense.conv.out_channels,
|
242 |
+
kernel_size=self.rbr_dense.conv.kernel_size, stride=self.rbr_dense.conv.stride,
|
243 |
+
padding=self.rbr_dense.conv.padding, dilation=self.rbr_dense.conv.dilation, groups=self.rbr_dense.conv.groups, bias=True)
|
244 |
+
self.rbr_reparam.weight.data = kernel
|
245 |
+
self.rbr_reparam.bias.data = bias
|
246 |
+
for para in self.parameters():
|
247 |
+
para.detach_()
|
248 |
+
self.__delattr__('rbr_dense')
|
249 |
+
self.__delattr__('rbr_1x1')
|
250 |
+
if hasattr(self, 'rbr_identity'):
|
251 |
+
self.__delattr__('rbr_identity')
|
252 |
+
if hasattr(self, 'id_tensor'):
|
253 |
+
self.__delattr__('id_tensor')
|
254 |
+
self.deploy = True
|
255 |
+
|
256 |
+
|
257 |
+
def conv_bn_v2(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1,
|
258 |
+
padding_mode='zeros'):
|
259 |
+
conv_layer = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
|
260 |
+
stride=stride, padding=padding, dilation=dilation, groups=groups,
|
261 |
+
bias=False, padding_mode=padding_mode)
|
262 |
+
bn_layer = nn.BatchNorm2d(num_features=out_channels, affine=True)
|
263 |
+
se = nn.Sequential()
|
264 |
+
se.add_module('conv', conv_layer)
|
265 |
+
se.add_module('bn', bn_layer)
|
266 |
+
return se
|
267 |
+
|
268 |
+
|
269 |
+
class IdentityBasedConv1x1(nn.Conv2d):
|
270 |
+
|
271 |
+
def __init__(self, channels, groups=1):
|
272 |
+
super(IdentityBasedConv1x1, self).__init__(in_channels=channels, out_channels=channels, kernel_size=1, stride=1, padding=0, groups=groups, bias=False)
|
273 |
+
|
274 |
+
assert channels % groups == 0
|
275 |
+
input_dim = channels // groups
|
276 |
+
id_value = np.zeros((channels, input_dim, 1, 1))
|
277 |
+
for i in range(channels):
|
278 |
+
id_value[i, i % input_dim, 0, 0] = 1
|
279 |
+
self.id_tensor = torch.from_numpy(id_value).type_as(self.weight)
|
280 |
+
nn.init.zeros_(self.weight)
|
281 |
+
|
282 |
+
def forward(self, input):
|
283 |
+
kernel = self.weight + self.id_tensor.to(self.weight.device)
|
284 |
+
result = F.conv2d(input, kernel, None, stride=1, padding=0, dilation=self.dilation, groups=self.groups)
|
285 |
+
return result
|
286 |
+
|
287 |
+
def get_actual_kernel(self):
|
288 |
+
return self.weight + self.id_tensor.to(self.weight.device)
|
289 |
+
|
290 |
+
|
291 |
+
class BNAndPadLayer(nn.Module):
|
292 |
+
def __init__(self,
|
293 |
+
pad_pixels,
|
294 |
+
num_features,
|
295 |
+
eps=1e-5,
|
296 |
+
momentum=0.1,
|
297 |
+
affine=True,
|
298 |
+
track_running_stats=True):
|
299 |
+
super(BNAndPadLayer, self).__init__()
|
300 |
+
self.bn = nn.BatchNorm2d(num_features, eps, momentum, affine, track_running_stats)
|
301 |
+
self.pad_pixels = pad_pixels
|
302 |
+
|
303 |
+
def forward(self, input):
|
304 |
+
output = self.bn(input)
|
305 |
+
if self.pad_pixels > 0:
|
306 |
+
if self.bn.affine:
|
307 |
+
pad_values = self.bn.bias.detach() - self.bn.running_mean * self.bn.weight.detach() / torch.sqrt(self.bn.running_var + self.bn.eps)
|
308 |
+
else:
|
309 |
+
pad_values = - self.bn.running_mean / torch.sqrt(self.bn.running_var + self.bn.eps)
|
310 |
+
output = F.pad(output, [self.pad_pixels] * 4)
|
311 |
+
pad_values = pad_values.view(1, -1, 1, 1)
|
312 |
+
output[:, :, 0:self.pad_pixels, :] = pad_values
|
313 |
+
output[:, :, -self.pad_pixels:, :] = pad_values
|
314 |
+
output[:, :, :, 0:self.pad_pixels] = pad_values
|
315 |
+
output[:, :, :, -self.pad_pixels:] = pad_values
|
316 |
+
return output
|
317 |
+
|
318 |
+
@property
|
319 |
+
def bn_weight(self):
|
320 |
+
return self.bn.weight
|
321 |
+
|
322 |
+
@property
|
323 |
+
def bn_bias(self):
|
324 |
+
return self.bn.bias
|
325 |
+
|
326 |
+
@property
|
327 |
+
def running_mean(self):
|
328 |
+
return self.bn.running_mean
|
329 |
+
|
330 |
+
@property
|
331 |
+
def running_var(self):
|
332 |
+
return self.bn.running_var
|
333 |
+
|
334 |
+
@property
|
335 |
+
def eps(self):
|
336 |
+
return self.bn.eps
|
337 |
+
|
338 |
+
|
339 |
+
class DBBBlock(nn.Module):
|
340 |
+
'''
|
341 |
+
RepBlock is a stage block with rep-style basic block
|
342 |
+
'''
|
343 |
+
def __init__(self, in_channels, out_channels, n=1):
|
344 |
+
super().__init__()
|
345 |
+
self.conv1 = DiverseBranchBlock(in_channels, out_channels)
|
346 |
+
self.block = nn.Sequential(*(DiverseBranchBlock(out_channels, out_channels) for _ in range(n - 1))) if n > 1 else None
|
347 |
+
|
348 |
+
def forward(self, x):
|
349 |
+
x = self.conv1(x)
|
350 |
+
if self.block is not None:
|
351 |
+
x = self.block(x)
|
352 |
+
return x
|
353 |
+
|
354 |
+
|
355 |
+
class DiverseBranchBlock(nn.Module):
|
356 |
+
|
357 |
+
def __init__(self, in_channels, out_channels, kernel_size=3,
|
358 |
+
stride=1, padding=1, dilation=1, groups=1,
|
359 |
+
internal_channels_1x1_3x3=None,
|
360 |
+
deploy=False, nonlinear=nn.ReLU(), single_init=False):
|
361 |
+
super(DiverseBranchBlock, self).__init__()
|
362 |
+
self.deploy = deploy
|
363 |
+
|
364 |
+
if nonlinear is None:
|
365 |
+
self.nonlinear = nn.Identity()
|
366 |
+
else:
|
367 |
+
self.nonlinear = nonlinear
|
368 |
+
|
369 |
+
self.kernel_size = kernel_size
|
370 |
+
self.out_channels = out_channels
|
371 |
+
self.groups = groups
|
372 |
+
assert padding == kernel_size // 2
|
373 |
+
|
374 |
+
if deploy:
|
375 |
+
self.dbb_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride,
|
376 |
+
padding=padding, dilation=dilation, groups=groups, bias=True)
|
377 |
+
|
378 |
+
else:
|
379 |
+
|
380 |
+
self.dbb_origin = conv_bn_v2(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups)
|
381 |
+
|
382 |
+
self.dbb_avg = nn.Sequential()
|
383 |
+
if groups < out_channels:
|
384 |
+
self.dbb_avg.add_module('conv',
|
385 |
+
nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1,
|
386 |
+
stride=1, padding=0, groups=groups, bias=False))
|
387 |
+
self.dbb_avg.add_module('bn', BNAndPadLayer(pad_pixels=padding, num_features=out_channels))
|
388 |
+
self.dbb_avg.add_module('avg', nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=0))
|
389 |
+
self.dbb_1x1 = conv_bn_v2(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride,
|
390 |
+
padding=0, groups=groups)
|
391 |
+
else:
|
392 |
+
self.dbb_avg.add_module('avg', nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=padding))
|
393 |
+
|
394 |
+
self.dbb_avg.add_module('avgbn', nn.BatchNorm2d(out_channels))
|
395 |
+
|
396 |
+
if internal_channels_1x1_3x3 is None:
|
397 |
+
internal_channels_1x1_3x3 = in_channels if groups < out_channels else 2 * in_channels # For mobilenet, it is better to have 2X internal channels
|
398 |
+
|
399 |
+
self.dbb_1x1_kxk = nn.Sequential()
|
400 |
+
if internal_channels_1x1_3x3 == in_channels:
|
401 |
+
self.dbb_1x1_kxk.add_module('idconv1', IdentityBasedConv1x1(channels=in_channels, groups=groups))
|
402 |
+
else:
|
403 |
+
self.dbb_1x1_kxk.add_module('conv1', nn.Conv2d(in_channels=in_channels, out_channels=internal_channels_1x1_3x3,
|
404 |
+
kernel_size=1, stride=1, padding=0, groups=groups, bias=False))
|
405 |
+
self.dbb_1x1_kxk.add_module('bn1', BNAndPadLayer(pad_pixels=padding, num_features=internal_channels_1x1_3x3, affine=True))
|
406 |
+
self.dbb_1x1_kxk.add_module('conv2', nn.Conv2d(in_channels=internal_channels_1x1_3x3, out_channels=out_channels,
|
407 |
+
kernel_size=kernel_size, stride=stride, padding=0, groups=groups, bias=False))
|
408 |
+
self.dbb_1x1_kxk.add_module('bn2', nn.BatchNorm2d(out_channels))
|
409 |
+
|
410 |
+
# The experiments reported in the paper used the default initialization of bn.weight (all as 1). But changing the initialization may be useful in some cases.
|
411 |
+
if single_init:
|
412 |
+
# Initialize the bn.weight of dbb_origin as 1 and others as 0. This is not the default setting.
|
413 |
+
self.single_init()
|
414 |
+
|
415 |
+
def get_equivalent_kernel_bias(self):
|
416 |
+
k_origin, b_origin = transI_fusebn(self.dbb_origin.conv.weight, self.dbb_origin.bn)
|
417 |
+
|
418 |
+
if hasattr(self, 'dbb_1x1'):
|
419 |
+
k_1x1, b_1x1 = transI_fusebn(self.dbb_1x1.conv.weight, self.dbb_1x1.bn)
|
420 |
+
k_1x1 = transVI_multiscale(k_1x1, self.kernel_size)
|
421 |
+
else:
|
422 |
+
k_1x1, b_1x1 = 0, 0
|
423 |
+
|
424 |
+
if hasattr(self.dbb_1x1_kxk, 'idconv1'):
|
425 |
+
k_1x1_kxk_first = self.dbb_1x1_kxk.idconv1.get_actual_kernel()
|
426 |
+
else:
|
427 |
+
k_1x1_kxk_first = self.dbb_1x1_kxk.conv1.weight
|
428 |
+
k_1x1_kxk_first, b_1x1_kxk_first = transI_fusebn(k_1x1_kxk_first, self.dbb_1x1_kxk.bn1)
|
429 |
+
k_1x1_kxk_second, b_1x1_kxk_second = transI_fusebn(self.dbb_1x1_kxk.conv2.weight, self.dbb_1x1_kxk.bn2)
|
430 |
+
k_1x1_kxk_merged, b_1x1_kxk_merged = transIII_1x1_kxk(k_1x1_kxk_first, b_1x1_kxk_first, k_1x1_kxk_second, b_1x1_kxk_second, groups=self.groups)
|
431 |
+
|
432 |
+
k_avg = transV_avg(self.out_channels, self.kernel_size, self.groups)
|
433 |
+
k_1x1_avg_second, b_1x1_avg_second = transI_fusebn(k_avg.to(self.dbb_avg.avgbn.weight.device), self.dbb_avg.avgbn)
|
434 |
+
if hasattr(self.dbb_avg, 'conv'):
|
435 |
+
k_1x1_avg_first, b_1x1_avg_first = transI_fusebn(self.dbb_avg.conv.weight, self.dbb_avg.bn)
|
436 |
+
k_1x1_avg_merged, b_1x1_avg_merged = transIII_1x1_kxk(k_1x1_avg_first, b_1x1_avg_first, k_1x1_avg_second, b_1x1_avg_second, groups=self.groups)
|
437 |
+
else:
|
438 |
+
k_1x1_avg_merged, b_1x1_avg_merged = k_1x1_avg_second, b_1x1_avg_second
|
439 |
+
|
440 |
+
return transII_addbranch((k_origin, k_1x1, k_1x1_kxk_merged, k_1x1_avg_merged), (b_origin, b_1x1, b_1x1_kxk_merged, b_1x1_avg_merged))
|
441 |
+
|
442 |
+
def switch_to_deploy(self):
|
443 |
+
if hasattr(self, 'dbb_reparam'):
|
444 |
+
return
|
445 |
+
kernel, bias = self.get_equivalent_kernel_bias()
|
446 |
+
self.dbb_reparam = nn.Conv2d(in_channels=self.dbb_origin.conv.in_channels, out_channels=self.dbb_origin.conv.out_channels,
|
447 |
+
kernel_size=self.dbb_origin.conv.kernel_size, stride=self.dbb_origin.conv.stride,
|
448 |
+
padding=self.dbb_origin.conv.padding, dilation=self.dbb_origin.conv.dilation, groups=self.dbb_origin.conv.groups, bias=True)
|
449 |
+
self.dbb_reparam.weight.data = kernel
|
450 |
+
self.dbb_reparam.bias.data = bias
|
451 |
+
for para in self.parameters():
|
452 |
+
para.detach_()
|
453 |
+
self.__delattr__('dbb_origin')
|
454 |
+
self.__delattr__('dbb_avg')
|
455 |
+
if hasattr(self, 'dbb_1x1'):
|
456 |
+
self.__delattr__('dbb_1x1')
|
457 |
+
self.__delattr__('dbb_1x1_kxk')
|
458 |
+
|
459 |
+
def forward(self, inputs):
|
460 |
+
|
461 |
+
if hasattr(self, 'dbb_reparam'):
|
462 |
+
return self.nonlinear(self.dbb_reparam(inputs))
|
463 |
+
|
464 |
+
out = self.dbb_origin(inputs)
|
465 |
+
if hasattr(self, 'dbb_1x1'):
|
466 |
+
out += self.dbb_1x1(inputs)
|
467 |
+
out += self.dbb_avg(inputs)
|
468 |
+
out += self.dbb_1x1_kxk(inputs)
|
469 |
+
return self.nonlinear(out)
|
470 |
+
|
471 |
+
def init_gamma(self, gamma_value):
|
472 |
+
if hasattr(self, "dbb_origin"):
|
473 |
+
torch.nn.init.constant_(self.dbb_origin.bn.weight, gamma_value)
|
474 |
+
if hasattr(self, "dbb_1x1"):
|
475 |
+
torch.nn.init.constant_(self.dbb_1x1.bn.weight, gamma_value)
|
476 |
+
if hasattr(self, "dbb_avg"):
|
477 |
+
torch.nn.init.constant_(self.dbb_avg.avgbn.weight, gamma_value)
|
478 |
+
if hasattr(self, "dbb_1x1_kxk"):
|
479 |
+
torch.nn.init.constant_(self.dbb_1x1_kxk.bn2.weight, gamma_value)
|
480 |
+
|
481 |
+
def single_init(self):
|
482 |
+
self.init_gamma(0.0)
|
483 |
+
if hasattr(self, "dbb_origin"):
|
484 |
+
torch.nn.init.constant_(self.dbb_origin.bn.weight, 1.0)
|
485 |
+
|
486 |
+
|
487 |
+
class DetectBackend(nn.Module):
|
488 |
+
def __init__(self, weights='yolov6s.pt', device=None, dnn=True):
|
489 |
+
|
490 |
+
super().__init__()
|
491 |
+
assert isinstance(weights, str) and Path(weights).suffix == '.pt', f'{Path(weights).suffix} format is not supported.'
|
492 |
+
from yolov6.utils.checkpoint import load_checkpoint
|
493 |
+
model = load_checkpoint(weights, map_location=device)
|
494 |
+
stride = int(model.stride.max())
|
495 |
+
self.__dict__.update(locals()) # assign all variables to self
|
496 |
+
|
497 |
+
def forward(self, im, val=False):
|
498 |
+
y = self.model(im)
|
499 |
+
if isinstance(y, np.ndarray):
|
500 |
+
y = torch.tensor(y, device=self.device)
|
501 |
+
return y
|
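The re-parameterization above can be sanity-checked numerically: in eval mode, the single fused 3x3 conv produced by switch_to_deploy() should reproduce the three-branch output up to floating-point error. A minimal sketch (the import path mirrors this repo; sizes are illustrative):

# Sketch: verify that switch_to_deploy() preserves RepVGGBlock outputs.
import torch
from yolov6.layers.common import RepVGGBlock

block = RepVGGBlock(in_channels=16, out_channels=16)
block.eval()  # BN must use running stats for the fusion to be exact

x = torch.randn(1, 16, 32, 32)
with torch.no_grad():
    y_train_form = block(x)    # dense 3x3 + 1x1 + identity branches
    block.switch_to_deploy()   # fold all branches into one 3x3 conv
    y_deploy_form = block(x)

print(torch.allclose(y_train_form, y_deploy_form, atol=1e-5))  # expected: True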
yolov6/layers/dbb_transforms.py
ADDED
@@ -0,0 +1,50 @@
import torch
import numpy as np
import torch.nn.functional as F


def transI_fusebn(kernel, bn):
    gamma = bn.weight
    std = (bn.running_var + bn.eps).sqrt()
    return kernel * ((gamma / std).reshape(-1, 1, 1, 1)), bn.bias - bn.running_mean * gamma / std


def transII_addbranch(kernels, biases):
    return sum(kernels), sum(biases)


def transIII_1x1_kxk(k1, b1, k2, b2, groups):
    if groups == 1:
        k = F.conv2d(k2, k1.permute(1, 0, 2, 3))
        b_hat = (k2 * b1.reshape(1, -1, 1, 1)).sum((1, 2, 3))
    else:
        k_slices = []
        b_slices = []
        k1_T = k1.permute(1, 0, 2, 3)
        k1_group_width = k1.size(0) // groups
        k2_group_width = k2.size(0) // groups
        for g in range(groups):
            k1_T_slice = k1_T[:, g*k1_group_width:(g+1)*k1_group_width, :, :]
            k2_slice = k2[g*k2_group_width:(g+1)*k2_group_width, :, :, :]
            k_slices.append(F.conv2d(k2_slice, k1_T_slice))
            b_slices.append((k2_slice * b1[g*k1_group_width:(g+1)*k1_group_width].reshape(1, -1, 1, 1)).sum((1, 2, 3)))
        k, b_hat = transIV_depthconcat(k_slices, b_slices)
    return k, b_hat + b2


def transIV_depthconcat(kernels, biases):
    return torch.cat(kernels, dim=0), torch.cat(biases)


def transV_avg(channels, kernel_size, groups):
    input_dim = channels // groups
    k = torch.zeros((channels, input_dim, kernel_size, kernel_size))
    k[np.arange(channels), np.tile(np.arange(input_dim), groups), :, :] = 1.0 / kernel_size ** 2
    return k


# This has not been tested with non-square kernels (kernel.size(2) != kernel.size(3)) nor even-size kernels
def transVI_multiscale(kernel, target_kernel_size):
    H_pixels_to_pad = (target_kernel_size - kernel.size(2)) // 2
    W_pixels_to_pad = (target_kernel_size - kernel.size(3)) // 2
    return F.pad(kernel, [H_pixels_to_pad, H_pixels_to_pad, W_pixels_to_pad, W_pixels_to_pad])
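transI_fusebn folds a BatchNorm that follows a bias-free conv into the conv itself: since BN computes gamma * (conv(x) - mu) / sigma + beta, the fused kernel is w * gamma / sigma and the fused bias is beta - mu * gamma / sigma. A quick numeric check of that identity (a sketch, not part of the repo):

# Sketch: transI_fusebn(conv.weight, bn) should match conv -> bn in eval mode.
import torch
import torch.nn as nn
import torch.nn.functional as F
from yolov6.layers.dbb_transforms import transI_fusebn

conv = nn.Conv2d(8, 8, 3, padding=1, bias=False)
bn = nn.BatchNorm2d(8)
bn.eval()
# give the BN non-trivial running statistics so the check is meaningful
bn.running_mean.uniform_(-1, 1)
bn.running_var.uniform_(0.5, 2.0)

x = torch.randn(2, 8, 16, 16)
with torch.no_grad():
    ref = bn(conv(x))
    k, b = transI_fusebn(conv.weight, bn)
    fused = F.conv2d(x, k, b, padding=1)

print(torch.allclose(ref, fused, atol=1e-5))  # expected: True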
yolov6/models/efficientrep.py
ADDED
@@ -0,0 +1,102 @@
from torch import nn
from yolov6.layers.common import RepVGGBlock, RepBlock, SimSPPF


class EfficientRep(nn.Module):
    '''EfficientRep Backbone
    EfficientRep is handcrafted by hardware-aware neural network design.
    With its rep-style structure, EfficientRep is friendly to high-computation hardware (e.g. GPU).
    '''

    def __init__(
        self,
        in_channels=3,
        channels_list=None,
        num_repeats=None,
    ):
        super().__init__()

        assert channels_list is not None
        assert num_repeats is not None

        self.stem = RepVGGBlock(
            in_channels=in_channels,
            out_channels=channels_list[0],
            kernel_size=3,
            stride=2
        )

        self.ERBlock_2 = nn.Sequential(
            RepVGGBlock(
                in_channels=channels_list[0],
                out_channels=channels_list[1],
                kernel_size=3,
                stride=2
            ),
            RepBlock(
                in_channels=channels_list[1],
                out_channels=channels_list[1],
                n=num_repeats[1]
            )
        )

        self.ERBlock_3 = nn.Sequential(
            RepVGGBlock(
                in_channels=channels_list[1],
                out_channels=channels_list[2],
                kernel_size=3,
                stride=2
            ),
            RepBlock(
                in_channels=channels_list[2],
                out_channels=channels_list[2],
                n=num_repeats[2]
            )
        )

        self.ERBlock_4 = nn.Sequential(
            RepVGGBlock(
                in_channels=channels_list[2],
                out_channels=channels_list[3],
                kernel_size=3,
                stride=2
            ),
            RepBlock(
                in_channels=channels_list[3],
                out_channels=channels_list[3],
                n=num_repeats[3]
            )
        )

        self.ERBlock_5 = nn.Sequential(
            RepVGGBlock(
                in_channels=channels_list[3],
                out_channels=channels_list[4],
                kernel_size=3,
                stride=2,
            ),
            RepBlock(
                in_channels=channels_list[4],
                out_channels=channels_list[4],
                n=num_repeats[4]
            ),
            SimSPPF(
                in_channels=channels_list[4],
                out_channels=channels_list[4],
                kernel_size=5
            )
        )

    def forward(self, x):

        outputs = []
        x = self.stem(x)
        x = self.ERBlock_2(x)
        x = self.ERBlock_3(x)
        outputs.append(x)
        x = self.ERBlock_4(x)
        outputs.append(x)
        x = self.ERBlock_5(x)
        outputs.append(x)

        return tuple(outputs)
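The backbone halves resolution at the stem and at each ERBlock, so the three returned feature maps sit at strides 8, 16 and 32. A shape sketch with an illustrative (not official) channel configuration:

# Sketch: the three EfficientRep outputs are the stride-8/16/32 feature maps.
# channels_list and num_repeats here are assumptions for illustration.
import torch
from yolov6.models.efficientrep import EfficientRep

backbone = EfficientRep(
    in_channels=3,
    channels_list=[32, 64, 128, 256, 512],
    num_repeats=[1, 2, 3, 4, 2],
)
x = torch.randn(1, 3, 256, 256)
p3, p4, p5 = backbone(x)
print(p3.shape)  # torch.Size([1, 128, 32, 32]) -> stride 8
print(p4.shape)  # torch.Size([1, 256, 16, 16]) -> stride 16
print(p5.shape)  # torch.Size([1, 512, 8, 8])   -> stride 32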
yolov6/models/effidehead.py
ADDED
@@ -0,0 +1,211 @@
import torch
import torch.nn as nn
import math
from yolov6.layers.common import *


class Detect(nn.Module):
    '''Efficient Decoupled Head
    With hardware-aware design, the decoupled head is optimized with
    hybrid-channels methods.
    '''
    def __init__(self, num_classes=80, anchors=1, num_layers=3, inplace=True, head_layers=None):  # detection layer
        super().__init__()
        assert head_layers is not None
        self.nc = num_classes  # number of classes
        self.no = num_classes + 5  # number of outputs per anchor
        self.nl = num_layers  # number of detection layers
        if isinstance(anchors, (list, tuple)):
            self.na = len(anchors[0]) // 2
        else:
            self.na = anchors
        self.anchors = anchors
        self.grid = [torch.zeros(1)] * num_layers
        self.prior_prob = 1e-2
        self.inplace = inplace
        stride = [8, 16, 32]  # strides computed during build
        self.stride = torch.tensor(stride)

        # Init decouple head
        self.cls_convs = nn.ModuleList()
        self.reg_convs = nn.ModuleList()
        self.cls_preds = nn.ModuleList()
        self.reg_preds = nn.ModuleList()
        self.obj_preds = nn.ModuleList()
        self.stems = nn.ModuleList()

        # Efficient decoupled head layers
        for i in range(num_layers):
            idx = i*6
            self.stems.append(head_layers[idx])
            self.cls_convs.append(head_layers[idx+1])
            self.reg_convs.append(head_layers[idx+2])
            self.cls_preds.append(head_layers[idx+3])
            self.reg_preds.append(head_layers[idx+4])
            self.obj_preds.append(head_layers[idx+5])

    def initialize_biases(self):
        for conv in self.cls_preds:
            b = conv.bias.view(self.na, -1)
            b.data.fill_(-math.log((1 - self.prior_prob) / self.prior_prob))
            conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
        for conv in self.obj_preds:
            b = conv.bias.view(self.na, -1)
            b.data.fill_(-math.log((1 - self.prior_prob) / self.prior_prob))
            conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)

    def forward(self, x):
        z = []
        for i in range(self.nl):
            x[i] = self.stems[i](x[i])
            cls_x = x[i]
            reg_x = x[i]
            cls_feat = self.cls_convs[i](cls_x)
            cls_output = self.cls_preds[i](cls_feat)
            reg_feat = self.reg_convs[i](reg_x)
            reg_output = self.reg_preds[i](reg_feat)
            obj_output = self.obj_preds[i](reg_feat)
            if self.training:
                x[i] = torch.cat([reg_output, obj_output, cls_output], 1)
                bs, _, ny, nx = x[i].shape
                x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
            else:
                y = torch.cat([reg_output, obj_output.sigmoid(), cls_output.sigmoid()], 1)
                bs, _, ny, nx = y.shape
                y = y.view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
                if self.grid[i].shape[2:4] != y.shape[2:4]:
                    d = self.stride.device
                    yv, xv = torch.meshgrid([torch.arange(ny).to(d), torch.arange(nx).to(d)])
                    self.grid[i] = torch.stack((xv, yv), 2).view(1, self.na, ny, nx, 2).float()
                if self.inplace:
                    y[..., 0:2] = (y[..., 0:2] + self.grid[i]) * self.stride[i]  # xy
                    y[..., 2:4] = torch.exp(y[..., 2:4]) * self.stride[i]  # wh
                else:
                    xy = (y[..., 0:2] + self.grid[i]) * self.stride[i]  # xy
                    wh = torch.exp(y[..., 2:4]) * self.stride[i]  # wh
                    y = torch.cat((xy, wh, y[..., 4:]), -1)
                z.append(y.view(bs, -1, self.no))
        return x if self.training else torch.cat(z, 1)


def build_effidehead_layer(channels_list, num_anchors, num_classes):
    head_layers = nn.Sequential(
        # stem0
        Conv(
            in_channels=channels_list[6],
            out_channels=channels_list[6],
            kernel_size=1,
            stride=1
        ),
        # cls_conv0
        Conv(
            in_channels=channels_list[6],
            out_channels=channels_list[6],
            kernel_size=3,
            stride=1
        ),
        # reg_conv0
        Conv(
            in_channels=channels_list[6],
            out_channels=channels_list[6],
            kernel_size=3,
            stride=1
        ),
        # cls_pred0
        nn.Conv2d(
            in_channels=channels_list[6],
            out_channels=num_classes * num_anchors,
            kernel_size=1
        ),
        # reg_pred0
        nn.Conv2d(
            in_channels=channels_list[6],
            out_channels=4 * num_anchors,
            kernel_size=1
        ),
        # obj_pred0
        nn.Conv2d(
            in_channels=channels_list[6],
            out_channels=1 * num_anchors,
            kernel_size=1
        ),
        # stem1
        Conv(
            in_channels=channels_list[8],
            out_channels=channels_list[8],
            kernel_size=1,
            stride=1
        ),
        # cls_conv1
        Conv(
            in_channels=channels_list[8],
            out_channels=channels_list[8],
            kernel_size=3,
            stride=1
        ),
        # reg_conv1
        Conv(
            in_channels=channels_list[8],
            out_channels=channels_list[8],
            kernel_size=3,
            stride=1
        ),
        # cls_pred1
        nn.Conv2d(
            in_channels=channels_list[8],
            out_channels=num_classes * num_anchors,
            kernel_size=1
        ),
        # reg_pred1
        nn.Conv2d(
            in_channels=channels_list[8],
            out_channels=4 * num_anchors,
            kernel_size=1
        ),
        # obj_pred1
        nn.Conv2d(
            in_channels=channels_list[8],
            out_channels=1 * num_anchors,
            kernel_size=1
        ),
        # stem2
        Conv(
            in_channels=channels_list[10],
            out_channels=channels_list[10],
            kernel_size=1,
            stride=1
        ),
        # cls_conv2
        Conv(
            in_channels=channels_list[10],
            out_channels=channels_list[10],
            kernel_size=3,
            stride=1
        ),
        # reg_conv2
        Conv(
            in_channels=channels_list[10],
            out_channels=channels_list[10],
            kernel_size=3,
            stride=1
        ),
        # cls_pred2
        nn.Conv2d(
            in_channels=channels_list[10],
            out_channels=num_classes * num_anchors,
            kernel_size=1
        ),
        # reg_pred2
        nn.Conv2d(
            in_channels=channels_list[10],
            out_channels=4 * num_anchors,
            kernel_size=1
        ),
        # obj_pred2
        nn.Conv2d(
            in_channels=channels_list[10],
            out_channels=1 * num_anchors,
            kernel_size=1
        )
    )
    return head_layers
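initialize_biases() sets the class and objectness prediction biases to -log((1 - p) / p) with prior probability p = 0.01, so at the start of training every location predicts roughly a 1% foreground probability after the sigmoid (the same initialization trick used in the RetinaNet focal-loss paper). Numerically:

# Sketch: the prior-probability bias used by Detect.initialize_biases().
import math

prior_prob = 1e-2
b = -math.log((1 - prior_prob) / prior_prob)
print(b)                       # ~ -4.595
print(1 / (1 + math.exp(-b)))  # sigmoid(b) ~ 0.01, the intended prior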
yolov6/models/end2end.py
ADDED
@@ -0,0 +1,147 @@
import torch
import torch.nn as nn
import random

class ORT_NMS(torch.autograd.Function):

    @staticmethod
    def forward(ctx,
                boxes,
                scores,
                max_output_boxes_per_class=torch.tensor([100]),
                iou_threshold=torch.tensor([0.45]),
                score_threshold=torch.tensor([0.25])):
        device = boxes.device
        batch = scores.shape[0]
        num_det = random.randint(0, 100)
        batches = torch.randint(0, batch, (num_det,)).sort()[0].to(device)
        idxs = torch.arange(100, 100 + num_det).to(device)
        zeros = torch.zeros((num_det,), dtype=torch.int64).to(device)
        selected_indices = torch.cat([batches[None], zeros[None], idxs[None]], 0).T.contiguous()
        selected_indices = selected_indices.to(torch.int64)
        return selected_indices

    @staticmethod
    def symbolic(g, boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold):
        return g.op("NonMaxSuppression", boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold)

class TRT_NMS(torch.autograd.Function):
    @staticmethod
    def forward(
        ctx,
        boxes,
        scores,
        background_class=-1,
        box_coding=1,
        iou_threshold=0.45,
        max_output_boxes=100,
        plugin_version="1",
        score_activation=0,
        score_threshold=0.25,
    ):
        batch_size, num_boxes, num_classes = scores.shape
        num_det = torch.randint(0, max_output_boxes, (batch_size, 1), dtype=torch.int32)
        det_boxes = torch.randn(batch_size, max_output_boxes, 4)
        det_scores = torch.randn(batch_size, max_output_boxes)
        det_classes = torch.randint(0, num_classes, (batch_size, max_output_boxes), dtype=torch.int32)

        return num_det, det_boxes, det_scores, det_classes

    @staticmethod
    def symbolic(g,
                 boxes,
                 scores,
                 background_class=-1,
                 box_coding=1,
                 iou_threshold=0.45,
                 max_output_boxes=100,
                 plugin_version="1",
                 score_activation=0,
                 score_threshold=0.25):
        out = g.op("TRT::EfficientNMS_TRT",
                   boxes,
                   scores,
                   background_class_i=background_class,
                   box_coding_i=box_coding,
                   iou_threshold_f=iou_threshold,
                   max_output_boxes_i=max_output_boxes,
                   plugin_version_s=plugin_version,
                   score_activation_i=score_activation,
                   score_threshold_f=score_threshold,
                   outputs=4)
        nums, boxes, scores, classes = out
        return nums, boxes, scores, classes



class ONNX_ORT(nn.Module):

    def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=640, device=None):
        super().__init__()
        self.device = device if device else torch.device("cpu")
        self.max_obj = torch.tensor([max_obj]).to(device)
        self.iou_threshold = torch.tensor([iou_thres]).to(device)
        self.score_threshold = torch.tensor([score_thres]).to(device)
        self.max_wh = max_wh
        self.convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]],
                                           dtype=torch.float32,
                                           device=self.device)

    def forward(self, x):
        box = x[:, :, :4]
        conf = x[:, :, 4:5]
        score = x[:, :, 5:]
        score *= conf
        box @= self.convert_matrix
        objScore, objCls = score.max(2, keepdim=True)
        dis = objCls.float() * self.max_wh
        nmsbox = box + dis
        objScore1 = objScore.transpose(1, 2).contiguous()
        selected_indices = ORT_NMS.apply(nmsbox, objScore1, self.max_obj, self.iou_threshold, self.score_threshold)
        X, Y = selected_indices[:, 0], selected_indices[:, 2]
        resBoxes = box[X, Y, :]
        resClasses = objCls[X, Y, :].float()
        resScores = objScore[X, Y, :]
        X = X.unsqueeze(1).float()
        return torch.cat([X, resBoxes, resClasses, resScores], 1)

class ONNX_TRT(nn.Module):

    def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None):
        super().__init__()
        assert max_wh is None
        self.device = device if device else torch.device('cpu')
        self.background_class = -1
        self.box_coding = 1
        self.iou_threshold = iou_thres
        self.max_obj = max_obj
        self.plugin_version = '1'
        self.score_activation = 0
        self.score_threshold = score_thres

    def forward(self, x):
        box = x[:, :, :4]
        conf = x[:, :, 4:5]
        score = x[:, :, 5:]
        score *= conf
        num_det, det_boxes, det_scores, det_classes = TRT_NMS.apply(box, score, self.background_class, self.box_coding,
                                                                    self.iou_threshold, self.max_obj,
                                                                    self.plugin_version, self.score_activation,
                                                                    self.score_threshold)
        return num_det, det_boxes, det_scores, det_classes


class End2End(nn.Module):

    def __init__(self, model, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None):
        super().__init__()
        device = device if device else torch.device('cpu')
        self.model = model.to(device)
        self.patch_model = ONNX_TRT if max_wh is None else ONNX_ORT
        self.end2end = self.patch_model(max_obj, iou_thres, score_thres, max_wh, device)
        self.end2end.eval()

    def forward(self, x):
        x = self.model(x)
        x = self.end2end(x)
        return x
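End2End picks the TensorRT NMS path when max_wh is None and the ONNX Runtime path otherwise; the random tensors in the two forward passes are placeholders that only shape-trace the graph, while the real NMS comes from the symbolic ops at export time. A hedged export sketch (the checkpoint filename and 640x640 input size are assumptions for illustration):

# Sketch: wrap a loaded model with End2End and export it with NMS baked in.
import torch
from yolov6.models.end2end import End2End
from yolov6.layers.common import DetectBackend

device = torch.device('cpu')
model = DetectBackend('yolov6s.pt', device=device).model  # assumed checkpoint
model.eval()

# max_wh=None selects ONNX_TRT (EfficientNMS_TRT plugin);
# an integer max_wh (e.g. 640) would select ONNX_ORT (NonMaxSuppression op).
e2e = End2End(model, max_obj=100, iou_thres=0.45, score_thres=0.25,
              max_wh=None, device=device)

dummy = torch.zeros(1, 3, 640, 640, device=device)
torch.onnx.export(e2e, dummy, 'yolov6s_nms.onnx', opset_version=11,
                  input_names=['images'],
                  output_names=['num_dets', 'det_boxes', 'det_scores', 'det_classes'])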
yolov6/models/loss.py
ADDED
@@ -0,0 +1,411 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

# The code is based on
# https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/models/yolo_head.py
# Copyright (c) Megvii, Inc. and its affiliates.

import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from yolov6.utils.figure_iou import IOUloss, pairwise_bbox_iou


class ComputeLoss:
    '''Loss computation function.
    It implements the SimOTA label assignment and the IoU-style box loss (e.g. SIoU/CIoU).
    '''
    def __init__(self,
                 reg_weight=5.0,
                 iou_weight=3.0,
                 cls_weight=1.0,
                 center_radius=2.5,
                 eps=1e-7,
                 in_channels=[256, 512, 1024],
                 strides=[8, 16, 32],
                 n_anchors=1,
                 iou_type='ciou'
                 ):

        self.reg_weight = reg_weight
        self.iou_weight = iou_weight
        self.cls_weight = cls_weight

        self.center_radius = center_radius
        self.eps = eps
        self.n_anchors = n_anchors
        self.strides = strides
        self.grids = [torch.zeros(1)] * len(in_channels)

        # Define criteria
        self.l1_loss = nn.L1Loss(reduction="none")
        self.bcewithlog_loss = nn.BCEWithLogitsLoss(reduction="none")
        self.iou_loss = IOUloss(iou_type=iou_type, reduction="none")

    def __call__(
        self,
        outputs,
        targets
    ):
        dtype = outputs[0].type()
        device = targets.device
        loss_cls, loss_obj, loss_iou, loss_l1 = torch.zeros(1, device=device), torch.zeros(1, device=device), \
            torch.zeros(1, device=device), torch.zeros(1, device=device)
        num_classes = outputs[0].shape[-1] - 5

        outputs, outputs_origin, gt_bboxes_scale, xy_shifts, expanded_strides = self.get_outputs_and_grids(
            outputs, self.strides, dtype, device)

        total_num_anchors = outputs.shape[1]
        bbox_preds = outputs[:, :, :4]  # [batch, n_anchors_all, 4]
        bbox_preds_org = outputs_origin[:, :, :4]  # [batch, n_anchors_all, 4]
        obj_preds = outputs[:, :, 4].unsqueeze(-1)  # [batch, n_anchors_all, 1]
        cls_preds = outputs[:, :, 5:]  # [batch, n_anchors_all, n_cls]

        # targets
        batch_size = bbox_preds.shape[0]
        targets_list = np.zeros((batch_size, 1, 5)).tolist()
        for i, item in enumerate(targets.cpu().numpy().tolist()):
            targets_list[int(item[0])].append(item[1:])
        max_len = max((len(l) for l in targets_list))

        targets = torch.from_numpy(np.array(list(map(lambda l: l + [[-1, 0, 0, 0, 0]] * (max_len - len(l)), targets_list)))[:, 1:, :]).to(targets.device)
        num_targets_list = (targets.sum(dim=2) > 0).sum(dim=1)  # number of objects

        num_fg, num_gts = 0, 0
        cls_targets, reg_targets, l1_targets, obj_targets, fg_masks = [], [], [], [], []

        for batch_idx in range(batch_size):
            num_gt = int(num_targets_list[batch_idx])
            num_gts += num_gt
            if num_gt == 0:
                cls_target = outputs.new_zeros((0, num_classes))
                reg_target = outputs.new_zeros((0, 4))
                l1_target = outputs.new_zeros((0, 4))
                obj_target = outputs.new_zeros((total_num_anchors, 1))
                fg_mask = outputs.new_zeros(total_num_anchors).bool()
            else:

                gt_bboxes_per_image = targets[batch_idx, :num_gt, 1:5].mul_(gt_bboxes_scale)
                gt_classes = targets[batch_idx, :num_gt, 0]
                bboxes_preds_per_image = bbox_preds[batch_idx]
                cls_preds_per_image = cls_preds[batch_idx]
                obj_preds_per_image = obj_preds[batch_idx]

                try:
                    (
                        gt_matched_classes,
                        fg_mask,
                        pred_ious_this_matching,
                        matched_gt_inds,
                        num_fg_img,
                    ) = self.get_assignments(
                        batch_idx,
                        num_gt,
                        total_num_anchors,
                        gt_bboxes_per_image,
                        gt_classes,
                        bboxes_preds_per_image,
                        cls_preds_per_image,
                        obj_preds_per_image,
                        expanded_strides,
                        xy_shifts,
                        num_classes
                    )

                except RuntimeError:
                    print(
                        "OOM RuntimeError is raised due to the huge memory cost during label assignment. \
                        CPU mode is applied in this batch. If you want to avoid this issue, \
                        try to reduce the batch size or image size."
                    )
                    torch.cuda.empty_cache()
                    print("------------CPU Mode for This Batch-------------")

                    _gt_bboxes_per_image = gt_bboxes_per_image.cpu().float()
                    _gt_classes = gt_classes.cpu().float()
                    _bboxes_preds_per_image = bboxes_preds_per_image.cpu().float()
                    _cls_preds_per_image = cls_preds_per_image.cpu().float()
                    _obj_preds_per_image = obj_preds_per_image.cpu().float()

                    _expanded_strides = expanded_strides.cpu().float()
                    _xy_shifts = xy_shifts.cpu()

                    (
                        gt_matched_classes,
                        fg_mask,
                        pred_ious_this_matching,
                        matched_gt_inds,
                        num_fg_img,
                    ) = self.get_assignments(
                        batch_idx,
                        num_gt,
                        total_num_anchors,
                        _gt_bboxes_per_image,
                        _gt_classes,
                        _bboxes_preds_per_image,
                        _cls_preds_per_image,
                        _obj_preds_per_image,
                        _expanded_strides,
                        _xy_shifts,
                        num_classes
                    )

                    gt_matched_classes = gt_matched_classes.cuda()
                    fg_mask = fg_mask.cuda()
                    pred_ious_this_matching = pred_ious_this_matching.cuda()
                    matched_gt_inds = matched_gt_inds.cuda()

                torch.cuda.empty_cache()
                num_fg += num_fg_img
                if num_fg_img > 0:
                    cls_target = F.one_hot(
                        gt_matched_classes.to(torch.int64), num_classes
                    ) * pred_ious_this_matching.unsqueeze(-1)
                    obj_target = fg_mask.unsqueeze(-1)
                    reg_target = gt_bboxes_per_image[matched_gt_inds]

                    l1_target = self.get_l1_target(
                        outputs.new_zeros((num_fg_img, 4)),
                        gt_bboxes_per_image[matched_gt_inds],
                        expanded_strides[0][fg_mask],
                        xy_shifts=xy_shifts[0][fg_mask],
                    )

            cls_targets.append(cls_target)
            reg_targets.append(reg_target)
            obj_targets.append(obj_target)
            l1_targets.append(l1_target)
            fg_masks.append(fg_mask)

        cls_targets = torch.cat(cls_targets, 0)
        reg_targets = torch.cat(reg_targets, 0)
        obj_targets = torch.cat(obj_targets, 0)
        l1_targets = torch.cat(l1_targets, 0)
        fg_masks = torch.cat(fg_masks, 0)

        num_fg = max(num_fg, 1)
        # loss
        loss_iou += (self.iou_loss(bbox_preds.view(-1, 4)[fg_masks].T, reg_targets)).sum() / num_fg
        loss_l1 += (self.l1_loss(bbox_preds_org.view(-1, 4)[fg_masks], l1_targets)).sum() / num_fg

        loss_obj += (self.bcewithlog_loss(obj_preds.view(-1, 1), obj_targets * 1.0)).sum() / num_fg
        loss_cls += (self.bcewithlog_loss(cls_preds.view(-1, num_classes)[fg_masks], cls_targets)).sum() / num_fg

        total_losses = self.reg_weight * loss_iou + loss_l1 + loss_obj + loss_cls
        return total_losses, torch.cat((self.reg_weight * loss_iou, loss_l1, loss_obj, loss_cls)).detach()

    def decode_output(self, output, k, stride, dtype, device):
        grid = self.grids[k].to(device)
        batch_size = output.shape[0]
        hsize, wsize = output.shape[2:4]
        if grid.shape[2:4] != output.shape[2:4]:
            yv, xv = torch.meshgrid([torch.arange(hsize), torch.arange(wsize)])
            grid = torch.stack((xv, yv), 2).view(1, 1, hsize, wsize, 2).type(dtype).to(device)
            self.grids[k] = grid

        output = output.reshape(batch_size, self.n_anchors * hsize * wsize, -1)
        output_origin = output.clone()
        grid = grid.view(1, -1, 2)

        output[..., :2] = (output[..., :2] + grid) * stride
        output[..., 2:4] = torch.exp(output[..., 2:4]) * stride

        return output, output_origin, grid, hsize, wsize

    def get_outputs_and_grids(self, outputs, strides, dtype, device):
        xy_shifts = []
        expanded_strides = []
        outputs_new = []
        outputs_origin = []

        for k, output in enumerate(outputs):
            output, output_origin, grid, feat_h, feat_w = self.decode_output(
                output, k, strides[k], dtype, device)

            xy_shift = grid
            expanded_stride = torch.full((1, grid.shape[1], 1), strides[k], dtype=grid.dtype, device=grid.device)

            xy_shifts.append(xy_shift)
            expanded_strides.append(expanded_stride)
            outputs_new.append(output)
            outputs_origin.append(output_origin)

        xy_shifts = torch.cat(xy_shifts, 1)  # [1, n_anchors_all, 2]
        expanded_strides = torch.cat(expanded_strides, 1)  # [1, n_anchors_all, 1]
        outputs_origin = torch.cat(outputs_origin, 1)
        outputs = torch.cat(outputs_new, 1)

        feat_h *= strides[-1]
        feat_w *= strides[-1]
        gt_bboxes_scale = torch.Tensor([[feat_w, feat_h, feat_w, feat_h]]).type_as(outputs)

        return outputs, outputs_origin, gt_bboxes_scale, xy_shifts, expanded_strides

    def get_l1_target(self, l1_target, gt, stride, xy_shifts, eps=1e-8):

        l1_target[:, 0:2] = gt[:, 0:2] / stride - xy_shifts
        l1_target[:, 2:4] = torch.log(gt[:, 2:4] / stride + eps)
        return l1_target

    @torch.no_grad()
    def get_assignments(
        self,
        batch_idx,
        num_gt,
        total_num_anchors,
        gt_bboxes_per_image,
        gt_classes,
        bboxes_preds_per_image,
        cls_preds_per_image,
        obj_preds_per_image,
        expanded_strides,
        xy_shifts,
        num_classes
    ):

        fg_mask, is_in_boxes_and_center = self.get_in_boxes_info(
            gt_bboxes_per_image,
            expanded_strides,
            xy_shifts,
            total_num_anchors,
            num_gt,
        )

        bboxes_preds_per_image = bboxes_preds_per_image[fg_mask]
        cls_preds_ = cls_preds_per_image[fg_mask]
        obj_preds_ = obj_preds_per_image[fg_mask]
        num_in_boxes_anchor = bboxes_preds_per_image.shape[0]

        # cost
        pair_wise_ious = pairwise_bbox_iou(gt_bboxes_per_image, bboxes_preds_per_image, box_format='xywh')
        pair_wise_ious_loss = -torch.log(pair_wise_ious + 1e-8)

        gt_cls_per_image = (
            F.one_hot(gt_classes.to(torch.int64), num_classes)
            .float()
            .unsqueeze(1)
            .repeat(1, num_in_boxes_anchor, 1)
        )

        with torch.cuda.amp.autocast(enabled=False):
            cls_preds_ = (
                cls_preds_.float().sigmoid_().unsqueeze(0).repeat(num_gt, 1, 1)
                * obj_preds_.float().sigmoid_().unsqueeze(0).repeat(num_gt, 1, 1)
            )
            pair_wise_cls_loss = F.binary_cross_entropy(
                cls_preds_.sqrt_(), gt_cls_per_image, reduction="none"
            ).sum(-1)
        del cls_preds_, obj_preds_

        cost = (
            self.cls_weight * pair_wise_cls_loss
            + self.iou_weight * pair_wise_ious_loss
            + 100000.0 * (~is_in_boxes_and_center)
        )

        (
            num_fg,
            gt_matched_classes,
            pred_ious_this_matching,
            matched_gt_inds,
        ) = self.dynamic_k_matching(cost, pair_wise_ious, gt_classes, num_gt, fg_mask)

        del pair_wise_cls_loss, cost, pair_wise_ious, pair_wise_ious_loss

        return (
            gt_matched_classes,
            fg_mask,
            pred_ious_this_matching,
            matched_gt_inds,
            num_fg,
        )

    def get_in_boxes_info(
        self,
        gt_bboxes_per_image,
        expanded_strides,
        xy_shifts,
        total_num_anchors,
        num_gt,
    ):
        expanded_strides_per_image = expanded_strides[0]
        xy_shifts_per_image = xy_shifts[0] * expanded_strides_per_image
        xy_centers_per_image = (
            (xy_shifts_per_image + 0.5 * expanded_strides_per_image)
            .unsqueeze(0)
            .repeat(num_gt, 1, 1)
        )  # [n_anchor, 2] -> [n_gt, n_anchor, 2]

        gt_bboxes_per_image_lt = (
            (gt_bboxes_per_image[:, 0:2] - 0.5 * gt_bboxes_per_image[:, 2:4])
            .unsqueeze(1)
            .repeat(1, total_num_anchors, 1)
        )
        gt_bboxes_per_image_rb = (
            (gt_bboxes_per_image[:, 0:2] + 0.5 * gt_bboxes_per_image[:, 2:4])
            .unsqueeze(1)
            .repeat(1, total_num_anchors, 1)
        )  # [n_gt, 2] -> [n_gt, n_anchor, 2]

        b_lt = xy_centers_per_image - gt_bboxes_per_image_lt
        b_rb = gt_bboxes_per_image_rb - xy_centers_per_image
        bbox_deltas = torch.cat([b_lt, b_rb], 2)

        is_in_boxes = bbox_deltas.min(dim=-1).values > 0.0
        is_in_boxes_all = is_in_boxes.sum(dim=0) > 0

        # in fixed center
        gt_bboxes_per_image_lt = (gt_bboxes_per_image[:, 0:2]).unsqueeze(1).repeat(
            1, total_num_anchors, 1
        ) - self.center_radius * expanded_strides_per_image.unsqueeze(0)
        gt_bboxes_per_image_rb = (gt_bboxes_per_image[:, 0:2]).unsqueeze(1).repeat(
            1, total_num_anchors, 1
        ) + self.center_radius * expanded_strides_per_image.unsqueeze(0)

        c_lt = xy_centers_per_image - gt_bboxes_per_image_lt
        c_rb = gt_bboxes_per_image_rb - xy_centers_per_image
        center_deltas = torch.cat([c_lt, c_rb], 2)
        is_in_centers = center_deltas.min(dim=-1).values > 0.0
        is_in_centers_all = is_in_centers.sum(dim=0) > 0

        # in boxes and in centers
        is_in_boxes_anchor = is_in_boxes_all | is_in_centers_all

        is_in_boxes_and_center = (
            is_in_boxes[:, is_in_boxes_anchor] & is_in_centers[:, is_in_boxes_anchor]
        )
        return is_in_boxes_anchor, is_in_boxes_and_center

    def dynamic_k_matching(self, cost, pair_wise_ious, gt_classes, num_gt, fg_mask):
        matching_matrix = torch.zeros_like(cost, dtype=torch.uint8)
        ious_in_boxes_matrix = pair_wise_ious
        n_candidate_k = min(10, ious_in_boxes_matrix.size(1))
        topk_ious, _ = torch.topk(ious_in_boxes_matrix, n_candidate_k, dim=1)
        dynamic_ks = torch.clamp(topk_ious.sum(1).int(), min=1)
        dynamic_ks = dynamic_ks.tolist()

        for gt_idx in range(num_gt):
            _, pos_idx = torch.topk(
                cost[gt_idx], k=dynamic_ks[gt_idx], largest=False
            )
            matching_matrix[gt_idx][pos_idx] = 1
        del topk_ious, dynamic_ks, pos_idx

        anchor_matching_gt = matching_matrix.sum(0)
        if (anchor_matching_gt > 1).sum() > 0:
            _, cost_argmin = torch.min(cost[:, anchor_matching_gt > 1], dim=0)
            matching_matrix[:, anchor_matching_gt > 1] *= 0
            matching_matrix[cost_argmin, anchor_matching_gt > 1] = 1
        fg_mask_inboxes = matching_matrix.sum(0) > 0
        num_fg = fg_mask_inboxes.sum().item()
        fg_mask[fg_mask.clone()] = fg_mask_inboxes
        matched_gt_inds = matching_matrix[:, fg_mask_inboxes].argmax(0)
        gt_matched_classes = gt_classes[matched_gt_inds]

        pred_ious_this_matching = (matching_matrix * pair_wise_ious).sum(0)[
            fg_mask_inboxes
        ]

        return num_fg, gt_matched_classes, pred_ious_this_matching, matched_gt_inds
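In dynamic_k_matching, each ground truth's k is the clamped integer sum of its top-10 IoUs with the candidate anchors, so well-covered objects get more positive anchors. A toy illustration of that rule in isolation:

# Sketch: how dynamic_k_matching derives k per ground truth from IoUs.
import torch

pair_wise_ious = torch.rand(2, 12)  # toy matrix: 2 ground truths x 12 candidates
n_candidate_k = min(10, pair_wise_ious.size(1))
topk_ious, _ = torch.topk(pair_wise_ious, n_candidate_k, dim=1)
dynamic_ks = torch.clamp(topk_ious.sum(1).int(), min=1)
print(dynamic_ks)  # e.g. tensor([5, 6]): anchors kept per GT, lowest cost first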
yolov6/models/reppan.py
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch
from torch import nn
from yolov6.layers.common import RepBlock, SimConv, Transpose


class RepPANNeck(nn.Module):
    """RepPANNeck Module
    EfficientRep is the default backbone of this model.
    RepPANNeck balances feature fusion ability and hardware efficiency.
    """

    def __init__(
        self,
        channels_list=None,
        num_repeats=None
    ):
        super().__init__()

        assert channels_list is not None
        assert num_repeats is not None

        self.Rep_p4 = RepBlock(
            in_channels=channels_list[3] + channels_list[5],
            out_channels=channels_list[5],
            n=num_repeats[5],
        )

        self.Rep_p3 = RepBlock(
            in_channels=channels_list[2] + channels_list[6],
            out_channels=channels_list[6],
            n=num_repeats[6]
        )

        self.Rep_n3 = RepBlock(
            in_channels=channels_list[6] + channels_list[7],
            out_channels=channels_list[8],
            n=num_repeats[7],
        )

        self.Rep_n4 = RepBlock(
            in_channels=channels_list[5] + channels_list[9],
            out_channels=channels_list[10],
            n=num_repeats[8]
        )

        self.reduce_layer0 = SimConv(
            in_channels=channels_list[4],
            out_channels=channels_list[5],
            kernel_size=1,
            stride=1
        )

        self.upsample0 = Transpose(
            in_channels=channels_list[5],
            out_channels=channels_list[5],
        )

        self.reduce_layer1 = SimConv(
            in_channels=channels_list[5],
            out_channels=channels_list[6],
            kernel_size=1,
            stride=1
        )

        self.upsample1 = Transpose(
            in_channels=channels_list[6],
            out_channels=channels_list[6]
        )

        self.downsample2 = SimConv(
            in_channels=channels_list[6],
            out_channels=channels_list[7],
            kernel_size=3,
            stride=2
        )

        self.downsample1 = SimConv(
            in_channels=channels_list[8],
            out_channels=channels_list[9],
            kernel_size=3,
            stride=2
        )

    def forward(self, input):

        (x2, x1, x0) = input

        # top-down (FPN) path
        fpn_out0 = self.reduce_layer0(x0)
        upsample_feat0 = self.upsample0(fpn_out0)
        f_concat_layer0 = torch.cat([upsample_feat0, x1], 1)
        f_out0 = self.Rep_p4(f_concat_layer0)

        fpn_out1 = self.reduce_layer1(f_out0)
        upsample_feat1 = self.upsample1(fpn_out1)
        f_concat_layer1 = torch.cat([upsample_feat1, x2], 1)
        pan_out2 = self.Rep_p3(f_concat_layer1)

        # bottom-up (PAN) path
        down_feat1 = self.downsample2(pan_out2)
        p_concat_layer1 = torch.cat([down_feat1, fpn_out1], 1)
        pan_out1 = self.Rep_n3(p_concat_layer1)

        down_feat0 = self.downsample1(pan_out1)
        p_concat_layer2 = torch.cat([down_feat0, fpn_out0], 1)
        pan_out0 = self.Rep_n4(p_concat_layer2)

        outputs = [pan_out2, pan_out1, pan_out0]

        return outputs
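A shape-level smoke test of the neck, as a sketch: the channel/repeat values are arbitrary rather than a real YOLOv6 config, and it assumes Transpose doubles spatial resolution as implemented in yolov6.layers.common:

import torch
from yolov6.models.reppan import RepPANNeck

neck = RepPANNeck(channels_list=[16] * 11, num_repeats=[1] * 9)
x2 = torch.randn(1, 16, 32, 32)  # P3 feature, stride 8
x1 = torch.randn(1, 16, 16, 16)  # P4 feature, stride 16
x0 = torch.randn(1, 16, 8, 8)    # P5 feature, stride 32
p3, p4, p5 = neck((x2, x1, x0))
print(p3.shape, p4.shape, p5.shape)  # the three input resolutions are preserved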
yolov6/models/yolo.py
ADDED
@@ -0,0 +1,83 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import math
import torch.nn as nn
from yolov6.layers.common import *
from yolov6.utils.torch_utils import initialize_weights
from yolov6.models.efficientrep import EfficientRep
from yolov6.models.reppan import RepPANNeck
from yolov6.models.effidehead import Detect, build_effidehead_layer


class Model(nn.Module):
    '''YOLOv6 model with backbone, neck and head.
    The default parts are EfficientRep Backbone, Rep-PAN and
    Efficient Decoupled Head.
    '''
    def __init__(self, config, channels=3, num_classes=None, anchors=None):  # model, input channels, number of classes
        super().__init__()
        # Build network
        num_layers = config.model.head.num_layers
        self.backbone, self.neck, self.detect = build_network(config, channels, num_classes, anchors, num_layers)

        # Init Detect head
        begin_indices = config.model.head.begin_indices
        out_indices_head = config.model.head.out_indices
        self.stride = self.detect.stride
        self.detect.i = begin_indices
        self.detect.f = out_indices_head
        self.detect.initialize_biases()

        # Init weights
        initialize_weights(self)

    def forward(self, x):
        x = self.backbone(x)
        x = self.neck(x)
        x = self.detect(x)
        return x

    def _apply(self, fn):
        self = super()._apply(fn)
        self.detect.stride = fn(self.detect.stride)
        self.detect.grid = list(map(fn, self.detect.grid))
        return self


def make_divisible(x, divisor):
    # Round up x so that it is evenly divisible by the divisor.
    return math.ceil(x / divisor) * divisor


def build_network(config, channels, num_classes, anchors, num_layers):
    depth_mul = config.model.depth_multiple
    width_mul = config.model.width_multiple
    num_repeat_backbone = config.model.backbone.num_repeats
    channels_list_backbone = config.model.backbone.out_channels
    num_repeat_neck = config.model.neck.num_repeats
    channels_list_neck = config.model.neck.out_channels
    num_anchors = config.model.head.anchors
    num_repeat = [(max(round(i * depth_mul), 1) if i > 1 else i) for i in (num_repeat_backbone + num_repeat_neck)]
    channels_list = [make_divisible(i * width_mul, 8) for i in (channels_list_backbone + channels_list_neck)]

    backbone = EfficientRep(
        in_channels=channels,
        channels_list=channels_list,
        num_repeats=num_repeat
    )

    neck = RepPANNeck(
        channels_list=channels_list,
        num_repeats=num_repeat
    )

    head_layers = build_effidehead_layer(channels_list, num_anchors, num_classes)

    head = Detect(num_classes, anchors, num_layers, head_layers=head_layers)

    return backbone, neck, head


def build_model(cfg, num_classes, device):
    model = Model(cfg, channels=3, num_classes=num_classes, anchors=cfg.model.head.anchors).to(device)
    return model
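A minimal sketch of building the model from a config; the config path below is hypothetical, and the file it points at must define all of the model.* fields read by build_network above:

import torch
from yolov6.utils.config import Config
from yolov6.models.yolo import build_model

cfg = Config.fromfile('configs/yolov6s.py')  # hypothetical path to a repo config
model = build_model(cfg, num_classes=80, device=torch.device('cpu'))
model.eval()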
yolov6/solver/build.py
ADDED
@@ -0,0 +1,42 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import os
import math

import torch
import torch.nn as nn

from yolov6.utils.events import LOGGER  # needed by build_lr_scheduler below


def build_optimizer(cfg, model):
    """ Build optimizer from cfg file."""
    g_bnw, g_w, g_b = [], [], []
    for v in model.modules():
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
            g_b.append(v.bias)
        if isinstance(v, nn.BatchNorm2d):
            g_bnw.append(v.weight)
        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
            g_w.append(v.weight)

    # Note: the original `assert cfg.solver.optim == 'SGD' or 'Adam'` was always true.
    assert cfg.solver.optim in ('SGD', 'Adam'), 'ERROR: unknown optimizer, use SGD or Adam'
    if cfg.solver.optim == 'SGD':
        optimizer = torch.optim.SGD(g_bnw, lr=cfg.solver.lr0, momentum=cfg.solver.momentum, nesterov=True)
    elif cfg.solver.optim == 'Adam':
        optimizer = torch.optim.Adam(g_bnw, lr=cfg.solver.lr0, betas=(cfg.solver.momentum, 0.999))

    optimizer.add_param_group({'params': g_w, 'weight_decay': cfg.solver.weight_decay})
    optimizer.add_param_group({'params': g_b})

    del g_bnw, g_w, g_b
    return optimizer


def build_lr_scheduler(cfg, optimizer, epochs):
    """Build learning rate scheduler from cfg file."""
    if cfg.solver.lr_scheduler == 'Cosine':
        lf = lambda x: ((1 - math.cos(x * math.pi / epochs)) / 2) * (cfg.solver.lrf - 1) + 1
    else:
        LOGGER.error('unknown lr scheduler, use Cosine defaulted')
        lf = lambda x: ((1 - math.cos(x * math.pi / epochs)) / 2) * (cfg.solver.lrf - 1) + 1  # fall back to Cosine so lf is defined

    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    return scheduler, lf
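Wiring the solver helpers together, as a sketch; the hyperparameter values are illustrative, not the repo's defaults:

import torch.nn as nn
from types import SimpleNamespace
from yolov6.solver.build import build_optimizer, build_lr_scheduler

cfg = SimpleNamespace(solver=SimpleNamespace(
    optim='SGD', lr0=0.01, momentum=0.937, weight_decay=5e-4,
    lr_scheduler='Cosine', lrf=0.01))
model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
optimizer = build_optimizer(cfg, model)          # 3 param groups: BN weights, weights, biases
scheduler, lf = build_lr_scheduler(cfg, optimizer, epochs=100)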
yolov6/utils/Arial.ttf
ADDED
Binary file (773 kB)
yolov6/utils/checkpoint.py
ADDED
@@ -0,0 +1,60 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import os
import shutil
import torch
import os.path as osp
from yolov6.utils.events import LOGGER
from yolov6.utils.torch_utils import fuse_model


def load_state_dict(weights, model, map_location=None):
    """Load weights from a checkpoint file, only assigning weights to layers whose name and shape match."""
    ckpt = torch.load(weights, map_location=map_location)
    state_dict = ckpt['model'].float().state_dict()
    model_state_dict = model.state_dict()
    state_dict = {k: v for k, v in state_dict.items() if k in model_state_dict and v.shape == model_state_dict[k].shape}
    model.load_state_dict(state_dict, strict=False)
    del ckpt, state_dict, model_state_dict
    return model


def load_checkpoint(weights, map_location=None, inplace=True, fuse=True):
    """Load model from checkpoint file."""
    LOGGER.info("Loading checkpoint from {}".format(weights))
    ckpt = torch.load(weights, map_location=map_location)  # load
    model = ckpt['ema' if ckpt.get('ema') else 'model'].float()
    if fuse:
        LOGGER.info("\nFusing model...")
        model = fuse_model(model).eval()
    else:
        model = model.eval()
    return model


def save_checkpoint(ckpt, is_best, save_dir, model_name=""):
    """ Save checkpoint to the disk."""
    if not osp.exists(save_dir):
        os.makedirs(save_dir)
    filename = osp.join(save_dir, model_name + '.pt')
    torch.save(ckpt, filename)
    if is_best:
        best_filename = osp.join(save_dir, 'best_ckpt.pt')
        shutil.copyfile(filename, best_filename)


def strip_optimizer(ckpt_dir, epoch):
    for s in ['best', 'last']:
        ckpt_path = osp.join(ckpt_dir, '{}_ckpt.pt'.format(s))
        if not osp.exists(ckpt_path):
            continue
        ckpt = torch.load(ckpt_path, map_location=torch.device('cpu'))
        if ckpt.get('ema'):
            ckpt['model'] = ckpt['ema']  # replace model with ema
        for k in ['optimizer', 'ema', 'updates']:  # keys
            ckpt[k] = None
        ckpt['epoch'] = epoch
        ckpt['model'].half()  # to FP16
        for p in ckpt['model'].parameters():
            p.requires_grad = False
        torch.save(ckpt, ckpt_path)
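Typical inference-side use, as a sketch (the weights path is hypothetical):

import torch
from yolov6.utils.checkpoint import load_checkpoint

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = load_checkpoint('weights/yolov6s.pt', map_location=device, fuse=True)  # fuse folds BN into Conv for speed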
yolov6/utils/config.py
ADDED
@@ -0,0 +1,101 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# The code is based on
# https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/config.py
# Copyright (c) OpenMMLab.

import os.path as osp
import shutil
import sys
import tempfile
from importlib import import_module
from addict import Dict


class ConfigDict(Dict):

    def __missing__(self, name):
        raise KeyError(name)

    def __getattr__(self, name):
        try:
            value = super(ConfigDict, self).__getattr__(name)
        except KeyError:
            ex = AttributeError("'{}' object has no attribute '{}'".format(
                self.__class__.__name__, name))
        except Exception as e:
            ex = e
        else:
            return value
        raise ex


class Config(object):

    @staticmethod
    def _file2dict(filename):
        filename = str(filename)
        if filename.endswith('.py'):
            with tempfile.TemporaryDirectory() as temp_config_dir:
                shutil.copyfile(filename,
                                osp.join(temp_config_dir, '_tempconfig.py'))
                sys.path.insert(0, temp_config_dir)
                mod = import_module('_tempconfig')
                sys.path.pop(0)
                cfg_dict = {
                    name: value
                    for name, value in mod.__dict__.items()
                    if not name.startswith('__')
                }
                # delete imported module
                del sys.modules['_tempconfig']
        else:
            raise IOError('Only .py type is supported now!')
        cfg_text = filename + '\n'
        with open(filename, 'r') as f:
            cfg_text += f.read()

        return cfg_dict, cfg_text

    @staticmethod
    def fromfile(filename):
        cfg_dict, cfg_text = Config._file2dict(filename)
        return Config(cfg_dict, cfg_text=cfg_text, filename=filename)

    def __init__(self, cfg_dict=None, cfg_text=None, filename=None):
        if cfg_dict is None:
            cfg_dict = dict()
        elif not isinstance(cfg_dict, dict):
            raise TypeError('cfg_dict must be a dict, but got {}'.format(
                type(cfg_dict)))

        super(Config, self).__setattr__('_cfg_dict', ConfigDict(cfg_dict))
        super(Config, self).__setattr__('_filename', filename)
        if cfg_text:
            text = cfg_text
        elif filename:
            with open(filename, 'r') as f:
                text = f.read()
        else:
            text = ''
        super(Config, self).__setattr__('_text', text)

    @property
    def filename(self):
        return self._filename

    @property
    def text(self):
        return self._text

    def __repr__(self):
        return 'Config (path: {}): {}'.format(self.filename,
                                              self._cfg_dict.__repr__())

    def __getattr__(self, name):
        return getattr(self._cfg_dict, name)

    def __setattr__(self, name, value):
        if isinstance(value, dict):
            value = ConfigDict(value)
        self._cfg_dict.__setattr__(name, value)
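Since _file2dict imports the config as a plain Python module, a config file is just top-level assignments. A small sketch (the keys shown are illustrative, not the full YOLOv6 schema):

from yolov6.utils.config import Config

with open('/tmp/_demo_config.py', 'w') as f:
    f.write("model = dict(depth_multiple=0.33, width_multiple=0.50)\n")

cfg = Config.fromfile('/tmp/_demo_config.py')
print(cfg.model.depth_multiple)  # 0.33, nested attribute access via ConfigDict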
yolov6/utils/ema.py
ADDED
@@ -0,0 +1,59 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# The code is based on
# https://github.com/ultralytics/yolov5/blob/master/utils/torch_utils.py
import math
from copy import deepcopy
import torch
import torch.nn as nn


class ModelEMA:
    """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
    Keep a moving average of everything in the model state_dict (parameters and buffers).
    This is intended to allow functionality like
    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
    A smoothed version of the weights is necessary for some training schemes to perform well.
    This class is sensitive where it is initialized in the sequence of model init,
    GPU assignment and distributed training wrappers.
    """

    def __init__(self, model, decay=0.9999, updates=0):
        self.ema = deepcopy(model.module if is_parallel(model) else model).eval()  # FP32 EMA
        self.updates = updates
        self.decay = lambda x: decay * (1 - math.exp(-x / 2000))
        for param in self.ema.parameters():
            param.requires_grad_(False)

    def update(self, model):
        with torch.no_grad():
            self.updates += 1
            decay = self.decay(self.updates)

            state_dict = model.module.state_dict() if is_parallel(model) else model.state_dict()  # model state_dict
            for k, item in self.ema.state_dict().items():
                if item.dtype.is_floating_point:
                    item *= decay
                    item += (1 - decay) * state_dict[k].detach()

    def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
        copy_attr(self.ema, model, include, exclude)


def copy_attr(a, b, include=(), exclude=()):
    """Copy attributes from one instance and set them to another instance."""
    for k, item in b.__dict__.items():
        if (len(include) and k not in include) or k.startswith('_') or k in exclude:
            continue
        else:
            setattr(a, k, item)


def is_parallel(model):
    # Return True if model's type is DP or DDP, else False.
    return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)


def de_parallel(model):
    # De-parallelize a model. Return single-GPU model if model's type is DP or DDP.
    return model.module if is_parallel(model) else model
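The EMA decay ramps up with the update count, decay(x) = decay * (1 - exp(-x / 2000)), so early updates track the live model closely. A toy sketch:

import torch.nn as nn
from yolov6.utils.ema import ModelEMA

model = nn.Linear(4, 2)
ema = ModelEMA(model, decay=0.9999)
for _ in range(3):              # in training this runs once per optimizer step
    ema.update(model)
print(ema.decay(ema.updates))   # still tiny after 3 updates; approaches 0.9999 over time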
yolov6/utils/envs.py
ADDED
@@ -0,0 +1,54 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import random
import numpy as np

import torch
import torch.backends.cudnn as cudnn
from yolov6.utils.events import LOGGER


def get_envs():
    """Get PyTorch-needed environment variables from the system environment."""
    local_rank = int(os.getenv('LOCAL_RANK', -1))
    rank = int(os.getenv('RANK', -1))
    world_size = int(os.getenv('WORLD_SIZE', 1))
    return local_rank, rank, world_size


def select_device(device):
    """Select the device for the program.
    Args:
        device: a string, like 'cpu' or '1,2,3,4'
    Returns:
        torch.device
    """
    if device == 'cpu':
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
        LOGGER.info('Using CPU for training... ')
    elif device:
        os.environ['CUDA_VISIBLE_DEVICES'] = device
        assert torch.cuda.is_available()
        nd = len(device.strip().split(','))
        LOGGER.info(f'Using {nd} GPU for training... ')
    cuda = device != 'cpu' and torch.cuda.is_available()
    device = torch.device('cuda:0' if cuda else 'cpu')
    return device


def set_random_seed(seed, deterministic=False):
    """ Set random state for the random library, numpy, torch and cudnn.
    Args:
        seed: int value.
        deterministic: bool value.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if deterministic:
        cudnn.deterministic = True
        cudnn.benchmark = False
    else:
        cudnn.deterministic = False
        cudnn.benchmark = True
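Typical call order at startup, as a sketch:

from yolov6.utils.envs import get_envs, select_device, set_random_seed

local_rank, rank, world_size = get_envs()   # (-1, -1, 1) outside a torchrun launch
device = select_device('cpu')               # or e.g. '0' / '0,1' to pin GPUs
set_random_seed(42, deterministic=True)     # trades cudnn.benchmark speed for reproducibility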
yolov6/utils/events.py
ADDED
@@ -0,0 +1,41 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import yaml
import logging
import shutil


def set_logging(name=None):
    rank = int(os.getenv('RANK', -1))
    logging.basicConfig(format="%(message)s", level=logging.INFO if (rank in (-1, 0)) else logging.WARNING)
    return logging.getLogger(name)


LOGGER = set_logging(__name__)
NCOLS = shutil.get_terminal_size().columns


def load_yaml(file_path):
    """Load data from yaml file."""
    if isinstance(file_path, str):
        with open(file_path, errors='ignore') as f:
            data_dict = yaml.safe_load(f)
    return data_dict


def save_yaml(data_dict, save_path):
    """Save data to yaml file"""
    with open(save_path, 'w') as f:
        yaml.safe_dump(data_dict, f, sort_keys=False)


def write_tblog(tblogger, epoch, results, losses):
    """Write mAP and loss information to the tensorboard log."""
    tblogger.add_scalar("val/mAP@0.5", results[0], epoch + 1)
    tblogger.add_scalar("val/mAP@0.50:0.95", results[1], epoch + 1)

    tblogger.add_scalar("train/iou_loss", losses[0], epoch + 1)
    tblogger.add_scalar("train/l1_loss", losses[1], epoch + 1)
    tblogger.add_scalar("train/obj_loss", losses[2], epoch + 1)
    tblogger.add_scalar("train/cls_loss", losses[3], epoch + 1)
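A quick sketch of the yaml helpers (paths and keys are illustrative):

from yolov6.utils.events import LOGGER, load_yaml, save_yaml

save_yaml({'nc': 2, 'names': ['person', 'bicycle']}, '/tmp/demo_data.yaml')
data = load_yaml('/tmp/demo_data.yaml')
LOGGER.info('loaded {} classes'.format(data['nc']))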
yolov6/utils/figure_iou.py
ADDED
@@ -0,0 +1,114 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import math
import torch


class IOUloss:
    """ Calculate IoU loss.
    """
    def __init__(self, box_format='xywh', iou_type='ciou', reduction='none', eps=1e-7):
        """ Settings of the class.
        Args:
            box_format: (string), must be one of 'xywh' or 'xyxy'.
            iou_type: (string), can be one of 'ciou', 'diou', 'giou' or 'siou'
            reduction: (string), specifies the reduction to apply to the output, must be one of 'none', 'mean', 'sum'.
            eps: (float), a value to avoid divide-by-zero error.
        """
        self.box_format = box_format
        self.iou_type = iou_type.lower()
        self.reduction = reduction
        self.eps = eps

    def __call__(self, box1, box2):
        """ Calculate iou. box1 and box2 are torch tensors with shape [M, 4] and [N, 4].
        """
        box2 = box2.T
        if self.box_format == 'xyxy':
            b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
            b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
        elif self.box_format == 'xywh':
            b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
            b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
            b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
            b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

        # Intersection area
        inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
                (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

        # Union Area
        w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + self.eps
        w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + self.eps
        union = w1 * h1 + w2 * h2 - inter + self.eps
        iou = inter / union

        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex width
        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
        if self.iou_type == 'giou':
            c_area = cw * ch + self.eps  # convex area
            iou = iou - (c_area - union) / c_area
        elif self.iou_type in ['diou', 'ciou']:
            c2 = cw ** 2 + ch ** 2 + self.eps  # convex diagonal squared
            rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
                    (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center distance squared
            if self.iou_type == 'diou':
                iou = iou - rho2 / c2
            elif self.iou_type == 'ciou':
                v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
                with torch.no_grad():
                    alpha = v / (v - iou + (1 + self.eps))
                iou = iou - (rho2 / c2 + v * alpha)
        elif self.iou_type == 'siou':
            # SIoU Loss https://arxiv.org/pdf/2205.12740.pdf
            s_cw = (b2_x1 + b2_x2 - b1_x1 - b1_x2) * 0.5
            s_ch = (b2_y1 + b2_y2 - b1_y1 - b1_y2) * 0.5
            sigma = torch.pow(s_cw ** 2 + s_ch ** 2, 0.5)
            sin_alpha_1 = torch.abs(s_cw) / sigma
            sin_alpha_2 = torch.abs(s_ch) / sigma
            threshold = pow(2, 0.5) / 2
            sin_alpha = torch.where(sin_alpha_1 > threshold, sin_alpha_2, sin_alpha_1)
            angle_cost = torch.cos(torch.arcsin(sin_alpha) * 2 - math.pi / 2)
            rho_x = (s_cw / cw) ** 2
            rho_y = (s_ch / ch) ** 2
            gamma = angle_cost - 2
            distance_cost = 2 - torch.exp(gamma * rho_x) - torch.exp(gamma * rho_y)
            omiga_w = torch.abs(w1 - w2) / torch.max(w1, w2)
            omiga_h = torch.abs(h1 - h2) / torch.max(h1, h2)
            shape_cost = torch.pow(1 - torch.exp(-1 * omiga_w), 4) + torch.pow(1 - torch.exp(-1 * omiga_h), 4)
            iou = iou - 0.5 * (distance_cost + shape_cost)
        loss = 1.0 - iou

        if self.reduction == 'sum':
            loss = loss.sum()
        elif self.reduction == 'mean':
            loss = loss.mean()

        return loss


def pairwise_bbox_iou(box1, box2, box_format='xywh'):
    """Calculate iou.
    This code is based on https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/utils/boxes.py
    """
    if box_format == 'xyxy':
        lt = torch.max(box1[:, None, :2], box2[:, :2])
        rb = torch.min(box1[:, None, 2:], box2[:, 2:])
        area_1 = torch.prod(box1[:, 2:] - box1[:, :2], 1)
        area_2 = torch.prod(box2[:, 2:] - box2[:, :2], 1)

    elif box_format == 'xywh':
        lt = torch.max(
            (box1[:, None, :2] - box1[:, None, 2:] / 2),
            (box2[:, :2] - box2[:, 2:] / 2),
        )
        rb = torch.min(
            (box1[:, None, :2] + box1[:, None, 2:] / 2),
            (box2[:, :2] + box2[:, 2:] / 2),
        )

        area_1 = torch.prod(box1[:, 2:], 1)
        area_2 = torch.prod(box2[:, 2:], 1)
    valid = (lt < rb).type(lt.type()).prod(dim=2)
    inter = torch.prod(rb - lt, 2) * valid
    return inter / (area_1[:, None] + area_2 - inter)
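A toy check of pairwise_bbox_iou, with values chosen so the overlaps are easy to verify by hand:

import torch
from yolov6.utils.figure_iou import pairwise_bbox_iou

boxes_a = torch.tensor([[0., 0., 10., 10.]])
boxes_b = torch.tensor([[5., 5., 15., 15.], [0., 0., 10., 10.]])
print(pairwise_bbox_iou(boxes_a, boxes_b, box_format='xyxy'))
# tensor([[0.1429, 1.0000]]) -- 25/175 for the shifted box, exact match for the second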
yolov6/utils/general.py
ADDED
@@ -0,0 +1,24 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import os
import glob
from pathlib import Path

def increment_name(path):
    "Increase the save directory's id."
    path = Path(path)
    sep = ''
    if path.exists():
        path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '')
        for n in range(1, 9999):
            p = f'{path}{sep}{n}{suffix}'
            if not os.path.exists(p):
                break
        path = Path(p)
    return path


def find_latest_checkpoint(search_dir='.'):
    # Find the most recent saved checkpoint in search_dir
    checkpoint_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
    return max(checkpoint_list, key=os.path.getctime) if checkpoint_list else ''
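Illustrative use of the run-directory helper (paths are examples):

from yolov6.utils.general import increment_name

# Returns 'runs/train/exp' unchanged if it does not exist yet;
# otherwise yields 'runs/train/exp1', 'runs/train/exp2', ...
print(increment_name('runs/train/exp'))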
yolov6/utils/nms.py
ADDED
@@ -0,0 +1,106 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# The code is based on
# https://github.com/ultralytics/yolov5/blob/master/utils/general.py

import os
import time
import numpy as np
import cv2
import torch
import torchvision


# Settings
torch.set_printoptions(linewidth=320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5
cv2.setNumThreads(0)  # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
os.environ['NUMEXPR_MAX_THREADS'] = str(min(os.cpu_count(), 8))  # NumExpr max threads


def xywh2xyxy(x):
    # Convert boxes with shape [n, 4] from [x, y, w, h] to [x1, y1, x2, y2] where x1y1 is top-left, x2y2 is bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y


def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, max_det=300):
    """Runs Non-Maximum Suppression (NMS) on inference results.
    This code is borrowed from: https://github.com/ultralytics/yolov5/blob/47233e1698b89fc437a4fb9463c815e9171be955/utils/general.py#L775
    Args:
        prediction: (tensor), with shape [N, 5 + num_classes], N is the number of bboxes.
        conf_thres: (float) confidence threshold.
        iou_thres: (float) iou threshold.
        classes: (None or list[int]), if a list is provided, nms only keeps the classes you provide.
        agnostic: (bool), when it is set to True, we do class-independent nms, otherwise, nms is done per class.
        multi_label: (bool), when it is set to True, one box can have multiple labels, otherwise, one box only has one label.
        max_det: (int), max number of output bboxes.

    Returns:
        list of detections, each item is one tensor with shape (num_boxes, 6), 6 is for [xyxy, conf, cls].
    """

    num_classes = prediction.shape[2] - 5  # number of classes
    pred_candidates = prediction[..., 4] > conf_thres  # candidates

    # Check the parameters.
    assert 0 <= conf_thres <= 1, f'conf_thresh must be in 0.0 to 1.0, however {conf_thres} is provided.'
    assert 0 <= iou_thres <= 1, f'iou_thres must be in 0.0 to 1.0, however {iou_thres} is provided.'

    # Function settings.
    max_wh = 4096  # maximum box width and height
    max_nms = 30000  # maximum number of boxes put into torchvision.ops.nms()
    time_limit = 10.0  # quit the function when nms cost time exceeds the limit.
    multi_label &= num_classes > 1  # multiple labels per box

    tik = time.time()
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for img_idx, x in enumerate(prediction):  # image index, image inference
        x = x[pred_candidates[img_idx]]  # confidence

        # If no box remains, skip the next process.
        if not x.shape[0]:
            continue

        # confidence multiply the objectness
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix's shape is (n,6), each row represents (xyxy, conf, cls)
        if multi_label:
            box_idx, class_idx = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[box_idx], x[box_idx, class_idx + 5, None], class_idx[:, None].float()), 1)
        else:  # Only keep the class with the highest score.
            conf, class_idx = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, class_idx.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class, only keep boxes whose category is in classes.
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Check shape
        num_box = x.shape[0]  # number of boxes
        if not num_box:  # no boxes kept.
            continue
        elif num_box > max_nms:  # exceeds the maximum number of boxes.
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS
        class_offset = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + class_offset, x[:, 4]  # boxes (offset by class), scores
        keep_box_idx = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if keep_box_idx.shape[0] > max_det:  # limit detections
            keep_box_idx = keep_box_idx[:max_det]

        output[img_idx] = x[keep_box_idx]
        if (time.time() - tik) > time_limit:
            print(f'WARNING: NMS cost time exceeds the limit {time_limit}s.')
            break  # time limit exceeded

    return output
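A shape-level sketch of calling NMS on dummy predictions (85 = 4 box + 1 objectness + 80 class scores; random values, so the kept count varies):

import torch
from yolov6.utils.nms import non_max_suppression

pred = torch.rand(1, 100, 85)   # [batch, num_boxes, 5 + num_classes], xywh + obj first
pred[..., :4] *= 640            # push boxes into pixel coordinates
dets = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45, max_det=300)
print(dets[0].shape)            # (num_kept, 6): x1, y1, x2, y2, conf, cls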
yolov6/utils/torch_utils.py
ADDED
@@ -0,0 +1,110 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

import time
from contextlib import contextmanager
from copy import deepcopy
import torch
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
from yolov6.utils.events import LOGGER

try:
    import thop  # for FLOPs computation
except ImportError:
    thop = None


@contextmanager
def torch_distributed_zero_first(local_rank: int):
    """
    Decorator to make all processes in distributed training wait for each local_master to do something.
    """
    if local_rank not in [-1, 0]:
        dist.barrier(device_ids=[local_rank])
    yield
    if local_rank == 0:
        dist.barrier(device_ids=[0])


def time_sync():
    # Waits for all kernels in all streams on a CUDA device to complete if cuda is available.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def initialize_weights(model):
    for m in model.modules():
        t = type(m)
        if t is nn.Conv2d:
            pass  # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif t is nn.BatchNorm2d:
            m.eps = 1e-3
            m.momentum = 0.03
        elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
            m.inplace = True


def fuse_conv_and_bn(conv, bn):
    # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
    fusedconv = (
        nn.Conv2d(
            conv.in_channels,
            conv.out_channels,
            kernel_size=conv.kernel_size,
            stride=conv.stride,
            padding=conv.padding,
            groups=conv.groups,
            bias=True,
        )
        .requires_grad_(False)
        .to(conv.weight.device)
    )

    # prepare filters
    w_conv = conv.weight.clone().view(conv.out_channels, -1)
    w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
    fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))

    # prepare spatial bias
    b_conv = (
        torch.zeros(conv.weight.size(0), device=conv.weight.device)
        if conv.bias is None
        else conv.bias
    )
    b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(
        torch.sqrt(bn.running_var + bn.eps)
    )
    fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)

    return fusedconv


def fuse_model(model):
    from yolov6.layers.common import Conv

    for m in model.modules():
        if type(m) is Conv and hasattr(m, "bn"):
            m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
            delattr(m, "bn")  # remove batchnorm
            m.forward = m.forward_fuse  # update forward
    return model


def get_model_info(model, img_size=640):
    """Get model Params and GFlops.
    Code base on https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/utils/model_utils.py
    """
    from thop import profile
    stride = 32
    img = torch.zeros((1, 3, stride, stride), device=next(model.parameters()).device)

    flops, params = profile(deepcopy(model), inputs=(img,), verbose=False)
    params /= 1e6
    flops /= 1e9
    img_size = img_size if isinstance(img_size, list) else [img_size, img_size]
    flops *= img_size[0] * img_size[1] / stride / stride * 2  # Gflops
    info = "Params: {:.2f}M, Gflops: {:.2f}".format(params, flops)
    return info
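A quick identity check for conv+BN fusion (a standard result: in eval mode, the fused conv reproduces the two-layer output):

import torch
import torch.nn as nn
from yolov6.utils.torch_utils import fuse_conv_and_bn

conv = nn.Conv2d(3, 8, 3, padding=1, bias=False)
bn = nn.BatchNorm2d(8).eval()
x = torch.randn(1, 3, 32, 32)
fused = fuse_conv_and_bn(conv, bn)
print(torch.allclose(bn(conv(x)), fused(x), atol=1e-5))  # True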