diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..ea21823b5f2255d106fdf1df31e1fc7168b49fe2 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,9 @@ +.env/ +results/ +**__pycache__** +*.onnx +*.pt +**byte_track_results** +**deep_sort_results** +**nor_fair_results** +test_env/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..6f444d282974a184e130cd5c8ced143a37e47e60 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,36 @@ +FROM pytorch/pytorch:latest + +# Set Time Zone to prevent issues for installing some apt packages +ENV TZ=Europe/Minsk +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +# install apt packages +RUN apt-get update -y +RUN apt-get install git gcc \ + g++ python3-opencv \ + vim -y + +RUN mkdir /app +WORKDIR /app + +ADD asone asone + +ADD sample_videos sample_videos +ADD main.py main.py +# ADD demo.py demo.py + +ADD setup.py setup.py +ADD requirements.txt requirements.txt + + +RUN pip3 install Cython numpy +RUN pip3 install cython-bbox +ADD pypi_README.md pypi_README.md + +RUN pip3 install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113 +RUN pip3 install . + + +WORKDIR /workspace +# Entry Point +CMD /bin/bash diff --git a/app.py b/app.py index 647e0592d4bb31ce1339b3596639236fb9df3439..5129f761567fcbd4cf49df9386d6c3fb04eb4a8e 100755 --- a/app.py +++ b/app.py @@ -1,3 +1,4 @@ +import os import torch import gradio as gr import cv2 @@ -11,6 +12,8 @@ from utils.plots import plot_one_box from utils.torch_utils import time_synchronized import time from ultralytics import YOLO +import asone +from asone import ASOne def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32): # Resize and pad image while meeting stride-multiple constraints @@ -173,7 +176,31 @@ def inference2(video,model_link,iou_threshold,confidence_threshold): finalVideo.release() return 'output.mp4',np.mean(fps_video) - +def inference3(video,model_link,iou_threshold,confidence_threshold): + model_path = 'weights/'+str(model_link)+'.pt' + device = torch.cuda.is_available() + dt_obj = ASOne( + tracker=asone.BYTETRACK, + detector=asone.YOLOV8M_PYTORCH, + weights=model_path, + use_cuda=device + ) + track_fn = dt_obj.track_video(video, + conf_thres=confidence_threshold, + iou_thres=iou_threshold, + display=False, + draw_trails=None, + filter_classes=None, + class_names=None) # class_names=['License Plate'] for custom weights + fps_a=[] + for bbox_details, frame_details in track_fn: + #bbox_xyxy, ids, scores, class_ids = bbox_details + frame, frame_num, fps = frame_details + fps_a.append(fps) + print(frame_num) + + file_name=os.path.basename(video) + return f'data/results/{file_name}', np.mean(fps_a) examples_images = ['data/images/1.jpg', 'data/images/2.jpg', @@ -206,11 +233,19 @@ with gr.Blocks() as demo: video_iou_threshold = gr.Slider(label="IOU Threshold",interactive=True, minimum=0.0, maximum=1.0, value=0.45) video_conf_threshold = gr.Slider(label="Confidence Threshold",interactive=True, minimum=0.0, maximum=1.0, value=0.25) gr.Examples(examples=examples_videos,inputs=video_input,outputs=video_output) - video_button = gr.Button("Detect") - - # with gr.Tab("Webcam Video"): - # gr.Markdown("## YOLOv7 Inference on Webcam Video") - # gr.Markdown("Coming Soon") + video_button = gr.Button("Detect") + with gr.Tab("Tracking"): + gr.Markdown("## Multi object tracking") + + with gr.Row(): + track_input = 
gr.Video(type='pil', label="Input Video", source="upload") + track_output = gr.Video(type="pil", label="Output Video",format="mp4") + track_fps_video = gr.Number(0,label='FPS') + track_drop = gr.Dropdown(choices=models,value=models[0]) + track_iou_threshold = gr.Slider(label="IOU Threshold",interactive=True, minimum=0.0, maximum=1.0, value=0.45) + track_conf_threshold = gr.Slider(label="Confidence Threshold",interactive=True, minimum=0.0, maximum=1.0, value=0.25) + gr.Examples(examples=examples_videos,inputs=track_input,outputs=track_output) + track_button = gr.Button("Detect") text_button.click(inference, inputs=[image_input,image_drop, image_iou_threshold,image_conf_threshold], @@ -218,5 +253,8 @@ with gr.Blocks() as demo: video_button.click(inference2, inputs=[video_input,video_drop, video_iou_threshold,video_conf_threshold], outputs=[video_output,fps_video]) + track_button.click(inference3, inputs=[track_input,track_drop, + track_iou_threshold,track_conf_threshold], + outputs=[track_output,track_fps_video]) demo.launch(debug=True,enable_queue=True) \ No newline at end of file diff --git a/asone/__init__.py b/asone/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6c857bb9b9362ad23bb42e6a5dba815af9a3d38d --- /dev/null +++ b/asone/__init__.py @@ -0,0 +1,114 @@ +from .asone import ASOne +import asone.detectors +import asone.trackers + + +BYTETRACK = 0 +DEEPSORT = 1 +NORFAIR = 2 + + +YOLOV5X6_PYTORCH = 0 +YOLOV5S_PYTORCH = 2 +YOLOV5N_PYTORCH = 4 +YOLOV5M_PYTORCH = 6 +YOLOV5L_PYTORCH = 8 +YOLOV5X_PYTORCH = 10 +YOLOV5N6_PYTORCH = 12 +YOLOV5S6_PYTORCH = 14 +YOLOV5M6_PYTORCH = 16 +YOLOV5L6_PYTORCH = 18 + + +YOLOV6N_PYTORCH = 20 +YOLOV6T_PYTORCH = 22 +YOLOV6S_PYTORCH = 24 +YOLOV6M_PYTORCH = 26 +YOLOV6L_PYTORCH = 28 +YOLOV6L_RELU_PYTORCH = 30 +YOLOV6S_REPOPT_PYTORCH = 32 + +YOLOV7_TINY_PYTORCH = 34 +YOLOV7_PYTORCH = 36 +YOLOV7_X_PYTORCH = 38 +YOLOV7_W6_PYTORCH = 40 +YOLOV7_E6_PYTORCH = 42 +YOLOV7_D6_PYTORCH = 44 +YOLOV7_E6E_PYTORCH = 46 + +YOLOR_CSP_X_PYTORCH = 48 +YOLOR_CSP_X_STAR_PYTORCH = 50 +YOLOR_CSP_STAR_PYTORCH = 52 +YOLOR_CSP_PYTORCH = 54 +YOLOR_P6_PYTORCH = 56 + + + + +YOLOX_L_PYTORCH = 58 +YOLOX_NANO_PYTORCH = 60 +YOLOX_TINY_PYTORCH = 62 +YOLOX_DARKNET_PYTORCH = 64 +YOLOX_S_PYTORCH = 66 +YOLOX_M_PYTORCH = 68 +YOLOX_X_PYTORCH = 70 + +#ONNX + +YOLOV5X6_ONNX = 1 +YOLOV5S_ONNX = 3 +YOLOV5N_ONNX = 5 +YOLOV5M_ONNX = 7 +YOLOV5L_ONNX = 9 +YOLOV5X_ONNX = 11 +YOLOV5N6_ONNX = 13 +YOLOV5S6_ONNX = 15 +YOLOV5M6_ONNX = 17 +YOLOV5L6_ONNX = 19 + + +YOLOV6N_ONNX = 21 +YOLOV6T_ONNX = 23 +YOLOV6S_ONNX = 25 +YOLOV6M_ONNX = 27 +YOLOV6L_ONNX = 29 +YOLOV6L_RELU_ONNX = 31 +YOLOV6S_REPOPT_ONNX = 33 + +YOLOV7_TINY_ONNX = 35 +YOLOV7_ONNX = 37 +YOLOV7_X_ONNX = 39 +YOLOV7_W6_ONNX = 41 +YOLOV7_E6_ONNX = 43 +YOLOV7_D6_ONNX = 45 +YOLOV7_E6E_ONNX = 47 + +YOLOR_CSP_X_ONNX = 49 +YOLOR_CSP_X_STAR_ONNX = 51 +YOLOR_CSP_STAR_ONNX = 53 +YOLOR_CSP_ONNX = 55 +YOLOR_P6_ONNX = 57 + + +YOLOX_L_ONNX = 59 +YOLOX_NANO_ONNX = 61 +YOLOX_TINY_ONNX = 63 +YOLOX_DARKNET_ONNX = 65 +YOLOX_S_ONNX = 67 +YOLOX_M_ONNX = 69 +YOLOX_X_ONNX = 71 + +# YOLOv8 +YOLOV8N_PYTORCH = 72 +YOLOV8N_ONNX = 73 +YOLOV8S_PYTORCH = 74 +YOLOV8S_ONNX = 75 +YOLOV8M_PYTORCH = 76 +YOLOV8M_ONNX = 77 +YOLOV8L_PYTORCH = 78 +YOLOV8L_ONNX = 79 +YOLOV8X_PYTORCH = 80 +YOLOV8X_ONNX = 81 + + +__all__ = ['ASOne', 'detectors', 'trackers'] diff --git a/asone/asone.py b/asone/asone.py new file mode 100644 index 0000000000000000000000000000000000000000..978c7840e4f90e33de9e448903c738013efae09e --- /dev/null +++ b/asone/asone.py @@ -0,0 +1,195 @@ +import 
copy +import cv2 +from loguru import logger +import os +import time +import asone.utils as utils +from asone.trackers import Tracker +from asone.detectors import Detector +from asone.utils.default_cfg import config +import numpy as np + +class ASOne: + def __init__(self, + detector: int = 0, + tracker: int = -1, + weights: str = None, + use_cuda: bool = True) -> None: + + self.use_cuda = use_cuda + + # get detector object + self.detector = self.get_detector(detector, weights) + + if tracker == -1: + self.tracker = None + return + + self.tracker = self.get_tracker(tracker) + + + def get_detector(self, detector: int, weights: str): + detector = Detector(detector, weights=weights, + use_cuda=self.use_cuda).get_detector() + return detector + + def get_tracker(self, tracker: int): + + tracker = Tracker(tracker, self.detector, + use_cuda=self.use_cuda) + return tracker + + def _update_args(self, kwargs): + for key, value in kwargs.items(): + if key in config.keys(): + config[key] = value + else: + print(f'"{key}" argument not found! valid args: {list(config.keys())}') + exit() + return config + + def track_stream(self, + stream_url, + **kwargs + ): + + output_filename = 'result.mp4' + kwargs['filename'] = output_filename + config = self._update_args(kwargs) + + for (bbox_details, frame_details) in self._start_tracking(stream_url, config): + # yeild bbox_details, frame_details to main script + yield bbox_details, frame_details + + + def track_video(self, + video_path, + **kwargs + ): + output_filename = os.path.basename(video_path) + kwargs['filename'] = output_filename + config = self._update_args(kwargs) + + for (bbox_details, frame_details) in self._start_tracking(video_path, config): + # yeild bbox_details, frame_details to main script + yield bbox_details, frame_details + + def detect(self, source, **kwargs)->np.ndarray: + """ Function to perform detection on an img + + Args: + source (_type_): if str read the image. if nd.array pass it directly to detect + + Returns: + _type_: ndarray of detection + """ + if isinstance(source, str): + source = cv2.imread(source) + return self.detector.detect(source, **kwargs) + + def track_webcam(self, + cam_id=0, + **kwargs): + output_filename = 'results.mp4' + + kwargs['filename'] = output_filename + kwargs['fps'] = 29 + config = self._update_args(kwargs) + + + for (bbox_details, frame_details) in self._start_tracking(cam_id, config): + # yeild bbox_details, frame_details to main script + yield bbox_details, frame_details + + def _start_tracking(self, + stream_path: str, + config: dict) -> tuple: + if not self.tracker: + print(f'No tracker is selected. 
use detect() function perform detcetion or pass a tracker.') + exit() + + fps = config.pop('fps') + output_dir = config.pop('output_dir') + filename = config.pop('filename') + save_result = config.pop('save_result') + display = config.pop('display') + draw_trails = config.pop('draw_trails') + class_names = config.pop('class_names') + + cap = cv2.VideoCapture(stream_path) + width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) + height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) + frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT) + + if fps is None: + fps = cap.get(cv2.CAP_PROP_FPS) + + if save_result: + os.makedirs(output_dir, exist_ok=True) + save_path = os.path.join(output_dir, filename) + logger.info(f"video save path is {save_path}") + + video_writer = cv2.VideoWriter( + save_path, + cv2.VideoWriter_fourcc(*"mp4v"), + fps, + (int(width), int(height)), + ) + + frame_id = 1 + tic = time.time() + + prevTime = 0 + + while True: + start_time = time.time() + + ret, frame = cap.read() + if not ret: + break + im0 = copy.deepcopy(frame) + + bboxes_xyxy, ids, scores, class_ids = self.tracker.detect_and_track( + frame, config) + elapsed_time = time.time() - start_time + + logger.info( + 'frame {}/{} ({:.2f} ms)'.format(frame_id, int(frame_count), + elapsed_time * 1000)) + + im0 = utils.draw_boxes(im0, + bboxes_xyxy, + class_ids, + identities=ids, + draw_trails=draw_trails, + class_names=class_names) + + currTime = time.time() + fps = 1 / (currTime - prevTime) + prevTime = currTime + cv2.line(im0, (20, 25), (127, 25), [85, 45, 255], 30) + cv2.putText(im0, f'FPS: {int(fps)}', (11, 35), 0, 1, [ + 225, 255, 255], thickness=2, lineType=cv2.LINE_AA) + + if display: + cv2.imshow(' Sample', im0) + if save_result: + video_writer.write(im0) + + frame_id += 1 + + if cv2.waitKey(25) & 0xFF == ord('q'): + break + + # yeild required values in form of (bbox_details, frames_details) + yield (bboxes_xyxy, ids, scores, class_ids), (im0 if display else frame, frame_id-1, fps) + + tac = time.time() + print(f'Total Time Taken: {tac - tic:.2f}') + +if __name__ == '__main__': + # asone = ASOne(tracker='norfair') + asone = ASOne() + + asone.start_tracking('data/sample_videos/video2.mp4', + save_result=True, display=False) diff --git a/asone/demo_detector.py b/asone/demo_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..15dc6c8a8d347db0c60d2197d2766280d6ef9f19 --- /dev/null +++ b/asone/demo_detector.py @@ -0,0 +1,85 @@ +import asone +from asone import ASOne +from .utils import draw_boxes +import cv2 +import argparse +import time +import os + +def main(args): + filter_classes = args.filter_classes + video_path = args.video + + os.makedirs(args.output_path, exist_ok=True) + + if filter_classes: + filter_classes = filter_classes.split(',') + + + detector = ASOne(asone.YOLOV7_PYTORCH, weights=args.weights, use_cuda=args.use_cuda) + + cap = cv2.VideoCapture(video_path) + width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) + height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) + FPS = cap.get(cv2.CAP_PROP_FPS) + + if args.save: + video_writer = cv2.VideoWriter( + os.path.basename(video_path), + cv2.VideoWriter_fourcc(*"mp4v"), + FPS, + (int(width), int(height)), + ) + + frame_no = 1 + tic = time.time() + + prevTime = 0 + + while True: + start_time = time.time() + + ret, img = cap.read() + if not ret: + break + frame = img.copy() + + dets, img_info = detector.detect(img, conf_thres=0.25, iou_thres=0.45) + currTime = time.time() + fps = 1 / (currTime - prevTime) + prevTime = currTime + + if dets is not None: + bbox_xyxy = dets[:, :4] + 
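+            # dets rows follow the layout [x1, y1, x2, y2, score, class_id];
+            # the column slices here and below rely on that order.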
scores = dets[:, 4] + class_ids = dets[:, 5] + img = draw_boxes(img, bbox_xyxy, class_ids=class_ids) + + cv2.line(img, (20, 25), (127, 25), [85, 45, 255], 30) + cv2.putText(img, f'FPS: {int(fps)}', (11, 35), 0, 1, [ + 225, 255, 255], thickness=2, lineType=cv2.LINE_AA) + + + frame_no+=1 + if args.display: + cv2.imshow('Window', img) + + if args.save: + video_writer.write(img) + + if cv2.waitKey(25) & 0xFF == ord('q'): + break + +if __name__=='__main__': + + parser = argparse.ArgumentParser() + parser.add_argument("video", help="Path of video") + parser.add_argument('--cpu', default=True, action='store_false', dest='use_cuda', help='If provided the model will run on cpu otherwise it will run on gpu') + parser.add_argument('--filter_classes', default=None, help='Class names seperated by comma (,). e.g. person,car ') + parser.add_argument('-w', '--weights', default=None, help='Path of trained weights') + parser.add_argument('-o', '--output_path', default='data/results', help='path of output file') + parser.add_argument('--no_display', action='store_false', default=True, dest='display', help='if provided video will not be displayed') + parser.add_argument('--no_save', action='store_false', default=True, dest='save', help='if provided video will not be saved') + + args = parser.parse_args() + main(args) diff --git a/asone/demo_tracker.py b/asone/demo_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..c497f4be8e4b7627c294c06430d6c1895ca81cf8 --- /dev/null +++ b/asone/demo_tracker.py @@ -0,0 +1,101 @@ +import argparse +from .trackers import Tracker +import argparse +import asone +from .utils import draw_boxes +from .detectors import Detector +import cv2 +import os +from loguru import logger +import time +import copy + +def main(args): + filter_classes = args.filter_classes + + if filter_classes: + filter_classes = filter_classes.split(',') + + detector = Detector(asone.YOLOV7_E6_ONNX, weights=args.weights, use_cuda=args.use_cuda).get_detector() + tracker = Tracker(asone.BYTETRACK, detector, use_cuda=args.use_cuda).get_tracker() + + cap = cv2.VideoCapture(args.video_path) + width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) + height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) + fps = cap.get(cv2.CAP_PROP_FPS) + frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT) + output_dir = 'data/results' + if args.save_results: + os.makedirs(output_dir, exist_ok=True) + save_path = os.path.join(output_dir, os.path.basename(args.video_path)) + logger.info(f"video save path is {save_path}") + + video_writer = cv2.VideoWriter( + save_path, + cv2.VideoWriter_fourcc(*"mp4v"), + fps, + (int(width), int(height)), + ) + + frame_id = 1 + tic = time.time() + + prevTime = 0 + + while True: + start_time = time.time() + + ret, frame = cap.read() + if not ret: + break + im0 = copy.deepcopy(frame) + + bboxes_xyxy, ids, scores, class_ids = tracker.detect_and_track( + frame, filter_classes=filter_classes) + + elapsed_time = time.time() - start_time + + logger.info( + f'frame {frame_id}/{int(frame_count)} {elapsed_time * 1000:.2f} ms') + + im0 = draw_boxes(im0, bboxes_xyxy, class_ids, identities=ids) + + currTime = time.time() + fps = 1 / (currTime - prevTime) + prevTime = currTime + cv2.line(im0, (20, 25), (127, 25), [85, 45, 255], 30) + cv2.putText(im0, f'FPS: {int(fps)}', (11, 35), 0, 1, [ + 225, 255, 255], thickness=2, lineType=cv2.LINE_AA) + + if args.display: + cv2.imshow(' Sample', im0) + if args.save_results: + video_writer.write(im0) + + frame_id += 1 + + if cv2.waitKey(25) & 0xFF == ord('q'): + break + + tac = 
time.time() + print(f'Total Time Taken: {tac - tic:.2f}') + + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + parser.add_argument('video_path', help='Path to input video') + parser.add_argument('--cpu', default=True, + action='store_false', dest='use_cuda', help='run on cpu') + parser.add_argument('--no_display', default=True, + action='store_false', dest='display', help='Disable display') + parser.add_argument('--no_save', default=True, + action='store_false', dest='save_results', help='Disable result saving') + + parser.add_argument('--filter_classes', default=None, help='Class names seperated by comma (,). e.g. person,car ') + parser.add_argument('-w', '--weights', default=None, help='Path of trained weights') + + args = parser.parse_args() + + main(args) diff --git a/asone/detectors/__init__.py b/asone/detectors/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5e5a8380eace249ee6811ec5c5bab448f6b586ad --- /dev/null +++ b/asone/detectors/__init__.py @@ -0,0 +1,13 @@ +from asone.detectors.yolov5 import YOLOv5Detector +from asone.detectors.yolov6 import YOLOv6Detector +from asone.detectors.yolov7 import YOLOv7Detector +from asone.detectors.yolor import YOLOrDetector +from asone.detectors.yolox import YOLOxDetector + +from asone.detectors.detector import Detector +__all__ = ['Detector' + 'YOLOv5Detector', + 'YOLOv6Detector', + 'YOLOv7Detector', + 'YOLOrDetector', + 'YOLOxDetector'] diff --git a/asone/detectors/detector.py b/asone/detectors/detector.py new file mode 100644 index 0000000000000000000000000000000000000000..eff96921b628d0fc2394184b80af251f74463f17 --- /dev/null +++ b/asone/detectors/detector.py @@ -0,0 +1,92 @@ +import cv2 + +from asone.detectors.yolov5 import YOLOv5Detector +from asone.detectors.yolov6 import YOLOv6Detector +from asone.detectors.yolov7 import YOLOv7Detector +from asone.detectors.yolor import YOLOrDetector +from asone.detectors.yolox import YOLOxDetector + +from asone.detectors.utils.weights_path import get_weight_path +from asone.detectors.utils.cfg_path import get_cfg_path +from asone.detectors.utils.exp_name import get_exp__name +from .yolov8 import YOLOv8Detector + + +class Detector: + def __init__(self, + model_flag: int, + weights: str = None, + use_cuda: bool = True): + + self.model = self._select_detector(model_flag, weights, use_cuda) + + def _select_detector(self, model_flag, weights, cuda): + # Get required weight using model_flag + if weights and weights.split('.')[-1] == 'onnx': + onnx = True + weight = weights + elif weights: + onnx = False + weight = weights + else: + onnx, weight = get_weight_path(model_flag) + + if model_flag in range(0, 20): + _detector = YOLOv5Detector(weights=weight, + use_onnx=onnx, + use_cuda=cuda) + elif model_flag in range(20, 34): + _detector = YOLOv6Detector(weights=weight, + use_onnx=onnx, + use_cuda=cuda) + elif model_flag in range(34, 48): + _detector = YOLOv7Detector(weights=weight, + use_onnx=onnx, + use_cuda=cuda) + elif model_flag in range(48, 58): + # Get Configuration file for Yolor + if model_flag in range(48, 57, 2): + cfg = get_cfg_path(model_flag) + else: + cfg = None + _detector = YOLOrDetector(weights=weight, + cfg=cfg, + use_onnx=onnx, + use_cuda=cuda) + + elif model_flag in range(58, 72): + # Get exp file and corresponding model for pytorch only + if model_flag in range(58, 71, 2): + exp, model_name = get_exp__name(model_flag) + else: + exp = model_name = None + _detector = YOLOxDetector(model_name=model_name, + exp_file=exp, + weights=weight, + 
use_onnx=onnx, + use_cuda=cuda) + elif model_flag in range(72, 82): + # Get exp file and corresponding model for pytorch only + _detector = YOLOv8Detector(weights=weight, + use_onnx=onnx, + use_cuda=cuda) + + return _detector + + def get_detector(self): + return self.model + + def detect(self, + image: list, + **kwargs: dict): + return self.model.detect(image, **kwargs) + + +if __name__ == '__main__': + + # Initialize YOLOv6 object detector + model_type = 56 + result = Detector(model_flag=model_type, use_cuda=True) + img = cv2.imread('asone/asone-linux/test.jpeg') + pred = result.get_detector(img) + print(pred) diff --git a/asone/detectors/utils/__init__.py b/asone/detectors/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/asone/detectors/utils/cfg_path.py b/asone/detectors/utils/cfg_path.py new file mode 100644 index 0000000000000000000000000000000000000000..60f4f285cfd34c477e56b2316074b4e80edb0244 --- /dev/null +++ b/asone/detectors/utils/cfg_path.py @@ -0,0 +1,18 @@ +import os + +cfg_dir = os.path.dirname(os.path.dirname(__file__)) + +configuration = {'0': os.path.join(cfg_dir, 'yolor','cfg','yolor_csp_x.cfg'), + '1': os.path.join(cfg_dir, 'yolor','cfg','yolor_csp.cfg'), + '2': os.path.join(cfg_dir, 'yolor','cfg','yolor_p6.cfg')} + +def get_cfg_path(model_flag): + if model_flag in [48,50]: + cfg = configuration['0'] + if model_flag in [52,54]: + cfg = configuration['1'] + if model_flag == 56: + cfg = configuration['2'] + return cfg + + \ No newline at end of file diff --git a/asone/detectors/utils/exp_name.py b/asone/detectors/utils/exp_name.py new file mode 100644 index 0000000000000000000000000000000000000000..ba1338e9f2d4b60b9a3d2fc7db9864b70cc41f21 --- /dev/null +++ b/asone/detectors/utils/exp_name.py @@ -0,0 +1,32 @@ +import os + +exp_dir = os.path.dirname(os.path.dirname(__file__)) + +exp_file_name = {'58': (os.path.join(exp_dir, 'yolox','exps','yolox_l.py'),'yolox-l'), + '60': (os.path.join(exp_dir, 'yolox','exps','yolox_nano.py'),'yolox-nano'), + '62': (os.path.join(exp_dir, 'yolox','exps','yolox_tiny'),'yolox-tiny'), + '64': (os.path.join(exp_dir, 'yolox','exps','yolov3.py'),'yolox-darknet'), + '66': (os.path.join(exp_dir, 'yolox','exps','yolox_s.py'),'yolox-s'), + '68': (os.path.join(exp_dir, 'yolox','exps','yolox_m.py'),'yolox-m'), + '70': (os.path.join(exp_dir, 'yolox','exps','yolox_x.py'),'yolox-x') + } + + +def get_exp__name(model_flag): + + if model_flag == 58: + exp, model_name = exp_file_name['58'][0], exp_file_name['58'][1] + elif model_flag == 60: + exp, model_name = exp_file_name['60'][0], exp_file_name['60'][1] + elif model_flag == 62: + exp, model_name = exp_file_name['62'][0], exp_file_name['62'][1] + elif model_flag == 64: + exp, model_name = exp_file_name['64'][0], exp_file_name['64'][1] + elif model_flag == 66: + exp, model_name = exp_file_name['66'][0], exp_file_name['66'][1] + elif model_flag == 68: + exp, model_name = exp_file_name['68'][0], exp_file_name['68'][1] + elif model_flag == 70: + exp, model_name = exp_file_name['70'][0], exp_file_name['70'][1] + + return exp, model_name \ No newline at end of file diff --git a/asone/detectors/utils/weights_path.py b/asone/detectors/utils/weights_path.py new file mode 100644 index 0000000000000000000000000000000000000000..246c9afb2caeb55f24d78104de65639c4ee0e8d1 --- /dev/null +++ b/asone/detectors/utils/weights_path.py @@ -0,0 +1,117 @@ +import os + +weights = { '0': os.path.join('yolov5','weights','yolov5x6.pt'), + '1': 
os.path.join('yolov5','weights','yolov5x6.onnx'), + '2': os.path.join('yolov5','weights','yolov5s.pt'), + '3': os.path.join('yolov5','weights','yolov5s.onnx'), + '4': os.path.join('yolov5','weights','yolov5n.pt'), + '5': os.path.join('yolov5','weights','yolov5n.onnx'), + '6': os.path.join('yolov5','weights','yolov5m.pt'), + '7': os.path.join('yolov5','weights','yolov5m.onnx'), + '8': os.path.join('yolov5','weights','yolov5l.pt'), + '9': os.path.join('yolov5','weights','yolov5l.onnx'), + '10': os.path.join('yolov5','weights','yolov5x.pt'), + '11': os.path.join('yolov5','weights','yolov5x.onnx'), + '12': os.path.join('yolov5','weights','yolov5n6.pt'), + '13': os.path.join('yolov5','weights','yolov5n6.onnx'), + '14': os.path.join('yolov5','weights','yolov5s6.pt'), + '15': os.path.join('yolov5','weights','yolov5s6.onnx'), + '16': os.path.join('yolov5','weights','yolov5m6.pt'), + '17': os.path.join('yolov5','weights','yolov5m6.onnx'), + '18': os.path.join('yolov5','weights','yolov5l6.pt'), + '19': os.path.join('yolov5','weights','yolov5l6.onnx'), + # YOLOv6 + '20': os.path.join('yolov6','weights','yolov6n.pt'), + '21': os.path.join('yolov6','weights','yolov6n.onnx'), + '22': os.path.join('yolov6','weights','yolov6t.pt'), + '23': os.path.join('yolov6','weights','yolov6t.onnx'), + '24': os.path.join('yolov6','weights','yolov6s.pt'), + '25': os.path.join('yolov6','weights','yolov6s.onnx'), + '26': os.path.join('yolov6','weights','yolov6m.pt'), + '27': os.path.join('yolov6','weights','yolov6m.onnx'), + '28': os.path.join('yolov6','weights','yolov6l.pt'), + '29': os.path.join('yolov6','weights','yolov6l.onnx'), + '30': os.path.join('yolov6','weights','yolov6l_relu.pt'), + '31': os.path.join('yolov6','weights','yolov6l_relu.onnx'), + '32': os.path.join('yolov6','weights','yolov6s_repopt.pt'), + '33': os.path.join('yolov6','weights','yolov6s_repopt.onnx'), + # YOLOv7 + '34': os.path.join('yolov7','weights','yolov7-tiny.pt'), + '35': os.path.join('yolov7','weights','yolov7-tiny.onnx'), + '36': os.path.join('yolov7','weights','yolov7.pt'), + '37': os.path.join('yolov7','weights','yolov7.onnx'), + '38': os.path.join('yolov7','weights','yolov7x.pt'), + '39': os.path.join('yolov7','weights','yolov7x.onnx'), + '40': os.path.join('yolov7','weights','yolov7-w6.pt'), + '41': os.path.join('yolov7','weights','yolov7-w6.onnx'), + '42': os.path.join('yolov7','weights','yolov7-e6.pt'), + '43': os.path.join('yolov7','weights','yolov7-e6.onnx'), + '44': os.path.join('yolov7','weights','yolov7-d6.pt'), + '45': os.path.join('yolov7','weights','yolov7-d6.onnx'), + '46': os.path.join('yolov7','weights','yolov7-e6e.pt'), + '47': os.path.join('yolov7','weights','yolov7-e6e.onnx'), + # YOLOR + '48': os.path.join('yolor','weights','yolor_csp_x.pt'), + '49': os.path.join('yolor','weights','yolor_csp_x.onnx'), + '50': os.path.join('yolor','weights','yolor_csp_x_star.pt'), + '51': os.path.join('yolor','weights','yolor_csp_x_star.onnx'), + '52': os.path.join('yolor','weights','yolor_csp_star.pt'), + '53': os.path.join('yolor','weights','yolor_csp_star.onnx'), + '54': os.path.join('yolor','weights','yolor_csp.pt'), + '55': os.path.join('yolor','weights','yolor_csp.onnx'), + '56': os.path.join('yolor','weights','yolor_p6.pt'), + '57': os.path.join('yolor','weights','yolor_p6.onnx'), + # YOLOX + '58': os.path.join('yolox','weights','yolox_l.pth'), + '59': os.path.join('yolox','weights','yolox_l.onnx'), + '60': os.path.join('yolox','weights','yolox_nano.pth'), + '61': os.path.join('yolox','weights','yolox_nano.onnx'), + '62': 
os.path.join('yolox','weights','yolox_tiny.pth'), + '63': os.path.join('yolox','weights','yolox_tiny.onnx'), + '64': os.path.join('yolox','weights','yolox_darknet.pth'), + '65': os.path.join('yolox','weights','yolox_darknet.onnx'), + '66': os.path.join('yolox','weights','yolox_s.pth'), + '67': os.path.join('yolox','weights','yolox_s.onnx'), + '68': os.path.join('yolox','weights','yolox_m.pth'), + '69': os.path.join('yolox','weights','yolox_m.onnx'), + '70': os.path.join('yolox','weights','yolox_x.pth'), + '71': os.path.join('yolox','weights','yolox_x.onnx'), + # YOLOv8 + '72': os.path.join('yolov8','weights','yolov8n.pt'), + '73': os.path.join('yolov8','weights','yolov8n.onnx'), + '74': os.path.join('yolov8','weights','yolov8s.pt'), + '75': os.path.join('yolov8','weights','yolov8s.onnx'), + '76': os.path.join('yolov8','weights','yolov8m.pt'), + '77': os.path.join('yolov8','weights','yolov8m.onnx'), + '78': os.path.join('yolov8','weights','yolov8l.pt'), + '79': os.path.join('yolov8','weights','yolov8l.onnx'), + '80': os.path.join('yolov8','weights','yolov8x.pt'), + '81': os.path.join('yolov8','weights','yolov8x.onnx') + + + +} + +def get_weight_path(model_flag): + if model_flag in range(0, 20): + onnx = False if (model_flag % 2 == 0) else True + weight = weights[str(model_flag)] + elif model_flag in range(20, 34): + onnx = False if (model_flag % 2 == 0) else True + weight = weights[str(model_flag)] + elif model_flag in range(34, 48): + onnx = False if (model_flag % 2 == 0) else True + weight = weights[str(model_flag)] + elif model_flag in range(48, 58): + onnx = False if (model_flag % 2 == 0) else True + weight = weights[str(model_flag)] + elif model_flag in range(58, 72): + onnx = False if (model_flag % 2 == 0) else True + weight = weights[str(model_flag)] + elif model_flag in range(72, 82): + onnx = False if (model_flag % 2 == 0) else True + weight = weights[str(model_flag)] + + + return onnx, weight + diff --git a/asone/detectors/yolor/__init__.py b/asone/detectors/yolor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2af71b91fa133c55877d8c91c92cc7988a682c13 --- /dev/null +++ b/asone/detectors/yolor/__init__.py @@ -0,0 +1,2 @@ +from .yolor_detector import YOLOrDetector +__all__ = ['YOLOrDetector'] \ No newline at end of file diff --git a/asone/detectors/yolor/cfg/yolor_csp.cfg b/asone/detectors/yolor/cfg/yolor_csp.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9f5f3ab421200036bf5d5c58f9be964f6d2e47a2 --- /dev/null +++ b/asone/detectors/yolor/cfg/yolor_csp.cfg @@ -0,0 +1,1376 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=512 +height=512 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.00261 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +#cutmix=1 +mosaic=1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=silu + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=silu + +# 4 (previous+1+3k) +[shortcut] +from=-3 +activation=linear + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 
+activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-10 + +# Transition last + +# 17 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(4+3k)] + +[route] +layers = -1,-28 + +# Transition last + +# 48 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# P4 + +# Downsample + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-28 + +# Transition last + +# 79 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 
+pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-16 + +# Transition last + +# 98 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=silu + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[route] +layers = -1, -13 + +# 113 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -6 + +# Transition last + +# 127 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=silu + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -6 + +# Transition last + +# 141 
(previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=silu + +[route] +layers = -1, 127 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[route] +layers = -1,-6 + +# Transition last + +# 152 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=silu + +[route] +layers = -1, 113 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[route] +layers = -1,-6 + +# Transition last + +# 163 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# ============ End of Neck ============ # + +# 164 +[implicit_add] +filters=256 + +# 165 +[implicit_add] +filters=512 + +# 166 +[implicit_add] +filters=1024 + +# 167 +[implicit_mul] +filters=255 + +# 168 +[implicit_mul] +filters=255 + +# 169 +[implicit_mul] +filters=255 + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 141 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[shift_channels] +from=164 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=167 + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-4 + +[route] +layers = 152 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[shift_channels] +from=165 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=168 + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-5 + +[route] +layers = 163 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=silu + 
+[shift_channels] +from=166 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=169 + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 diff --git a/asone/detectors/yolor/cfg/yolor_csp_x.cfg b/asone/detectors/yolor/cfg/yolor_csp_x.cfg new file mode 100644 index 0000000000000000000000000000000000000000..55a54109bf4882055ebc02b5a8688bfd3d618e4d --- /dev/null +++ b/asone/detectors/yolor/cfg/yolor_csp_x.cfg @@ -0,0 +1,1576 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=512 +height=512 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.00261 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +#cutmix=1 +mosaic=1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=silu + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=2 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=40 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=silu + +# 4 (previous+1+3k) +[shortcut] +from=-3 +activation=linear + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 20 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(4+3k)] + +[route] +layers = -1,-34 + +# Transition last + +# 57 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 
+activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-34 + +# Transition last + +# 94 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1280 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-19 + +# Transition last + +# 116 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1280 +size=1 +stride=1 +pad=1 +activation=silu + +# ============ 
End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[route] +layers = -1, -15 + +# 133 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 94 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 149 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 57 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=silu + +# 
Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 165 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=320 +activation=silu + +[route] +layers = -1, 149 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[route] +layers = -1,-8 + +# Transition last + +# 178 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=640 +activation=silu + +[route] +layers = -1, 133 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[route] +layers = -1,-8 + +# Transition last + +# 191 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# ============ End of Neck ============ # + +# 192 +[implicit_add] +filters=320 + +# 193 +[implicit_add] +filters=640 + +# 194 +[implicit_add] +filters=1280 + +# 195 +[implicit_mul] +filters=255 + +# 196 +[implicit_mul] +filters=255 + +# 197 +[implicit_mul] +filters=255 + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 165 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[shift_channels] +from=192 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=195 + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-4 + +[route] +layers = 178 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[shift_channels] +from=193 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=196 + 
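+# Note: the shift_channels layers in these heads appear to pair with the implicit_add
+# layers defined above (ImplicitA in models/common.py below: a learned additive prior
+# applied before the 1x1 detection conv), while control_channels pairs with implicit_mul
+# (ImplicitM: a learned multiplicative prior on the detection output). The 255 output
+# filters correspond to 3 anchors per scale x (80 classes + 4 box coords + 1 objectness).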
+[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-5 + +[route] +layers = 191 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1280 +activation=silu + +[shift_channels] +from=194 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=197 + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 diff --git a/asone/detectors/yolor/cfg/yolor_p6.cfg b/asone/detectors/yolor/cfg/yolor_p6.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c7fe50c1c5e22c047194c83c8809d4f223a557e9 --- /dev/null +++ b/asone/detectors/yolor/cfg/yolor_p6.cfg @@ -0,0 +1,1760 @@ +[net] +batch=64 +subdivisions=8 +width=1280 +height=1280 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.00261 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + + +# ============ Backbone ============ # + +# Stem + +# P1 + +# Downsample + +# 0 +[reorg] + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=silu + + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=silu + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-12 + +# Transition last + +# 16 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] 
+from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=128 +#size=1 +#stride=1 +#pad=1 +#activation=silu + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-24 + +# Transition last + +# 43 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=384 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] 
+batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=192 +#size=1 +#stride=1 +#pad=1 +#activation=silu + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-24 + +# Transition last + +# 70 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=256 +#size=1 +#stride=1 +#pad=1 +#activation=silu + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-12 + +# Transition last + +# 85 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + + +# P6 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=320 +#size=1 +#stride=1 +#pad=1 +#activation=silu + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-12 + +# Transition last + +# 100 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + 
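+# Note: the "### SPP ###" block a few directives below pools the same feature map with
+# 5x5, 9x9 and 13x13 max-pools at stride 1, and the closing [route] layers=-1,-3,-5,-6
+# concatenates the three pooled maps with the un-pooled input -- the spatial-pyramid-pooling
+# pattern wrapped inside this CSPSPP neck block.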
+[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[route] +layers = -1, -13 + +# 115 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# End of CSPSPP + + +# FPN-5 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 85 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 131 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 70 + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=silu + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=silu + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=silu + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 147 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 43 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + 
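+# (CSP-style split, as in the other stages of this file: the 1x1 conv directly below forms
+# one branch, [route] layers=-2 branches off the pre-split conv to feed the "Plain Block",
+# and the two paths are concatenated again at the "Merge [-1, -(2k+2)]" route before the
+# transition conv.)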
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=silu + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 163 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=192 +activation=silu + +[route] +layers = -1, 147 + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=silu + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=silu + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=silu + +[route] +layers = -1,-8 + +# Transition last + +# 176 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=silu + +[route] +layers = -1, 131 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[route] +layers = -1,-8 + +# Transition last + +# 189 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-6 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=320 +activation=silu + +[route] +layers = -1, 115 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + 
+[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[route] +layers = -1,-8 + +# Transition last + +# 202 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# ============ End of Neck ============ # + +# 203 +[implicit_add] +filters=256 + +# 204 +[implicit_add] +filters=384 + +# 205 +[implicit_add] +filters=512 + +# 206 +[implicit_add] +filters=640 + +# 207 +[implicit_mul] +filters=255 + +# 208 +[implicit_mul] +filters=255 + +# 209 +[implicit_mul] +filters=255 + +# 210 +[implicit_mul] +filters=255 + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 163 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[shift_channels] +from=203 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=207 + +[yolo] +mask = 0,1,2 +anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +classes=80 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-4 + +[route] +layers = 176 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=384 +activation=silu + +[shift_channels] +from=204 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=208 + +[yolo] +mask = 3,4,5 +anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +classes=80 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-5 + +[route] +layers = 189 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[shift_channels] +from=205 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=209 + +[yolo] +mask = 6,7,8 +anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +classes=80 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-6 + +[route] +layers = 202 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[shift_channels] +from=206 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=210 + +[yolo] +mask = 9,10,11 +anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +classes=80 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + +# ============ End of Head ============ # diff --git a/asone/detectors/yolor/models/__init__.py 
b/asone/detectors/yolor/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/asone/detectors/yolor/models/__init__.py @@ -0,0 +1 @@ + diff --git a/asone/detectors/yolor/models/common.py b/asone/detectors/yolor/models/common.py new file mode 100644 index 0000000000000000000000000000000000000000..2c7caaa455b244d0d3df265f862ee8c684bbf76e --- /dev/null +++ b/asone/detectors/yolor/models/common.py @@ -0,0 +1,1023 @@ +# This file contains modules common to various models + +import math + +import numpy as np +import torch +import torch.nn as nn +from PIL import Image, ImageDraw + +from asone.detectors.yolor.utils.datasets import letterbox +from asone.detectors.yolor.utils.general import non_max_suppression, make_divisible, scale_coords, xyxy2xywh +from asone.detectors.yolor.utils.plots import color_list + +try: + from pytorch_wavelets import DWTForward, DWTInverse + + class DWT(nn.Module): + def __init__(self): + super(DWT, self).__init__() + self.xfm = DWTForward(J=1, wave='db1', mode='zero') + + def forward(self, x): + b,c,w,h = x.shape + yl, yh = self.xfm(x) + return torch.cat([yl/2., yh[0].view(b,-1,w//2,h//2)/2.+.5], 1) +except: + + class DWT(nn.Module): # use ReOrg instead + def __init__(self): + super(DWT, self).__init__() + + def forward(self, x): + return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1) + + +class ImplicitA(nn.Module): + def __init__(self, channel): + super(ImplicitA, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) + nn.init.normal_(self.implicit, std=.02) + + def forward(self, x): + return self.implicit.expand_as(x) + x + + +class ImplicitM(nn.Module): + def __init__(self, channel): + super(ImplicitM, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1)) + nn.init.normal_(self.implicit, mean=1., std=.02) + + def forward(self, x): + return self.implicit.expand_as(x) * x + + +class ReOrg(nn.Module): + def __init__(self): + super(ReOrg, self).__init__() + + def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) + return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1) + +def autopad(k, p=None): # kernel, padding + # Pad to 'same' + if p is None: + p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad + return p + + +def DWConv(c1, c2, k=1, s=1, act=True): + # Depthwise convolution + return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) + + +class Conv(nn.Module): + # Standard convolution + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + super(Conv, self).__init__() + self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) + self.bn = nn.BatchNorm2d(c2) + self.act = nn.SiLU() if act else nn.Identity() + + def forward(self, x): + return self.act(self.bn(self.conv(x))) + + def fuseforward(self, x): + return self.act(self.conv(x)) + + +class ConvSig(nn.Module): + # Standard convolution + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + super(ConvSig, self).__init__() + self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) + self.act = nn.Sigmoid() if act else nn.Identity() + + def forward(self, x): + return self.act(self.conv(x)) + + def fuseforward(self, x): + return self.act(self.conv(x)) + + +class ConvSqu(nn.Module): + 
# Standard convolution + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + super(ConvSqu, self).__init__() + self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) + self.act = nn.SiLU() if act else nn.Identity() + + def forward(self, x): + return self.act(self.conv(x)) + + def fuseforward(self, x): + return self.act(self.conv(x)) + + +class Bottleneck(nn.Module): + # Standard bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion + super(Bottleneck, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_, c2, 3, 1, g=g) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + + +class BottleneckG(nn.Module): + # Standard bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion + super(BottleneckG, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1, g=g) + self.cv2 = Conv(c_, c2, 3, 1, g=g) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + + +class BottleneckCSP(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSP, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv4 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) + self.act = nn.SiLU() + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) + + +class BottleneckCSPF(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSPF, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + #self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv4 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) + self.act = nn.SiLU() + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + y1 = self.m(self.cv1(x)) + y2 = self.cv2(x) + return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) + + +class BottleneckCSPL(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSPL, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) + #self.cv4 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) + self.act = nn.SiLU() + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + y1 = 
self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + return self.act(self.bn(torch.cat((y1, y2), dim=1))) + + +class BottleneckCSPLG(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=3, e=0.25): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSPLG, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, g*c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = nn.Conv2d(g*c_, g*c_, 1, 1, groups=g, bias=False) + #self.cv4 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d((1+g) * c_) # applied to cat(cv2, cv3) + self.act = nn.SiLU() + self.m = nn.Sequential(*[BottleneckG(g*c_, g*c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + return self.act(self.bn(torch.cat((y1, y2), dim=1))) + + +class BottleneckCSPSE(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSPSE, self).__init__() + c_ = int(c2 * e) # hidden channels + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.cs = ConvSqu(c1, c1//8, 1, 1) + self.cvsig = ConvSig(c1//8, c1, 1, 1) + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv4 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) + self.act = nn.SiLU() + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + x = x * self.cvsig(self.cs(self.avg_pool(x))).expand_as(x) + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) + + +class BottleneckCSPSEA(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSPSEA, self).__init__() + c_ = int(c2 * e) # hidden channels + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.cs = ConvSqu(c1, c1//8, 1, 1) + self.cvsig = ConvSig(c1//8, c1, 1, 1) + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv4 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) + self.act = nn.SiLU() + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + x = x + x * self.cvsig(self.cs(self.avg_pool(x))).expand_as(x) + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) + + +class BottleneckCSPSAM(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSPSAM, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cvsig = ConvSig(c1, c1, 1, 1) + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv4 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) + self.act = nn.SiLU() + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def 
forward(self, x): + x = x * self.cvsig(x) + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) + + +class BottleneckCSPSAMA(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSPSAMA, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cvsig = ConvSig(c1, c1, 1, 1) + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv4 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) + self.act = nn.SiLU() + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + x = x + x * self.cvsig(x) + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) + + +class BottleneckCSPSAMB(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSPSAMB, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cvsig = ConvSig(c2, c2, 1, 1) + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv4 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) + self.act = nn.SiLU() + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + y = self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) + return y * self.cvsig(y) + + +class BottleneckCSPGC(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSPGC, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv4 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) + self.act = nn.SiLU() + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + self.channel_add_conv = nn.Sequential( + nn.Conv2d(c2, c2, kernel_size=1), + nn.LayerNorm([c2, 1, 1]), + nn.ReLU(inplace=True), # yapf: disable + nn.Conv2d(c2, c2, kernel_size=1)) + + self.conv_mask = nn.Conv2d(c2, 1, kernel_size=1) + self.softmax = nn.Softmax(dim=2) + + def spatial_pool(self, x): + + batch, channel, height, width = x.size() + + input_x = x + # [N, C, H * W] + input_x = input_x.view(batch, channel, height * width) + # [N, 1, C, H * W] + input_x = input_x.unsqueeze(1) + # [N, 1, H, W] + context_mask = self.conv_mask(x) + # [N, 1, H * W] + context_mask = context_mask.view(batch, 1, height * width) + # [N, 1, H * W] + context_mask = self.softmax(context_mask) + # [N, 1, H * W, 1] + context_mask = context_mask.unsqueeze(-1) + # [N, 1, C, 1] + context = torch.matmul(input_x, context_mask) + # [N, C, 1, 1] + context = context.view(batch, channel, 1, 1) + + return context + + def forward(self, x): + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + y = self.cv4(self.act(self.bn(torch.cat((y1, 
y2), dim=1)))) + + return y + self.channel_add_conv(self.spatial_pool(y)) + + +class BottleneckCSPDNL(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSPDNL, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv4 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) + self.act = nn.SiLU() + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + + self.conv_query = nn.Conv2d(c2, c2, kernel_size=1) + self.conv_key = nn.Conv2d(c2, c2, kernel_size=1) + self.conv_value = nn.Conv2d(c2, c2, kernel_size=1, bias=False) + self.conv_out = None + self.scale = math.sqrt(c2) + self.temperature = 0.05 + self.softmax = nn.Softmax(dim=2) + self.gamma = nn.Parameter(torch.zeros(1)) + self.conv_mask = nn.Conv2d(c2, 1, kernel_size=1) + + def forward(self, x): + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + y = self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) + + # [N, C, T, H, W] + residual = y + # [N, C, T, H', W'] + input_x = y + # [N, C', T, H, W] + query = self.conv_query(y) + # [N, C', T, H', W'] + key = self.conv_key(input_x) + value = self.conv_value(input_x) + # [N, C', H x W] + query = query.view(query.size(0), query.size(1), -1) + # [N, C', H' x W'] + key = key.view(key.size(0), key.size(1), -1) + value = value.view(value.size(0), value.size(1), -1) + # channel whitening + key_mean = key.mean(2).unsqueeze(2) + query_mean = query.mean(2).unsqueeze(2) + key -= key_mean + query -= query_mean + # [N, T x H x W, T x H' x W'] + sim_map = torch.bmm(query.transpose(1, 2), key) + sim_map = sim_map/self.scale + sim_map = sim_map/self.temperature + sim_map = self.softmax(sim_map) + # [N, T x H x W, C'] + out_sim = torch.bmm(sim_map, value.transpose(1, 2)) + # [N, C', T x H x W] + out_sim = out_sim.transpose(1, 2) + # [N, C', T, H, W] + out_sim = out_sim.view(out_sim.size(0), out_sim.size(1), *y.size()[2:]).contiguous() + out_sim = self.gamma * out_sim + # [N, 1, H', W'] + mask = self.conv_mask(input_x) + # [N, 1, H'x W'] + mask = mask.view(mask.size(0), mask.size(1), -1) + mask = self.softmax(mask) + # [N, C, 1, 1] + out_gc = torch.bmm(value, mask.permute(0,2,1)).unsqueeze(-1).contiguous() + + return out_sim + out_gc + residual + + +class BottleneckCSP2(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSP2, self).__init__() + c_ = int(c2) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv3 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) + self.act = nn.SiLU() + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + x1 = self.cv1(x) + y1 = self.m(x1) + y2 = self.cv2(x1) + return self.cv3(self.act(self.bn(torch.cat((y1, y2), dim=1)))) + + +class BottleneckCSP2SAM(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSP2SAM, self).__init__() + c_ = int(c2) # hidden 
channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cvsig = ConvSig(c_, c_, 1, 1) + self.cv2 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv3 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) + self.act = nn.SiLU() + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + x1 = self.cv1(x) + x1 = x1 * self.cvsig(x1).contiguous() + y1 = self.m(x1) + y2 = self.cv2(x1) + return self.cv3(self.act(self.bn(torch.cat((y1, y2), dim=1)))) + + +class VoVCSP(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(VoVCSP, self).__init__() + c_ = int(c2) # hidden channels + self.cv1 = Conv(c1//2, c_//2, 3, 1) + self.cv2 = Conv(c_//2, c_//2, 3, 1) + self.cv3 = Conv(c_, c2, 1, 1) + + def forward(self, x): + _, x1 = x.chunk(2, dim=1) + x1 = self.cv1(x1) + x2 = self.cv2(x1) + return self.cv3(torch.cat((x1,x2), dim=1)) + + +class SPP(nn.Module): + # Spatial pyramid pooling layer used in YOLOv3-SPP + def __init__(self, c1, c2, k=(5, 9, 13)): + super(SPP, self).__init__() + c_ = c1 // 2 # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) + self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) + + def forward(self, x): + x = self.cv1(x) + return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) + + +class SPPCSP(nn.Module): + # CSP SPP https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)): + super(SPPCSP, self).__init__() + c_ = int(2 * c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = Conv(c_, c_, 3, 1) + self.cv4 = Conv(c_, c_, 1, 1) + self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) + self.cv5 = Conv(4 * c_, c_, 1, 1) + self.cv6 = Conv(c_, c_, 3, 1) + self.bn = nn.BatchNorm2d(2 * c_) + self.act = nn.SiLU() + self.cv7 = Conv(2 * c_, c2, 1, 1) + + def forward(self, x): + x1 = self.cv4(self.cv3(self.cv1(x))) + y1 = self.cv6(self.cv5(torch.cat([x1] + [m(x1) for m in self.m], 1))) + y2 = self.cv2(x) + return self.cv7(self.act(self.bn(torch.cat((y1, y2), dim=1)))) + + +class Focus(nn.Module): + # Focus wh information into c-space + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + super(Focus, self).__init__() + self.conv = Conv(c1 * 4, c2, k, s, p, g, act) + + def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) + return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) + + +class MP(nn.Module): + # Spatial pyramid pooling layer used in YOLOv3-SPP + def __init__(self, k=2): + super(MP, self).__init__() + self.m = nn.MaxPool2d(kernel_size=k, stride=k) + + def forward(self, x): + return self.m(x) + + +class DownD(nn.Module): + # Spatial pyramid pooling layer used in YOLOv3-SPP + def __init__(self, c1, c2, n=1, k=2): + super(DownD, self).__init__() + c_ = int(c1) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_, c_, 3, k) + self.cv3 = Conv(c_, c2, 1, 1) + self.cv4 = Conv(c1, c2, 1, 1) + self.ap = nn.AvgPool2d(kernel_size=k, stride=k) + + def forward(self, x): + return self.cv3(self.cv2(self.cv1(x))) + self.cv4(self.ap(x)) + + +class DownC(nn.Module): + # Spatial pyramid pooling layer used in 
YOLOv3-SPP + def __init__(self, c1, c2, n=1, k=2): + super(DownC, self).__init__() + c_ = int(c1) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_, c2//2, 3, k) + self.cv3 = Conv(c1, c2//2, 1, 1) + self.mp = nn.MaxPool2d(kernel_size=k, stride=k) + + def forward(self, x): + return torch.cat((self.cv2(self.cv1(x)), self.cv3(self.mp(x))), dim=1) + + +class DNL(nn.Module): + # Spatial pyramid pooling layer used in YOLOv3-SPP + def __init__(self, c1, c2, k=3, s=1): + super(DNL, self).__init__() + c_ = int(c1) # hidden channels + + # + self.conv_query = nn.Conv2d(c1, c_, kernel_size=1) + self.conv_key = nn.Conv2d(c1, c_, kernel_size=1) + + self.conv_value = nn.Conv2d(c1, c1, kernel_size=1, bias=False) + self.conv_out = None + + self.scale = math.sqrt(c_) + self.temperature = 0.05 + + self.softmax = nn.Softmax(dim=2) + + self.gamma = nn.Parameter(torch.zeros(1)) + + self.conv_mask = nn.Conv2d(c1, 1, kernel_size=1) + + self.cv = Conv(c1, c2, k, s) + + def forward(self, x): + + # [N, C, T, H, W] + residual = x + + # [N, C, T, H', W'] + input_x = x + + # [N, C', T, H, W] + query = self.conv_query(x) + + # [N, C', T, H', W'] + key = self.conv_key(input_x) + value = self.conv_value(input_x) + + # [N, C', H x W] + query = query.view(query.size(0), query.size(1), -1) + + # [N, C', H' x W'] + key = key.view(key.size(0), key.size(1), -1) + value = value.view(value.size(0), value.size(1), -1) + + # channel whitening + key_mean = key.mean(2).unsqueeze(2) + query_mean = query.mean(2).unsqueeze(2) + key -= key_mean + query -= query_mean + + # [N, T x H x W, T x H' x W'] + sim_map = torch.bmm(query.transpose(1, 2), key) + sim_map = sim_map/self.scale + sim_map = sim_map/self.temperature + sim_map = self.softmax(sim_map) + + # [N, T x H x W, C'] + out_sim = torch.bmm(sim_map, value.transpose(1, 2)) + + # [N, C', T x H x W] + out_sim = out_sim.transpose(1, 2) + + # [N, C', T, H, W] + out_sim = out_sim.view(out_sim.size(0), out_sim.size(1), *x.size()[2:]) + out_sim = self.gamma * out_sim + + # [N, 1, H', W'] + mask = self.conv_mask(input_x) + # [N, 1, H'x W'] + mask = mask.view(mask.size(0), mask.size(1), -1) + mask = self.softmax(mask) + # [N, C, 1, 1] + out_gc = torch.bmm(value, mask.permute(0,2,1)).unsqueeze(-1) + out_sim = out_sim+out_gc + + return self.cv(out_sim + residual) + + +class GC(nn.Module): + # Spatial pyramid pooling layer used in YOLOv3-SPP + def __init__(self, c1, c2, k=3, s=1): + super(GC, self).__init__() + c_ = int(c1) # hidden channels + + # + self.channel_add_conv = nn.Sequential( + nn.Conv2d(c1, c_, kernel_size=1), + nn.LayerNorm([c_, 1, 1]), + nn.ReLU(inplace=True), # yapf: disable + nn.Conv2d(c_, c1, kernel_size=1)) + + self.conv_mask = nn.Conv2d(c_, 1, kernel_size=1) + self.softmax = nn.Softmax(dim=2) + + self.cv = Conv(c1, c2, k, s) + + + def spatial_pool(self, x): + + batch, channel, height, width = x.size() + + input_x = x + # [N, C, H * W] + input_x = input_x.view(batch, channel, height * width) + # [N, 1, C, H * W] + input_x = input_x.unsqueeze(1) + # [N, 1, H, W] + context_mask = self.conv_mask(x) + # [N, 1, H * W] + context_mask = context_mask.view(batch, 1, height * width) + # [N, 1, H * W] + context_mask = self.softmax(context_mask) + # [N, 1, H * W, 1] + context_mask = context_mask.unsqueeze(-1) + # [N, 1, C, 1] + context = torch.matmul(input_x, context_mask) + # [N, C, 1, 1] + context = context.view(batch, channel, 1, 1) + + return context + + def forward(self, x): + + return self.cv(x + self.channel_add_conv(self.spatial_pool(x))) + + +class 
SAM(nn.Module): + # Spatial pyramid pooling layer used in YOLOv3-SPP + def __init__(self, c1, c2, k=3, s=1): + super(SAM, self).__init__() + c_ = int(c1) # hidden channels + self.cvsig = ConvSig(c1, c1, 1, 1) + self.cv = Conv(c1, c2, k, s) + + def forward(self, x): + + return self.cv(x * self.cvsig(x)) + + +class SAMA(nn.Module): + # Spatial pyramid pooling layer used in YOLOv3-SPP + def __init__(self, c1, c2, k=3, s=1): + super(SAMA, self).__init__() + c_ = int(c1) # hidden channels + self.cvsig = ConvSig(c1, c1, 1, 1) + self.cv = Conv(c1, c2, k, s) + + def forward(self, x): + + return self.cv(x + x * self.cvsig(x)) + + +class SAMB(nn.Module): + # Spatial pyramid pooling layer used in YOLOv3-SPP + def __init__(self, c1, c2, k=3, s=1): + super(SAMB, self).__init__() + c_ = int(c1) # hidden channels + self.cv = Conv(c1, c2, k, s) + self.cvsig = ConvSig(c2, c2, 1, 1) + + def forward(self, x): + + x = self.cv(x) + + return x * self.cvsig(x) + + +class Concat(nn.Module): + # Concatenate a list of tensors along dimension + def __init__(self, dimension=1): + super(Concat, self).__init__() + self.d = dimension + + def forward(self, x): + return torch.cat(x, self.d) + + +class NMS(nn.Module): + # Non-Maximum Suppression (NMS) module + conf = 0.25 # confidence threshold + iou = 0.45 # IoU threshold + classes = None # (optional list) filter by class + + def __init__(self): + super(NMS, self).__init__() + + def forward(self, x): + return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) + + +class autoShape(nn.Module): + # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS + img_size = 640 # inference size (pixels) + conf = 0.25 # NMS confidence threshold + iou = 0.45 # NMS IoU threshold + classes = None # (optional list) filter by class + + def __init__(self, model): + super(autoShape, self).__init__() + self.model = model.eval() + + def forward(self, imgs, size=640, augment=False, profile=False): + # supports inference from various sources. For height=720, width=1280, RGB images example inputs are: + # opencv: imgs = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(720,1280,3) + # PIL: imgs = Image.open('image.jpg') # HWC x(720,1280,3) + # numpy: imgs = np.zeros((720,1280,3)) # HWC + # torch: imgs = torch.zeros(16,3,720,1280) # BCHW + # multiple: imgs = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images + + p = next(self.model.parameters()) # for device and type + if isinstance(imgs, torch.Tensor): # torch + return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference + + # Pre-process + if not isinstance(imgs, list): + imgs = [imgs] + shape0, shape1 = [], [] # image and inference shapes + batch = range(len(imgs)) # batch size + for i in batch: + imgs[i] = np.array(imgs[i]) # to numpy + imgs[i] = imgs[i][:, :, :3] if imgs[i].ndim == 3 else np.tile(imgs[i][:, :, None], 3) # enforce 3ch input + s = imgs[i].shape[:2] # HWC + shape0.append(s) # image shape + g = (size / max(s)) # gain + shape1.append([y * g for y in s]) + shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape + x = [letterbox(imgs[i], new_shape=shape1, auto=False)[0] for i in batch] # pad + x = np.stack(x, 0) if batch[-1] else x[0][None] # stack + x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW + x = torch.from_numpy(x).to(p.device).type_as(p) / 255. 
# uint8 to fp16/32 + + # Inference + with torch.no_grad(): + y = self.model(x, augment, profile)[0] # forward + y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) # NMS + + # Post-process + for i in batch: + if y[i] is not None: + y[i][:, :4] = scale_coords(shape1, y[i][:, :4], shape0[i]) + + return Detections(imgs, y, self.names) + + +class Detections: + # detections class for YOLOv5 inference results + def __init__(self, imgs, pred, names=None): + super(Detections, self).__init__() + self.imgs = imgs # list of images as numpy arrays + self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls) + self.names = names # class names + self.xyxy = pred # xyxy pixels + self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels + gn = [torch.Tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.]) for im in imgs] # normalization gains + self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized + self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized + + def display(self, pprint=False, show=False, save=False): + colors = color_list() + for i, (img, pred) in enumerate(zip(self.imgs, self.pred)): + str = f'Image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} ' + if pred is not None: + for c in pred[:, -1].unique(): + n = (pred[:, -1] == c).sum() # detections per class + str += f'{n} {self.names[int(c)]}s, ' # add to string + if show or save: + img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img # from np + for *box, conf, cls in pred: # xyxy, confidence, class + # str += '%s %.2f, ' % (names[int(cls)], conf) # label + ImageDraw.Draw(img).rectangle(box, width=4, outline=colors[int(cls) % 10]) # plot + if save: + f = f'results{i}.jpg' + str += f"saved to '{f}'" + img.save(f) # save + if show: + img.show(f'Image {i}') # show + if pprint: + print(str) + + def print(self): + self.display(pprint=True) # print results + + def show(self): + self.display(show=True) # show results + + def save(self): + self.display(save=True) # save results + + +class Flatten(nn.Module): + # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions + @staticmethod + def forward(x): + return x.view(x.size(0), -1) + + +class Classify(nn.Module): + # Classification head, i.e. 
x(b,c1,20,20) to x(b,c2) + def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups + super(Classify, self).__init__() + self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) + self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) # to x(b,c2,1,1) + self.flat = Flatten() + + def forward(self, x): + z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list + return self.flat(self.conv(z)) # flatten to x(b,c2) + + +class TransformerLayer(nn.Module): + def __init__(self, c, num_heads): + super().__init__() + + self.ln1 = nn.LayerNorm(c) + self.q = nn.Linear(c, c, bias=False) + self.k = nn.Linear(c, c, bias=False) + self.v = nn.Linear(c, c, bias=False) + self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads) + self.ln2 = nn.LayerNorm(c) + self.fc1 = nn.Linear(c, c, bias=False) + self.fc2 = nn.Linear(c, c, bias=False) + + def forward(self, x): + x_ = self.ln1(x) + x = self.ma(self.q(x_), self.k(x_), self.v(x_))[0] + x + x = self.ln2(x) + x = self.fc2(self.fc1(x)) + x + return x + + +class TransformerBlock(nn.Module): + def __init__(self, c1, c2, num_heads, num_layers): + super().__init__() + + self.conv = None + if c1 != c2: + self.conv = Conv(c1, c2) + self.linear = nn.Linear(c2, c2) + self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)]) + self.c2 = c2 + + def forward(self, x): + if self.conv is not None: + x = self.conv(x) + b, _, w, h = x.shape + p = x.flatten(2) + p = p.unsqueeze(0) + p = p.transpose(0, 3) + p = p.squeeze(3) + e = self.linear(p) + x = p + e + + x = self.tr(x) + x = x.unsqueeze(3) + x = x.transpose(0, 3) + x = x.reshape(b, self.c2, w, h) + return x + + + +class BottleneckCSPTR(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSPTR, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv4 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) + self.act = nn.SiLU() + self.m = TransformerBlock(c_, c_, 4, n) + + def forward(self, x): + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) + +class BottleneckCSP2TR(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSP2TR, self).__init__() + c_ = int(c2) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv3 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) + self.act = nn.SiLU() + self.m = TransformerBlock(c_, c_, 4, n) + + def forward(self, x): + x1 = self.cv1(x) + y1 = self.m(x1) + y2 = self.cv2(x1) + return self.cv3(self.act(self.bn(torch.cat((y1, y2), dim=1)))) + + +class SPPCSPTR(nn.Module): + # CSP SPP https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)): + super(SPPCSPTR, self).__init__() + c_ = int(2 * c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = Conv(c_, c_, 3, 1) + self.cv4 = Conv(c_, c_, 1, 1) + self.m 
= nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) + self.cv5 = Conv(4 * c_, c_, 1, 1) + self.cv6 = TransformerBlock(c_, c_, 4, 1) + self.bn = nn.BatchNorm2d(2 * c_) + self.act = nn.SiLU() + self.cv7 = Conv(2 * c_, c2, 1, 1) + + def forward(self, x): + x1 = self.cv4(self.cv3(self.cv1(x))) + y1 = self.cv6(self.cv5(torch.cat([x1] + [m(x1) for m in self.m], 1))) + y2 = self.cv2(x) + return self.cv7(self.act(self.bn(torch.cat((y1, y2), dim=1)))) + +class TR(BottleneckCSPTR): + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) + self.m = TransformerBlock(c_, c_, 4, n) \ No newline at end of file diff --git a/asone/detectors/yolor/models/export.py b/asone/detectors/yolor/models/export.py new file mode 100644 index 0000000000000000000000000000000000000000..4300a13fad760ff4df9212d3942b4e7236b51a22 --- /dev/null +++ b/asone/detectors/yolor/models/export.py @@ -0,0 +1,68 @@ +import argparse + +import torch + +from asone.detectors.yolor.utils.google_utils import attempt_download + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--weights', type=str, default='./yolov4.pt', help='weights path') + parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') + parser.add_argument('--batch-size', type=int, default=1, help='batch size') + opt = parser.parse_args() + opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand + print(opt) + + # Input + img = torch.zeros((opt.batch_size, 3, *opt.img_size)) # image size(1,3,320,192) iDetection + + # Load PyTorch model + attempt_download(opt.weights) + model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float() + model.eval() + model.model[-1].export = True # set Detect() layer export=True + y = model(img) # dry run + + # TorchScript export + try: + print('\nStarting TorchScript export with torch %s...' % torch.__version__) + f = opt.weights.replace('.pt', '.torchscript.pt') # filename + ts = torch.jit.trace(model, img) + ts.save(f) + print('TorchScript export success, saved as %s' % f) + except Exception as e: + print('TorchScript export failure: %s' % e) + + # ONNX export + try: + import onnx + + print('\nStarting ONNX export with onnx %s...' % onnx.__version__) + f = opt.weights.replace('.pt', '.onnx') # filename + model.fuse() # only for ONNX + torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], + output_names=['classes', 'boxes'] if y is None else ['output']) + + # Checks + onnx_model = onnx.load(f) # load onnx model + onnx.checker.check_model(onnx_model) # check onnx model + print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model + print('ONNX export success, saved as %s' % f) + except Exception as e: + print('ONNX export failure: %s' % e) + + # CoreML export + try: + import coremltools as ct + + print('\nStarting CoreML export with coremltools %s...' % ct.__version__) + # convert model from torchscript and apply pixel scaling as per detect.py + model = ct.convert(ts, inputs=[ct.ImageType(name='images', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) + f = opt.weights.replace('.pt', '.mlmodel') # filename + model.save(f) + print('CoreML export success, saved as %s' % f) + except Exception as e: + print('CoreML export failure: %s' % e) + + # Finish + print('\nExport complete. 
Visualize with https://github.com/lutzroeder/netron.') diff --git a/asone/detectors/yolor/models/models.py b/asone/detectors/yolor/models/models.py new file mode 100644 index 0000000000000000000000000000000000000000..386b0b2952dbb502998980ce463ad12259ce0753 --- /dev/null +++ b/asone/detectors/yolor/models/models.py @@ -0,0 +1,761 @@ +from asone.detectors.yolor.utils.google_utils import * +from asone.detectors.yolor.utils.layers import * +from asone.detectors.yolor.utils.parse_config import * +from asone.detectors.yolor.utils import torch_utils + +ONNX_EXPORT = False + + +def create_modules(module_defs, img_size, cfg): + # Constructs module list of layer blocks from module configuration in module_defs + + img_size = [img_size] * 2 if isinstance(img_size, int) else img_size # expand if necessary + _ = module_defs.pop(0) # cfg training hyperparams (unused) + output_filters = [3] # input channels + module_list = nn.ModuleList() + routs = [] # list of layers which rout to deeper layers + yolo_index = -1 + + for i, mdef in enumerate(module_defs): + modules = nn.Sequential() + + if mdef['type'] == 'convolutional': + bn = mdef['batch_normalize'] + filters = mdef['filters'] + k = mdef['size'] # kernel size + stride = mdef['stride'] if 'stride' in mdef else (mdef['stride_y'], mdef['stride_x']) + if isinstance(k, int): # single-size conv + modules.add_module('Conv2d', nn.Conv2d(in_channels=output_filters[-1], + out_channels=filters, + kernel_size=k, + stride=stride, + padding=k // 2 if mdef['pad'] else 0, + groups=mdef['groups'] if 'groups' in mdef else 1, + bias=not bn)) + else: # multiple-size conv + modules.add_module('MixConv2d', MixConv2d(in_ch=output_filters[-1], + out_ch=filters, + k=k, + stride=stride, + bias=not bn)) + + if bn: + modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.03, eps=1E-4)) + else: + routs.append(i) # detection output (goes into yolo layer) + + if mdef['activation'] == 'leaky': # activation study https://github.com/ultralytics/yolov3/issues/441 + modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True)) + elif mdef['activation'] == 'swish': + modules.add_module('activation', Swish()) + elif mdef['activation'] == 'mish': + modules.add_module('activation', Mish()) + elif mdef['activation'] == 'emb': + modules.add_module('activation', F.normalize()) + elif mdef['activation'] == 'logistic': + modules.add_module('activation', nn.Sigmoid()) + elif mdef['activation'] == 'silu': + modules.add_module('activation', nn.SiLU()) + + elif mdef['type'] == 'deformableconvolutional': + bn = mdef['batch_normalize'] + filters = mdef['filters'] + k = mdef['size'] # kernel size + stride = mdef['stride'] if 'stride' in mdef else (mdef['stride_y'], mdef['stride_x']) + if isinstance(k, int): # single-size conv + modules.add_module('DeformConv2d', DeformConv2d(output_filters[-1], + filters, + kernel_size=k, + padding=k // 2 if mdef['pad'] else 0, + stride=stride, + bias=not bn, + modulation=True)) + else: # multiple-size conv + modules.add_module('MixConv2d', MixConv2d(in_ch=output_filters[-1], + out_ch=filters, + k=k, + stride=stride, + bias=not bn)) + + if bn: + modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.03, eps=1E-4)) + else: + routs.append(i) # detection output (goes into yolo layer) + + if mdef['activation'] == 'leaky': # activation study https://github.com/ultralytics/yolov3/issues/441 + modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True)) + elif mdef['activation'] == 'swish': + modules.add_module('activation', Swish()) + 
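(Reference note, not part of the diff: the ONNX branch of export.py above reduces to a dry run followed by torch.onnx.export with an explicit opset and named inputs/outputs, then an onnx checker pass. A minimal sketch of that flow, using a small dummy nn.Sequential and a hard-coded "model.onnx" filename in place of the YOLOR checkpoint and weight-derived path — both are illustrative assumptions only.)

import torch
import torch.nn as nn
import onnx

# stand-in for the loaded YOLOR model (illustrative only)
model = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.SiLU(), nn.Conv2d(8, 4, 1))
model.eval()

img = torch.zeros((1, 3, 640, 640))            # dummy NCHW input, as in export.py
with torch.no_grad():
    _ = model(img)                             # dry run

torch.onnx.export(
    model, img, "model.onnx",
    opset_version=12,                          # same opset as the script above
    input_names=["images"],
    output_names=["output"],
)

onnx_model = onnx.load("model.onnx")
onnx.checker.check_model(onnx_model)           # structural validity check, as above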
elif mdef['activation'] == 'mish': + modules.add_module('activation', Mish()) + elif mdef['activation'] == 'silu': + modules.add_module('activation', nn.SiLU()) + + elif mdef['type'] == 'dropout': + p = mdef['probability'] + modules = nn.Dropout(p) + + elif mdef['type'] == 'avgpool': + modules = GAP() + + elif mdef['type'] == 'silence': + filters = output_filters[-1] + modules = Silence() + + elif mdef['type'] == 'scale_channels': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = ScaleChannel(layers=layers) + + elif mdef['type'] == 'shift_channels': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = ShiftChannel(layers=layers) + + elif mdef['type'] == 'shift_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = ShiftChannel2D(layers=layers) + + elif mdef['type'] == 'control_channels': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = ControlChannel(layers=layers) + + elif mdef['type'] == 'control_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = ControlChannel2D(layers=layers) + + elif mdef['type'] == 'alternate_channels': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] * 2 + routs.extend([i + l if l < 0 else l for l in layers]) + modules = AlternateChannel(layers=layers) + + elif mdef['type'] == 'alternate_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] * 2 + routs.extend([i + l if l < 0 else l for l in layers]) + modules = AlternateChannel2D(layers=layers) + + elif mdef['type'] == 'select_channels': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = SelectChannel(layers=layers) + + elif mdef['type'] == 'select_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = SelectChannel2D(layers=layers) + + elif mdef['type'] == 'sam': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = ScaleSpatial(layers=layers) + + elif mdef['type'] == 'BatchNorm2d': + filters = output_filters[-1] + modules = nn.BatchNorm2d(filters, momentum=0.03, eps=1E-4) + if i == 0 and filters == 3: # normalize RGB image + # imagenet mean and var https://pytorch.org/docs/stable/torchvision/models.html#classification + modules.running_mean = torch.tensor([0.485, 0.456, 0.406]) + modules.running_var = torch.tensor([0.0524, 0.0502, 0.0506]) + + elif mdef['type'] == 'maxpool': + k = mdef['size'] # kernel size + stride = mdef['stride'] + maxpool = nn.MaxPool2d(kernel_size=k, stride=stride, padding=(k - 1) // 2) + if k == 2 and stride == 1: # yolov3-tiny + modules.add_module('ZeroPad2d', 
nn.ZeroPad2d((0, 1, 0, 1))) + modules.add_module('MaxPool2d', maxpool) + else: + modules = maxpool + + elif mdef['type'] == 'local_avgpool': + k = mdef['size'] # kernel size + stride = mdef['stride'] + avgpool = nn.AvgPool2d(kernel_size=k, stride=stride, padding=(k - 1) // 2) + if k == 2 and stride == 1: # yolov3-tiny + modules.add_module('ZeroPad2d', nn.ZeroPad2d((0, 1, 0, 1))) + modules.add_module('AvgPool2d', avgpool) + else: + modules = avgpool + + elif mdef['type'] == 'upsample': + if ONNX_EXPORT: # explicitly state size, avoid scale_factor + g = (yolo_index + 1) * 2 / 32 # gain + modules = nn.Upsample(size=tuple(int(x * g) for x in img_size)) # img_size = (320, 192) + else: + modules = nn.Upsample(scale_factor=mdef['stride']) + + elif mdef['type'] == 'route': # nn.Sequential() placeholder for 'route' layer + layers = mdef['layers'] + filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers]) + routs.extend([i + l if l < 0 else l for l in layers]) + modules = FeatureConcat(layers=layers) + + elif mdef['type'] == 'route2': # nn.Sequential() placeholder for 'route' layer + layers = mdef['layers'] + filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers]) + routs.extend([i + l if l < 0 else l for l in layers]) + modules = FeatureConcat2(layers=layers) + + elif mdef['type'] == 'route3': # nn.Sequential() placeholder for 'route' layer + layers = mdef['layers'] + filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers]) + routs.extend([i + l if l < 0 else l for l in layers]) + modules = FeatureConcat3(layers=layers) + + elif mdef['type'] == 'route_lhalf': # nn.Sequential() placeholder for 'route' layer + layers = mdef['layers'] + filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers])//2 + routs.extend([i + l if l < 0 else l for l in layers]) + modules = FeatureConcat_l(layers=layers) + + elif mdef['type'] == 'shortcut': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = WeightedFeatureFusion(layers=layers, weight='weights_type' in mdef) + + elif mdef['type'] == 'reorg3d': # yolov3-spp-pan-scale + pass + + elif mdef['type'] == 'reorg': # yolov3-spp-pan-scale + filters = 4 * output_filters[-1] + modules.add_module('Reorg', Reorg()) + + elif mdef['type'] == 'dwt': # yolov3-spp-pan-scale + filters = 4 * output_filters[-1] + modules.add_module('DWT', DWT()) + + elif mdef['type'] == 'implicit_add': # yolov3-spp-pan-scale + filters = mdef['filters'] + modules = ImplicitA(channel=filters) + + elif mdef['type'] == 'implicit_mul': # yolov3-spp-pan-scale + filters = mdef['filters'] + modules = ImplicitM(channel=filters) + + elif mdef['type'] == 'implicit_cat': # yolov3-spp-pan-scale + filters = mdef['filters'] + modules = ImplicitC(channel=filters) + + elif mdef['type'] == 'implicit_add_2d': # yolov3-spp-pan-scale + channels = mdef['filters'] + filters = mdef['atoms'] + modules = Implicit2DA(atom=filters, channel=channels) + + elif mdef['type'] == 'implicit_mul_2d': # yolov3-spp-pan-scale + channels = mdef['filters'] + filters = mdef['atoms'] + modules = Implicit2DM(atom=filters, channel=channels) + + elif mdef['type'] == 'implicit_cat_2d': # yolov3-spp-pan-scale + channels = mdef['filters'] + filters = mdef['atoms'] + modules = Implicit2DC(atom=filters, channel=channels) + + elif mdef['type'] == 'yolo': + yolo_index += 1 + stride = [8, 16, 32, 64, 128] # P3, P4, P5, P6, P7 strides + if any(x in cfg for x in ['yolov4-tiny', 
'fpn', 'yolov3']): # P5, P4, P3 strides + stride = [32, 16, 8] + layers = mdef['from'] if 'from' in mdef else [] + modules = YOLOLayer(anchors=mdef['anchors'][mdef['mask']], # anchor list + nc=mdef['classes'], # number of classes + img_size=img_size, # (416, 416) + yolo_index=yolo_index, # 0, 1, 2... + layers=layers, # output layers + stride=stride[yolo_index]) + + # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3) + try: + j = layers[yolo_index] if 'from' in mdef else -2 + bias_ = module_list[j][0].bias # shape(255,) + bias = bias_[:modules.no * modules.na].view(modules.na, -1) # shape(3,85) + #bias[:, 4] += -4.5 # obj + bias.data[:, 4] += math.log(8 / (640 / stride[yolo_index]) ** 2) # obj (8 objects per 640 image) + bias.data[:, 5:] += math.log(0.6 / (modules.nc - 0.99)) # cls (sigmoid(p) = 1/nc) + module_list[j][0].bias = torch.nn.Parameter(bias_, requires_grad=bias_.requires_grad) + + #j = [-2, -5, -8] + #for sj in j: + # bias_ = module_list[sj][0].bias + # bias = bias_[:modules.no * 1].view(1, -1) + # bias.data[:, 4] += math.log(8 / (640 / stride[yolo_index]) ** 2) + # bias.data[:, 5:] += math.log(0.6 / (modules.nc - 0.99)) + # module_list[sj][0].bias = torch.nn.Parameter(bias_, requires_grad=bias_.requires_grad) + except: + print('WARNING: smart bias initialization failure.') + + elif mdef['type'] == 'jde': + yolo_index += 1 + stride = [8, 16, 32, 64, 128] # P3, P4, P5, P6, P7 strides + if any(x in cfg for x in ['yolov4-tiny', 'fpn', 'yolov3']): # P5, P4, P3 strides + stride = [32, 16, 8] + layers = mdef['from'] if 'from' in mdef else [] + modules = JDELayer(anchors=mdef['anchors'][mdef['mask']], # anchor list + nc=mdef['classes'], # number of classes + img_size=img_size, # (416, 416) + yolo_index=yolo_index, # 0, 1, 2... 
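(Reference note: the "smart bias" initialization in the yolo branch above follows the prior-probability trick from the focal-loss paper — the objectness bias is shifted by log(8 / (640 / stride)^2), i.e. roughly 8 expected objects per 640-pixel image at each stride, and class biases by log(0.6 / (nc - 0.99)) so each class starts near a 0.6/nc probability. A small worked example of the shifts this produces; nc=80 and the P3–P5 strides are taken from the defaults above.)

import math

nc = 80                       # number of classes (COCO default)
strides = [8, 16, 32]         # P3, P4, P5 strides used above

for s in strides:
    obj_shift = math.log(8 / (640 / s) ** 2)   # ~8 objects per 640x640 image at this stride
    cls_shift = math.log(0.6 / (nc - 0.99))    # sigmoid(bias) starts near 0.6 / nc
    print(f"stride {s:3d}: obj bias shift {obj_shift:6.2f}, cls bias shift {cls_shift:6.2f}")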
+ layers=layers, # output layers + stride=stride[yolo_index]) + + # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3) + try: + j = layers[yolo_index] if 'from' in mdef else -1 + bias_ = module_list[j][0].bias # shape(255,) + bias = bias_[:modules.no * modules.na].view(modules.na, -1) # shape(3,85) + #bias[:, 4] += -4.5 # obj + bias.data[:, 4] += math.log(8 / (640 / stride[yolo_index]) ** 2) # obj (8 objects per 640 image) + bias.data[:, 5:] += math.log(0.6 / (modules.nc - 0.99)) # cls (sigmoid(p) = 1/nc) + module_list[j][0].bias = torch.nn.Parameter(bias_, requires_grad=bias_.requires_grad) + except: + print('WARNING: smart bias initialization failure.') + + else: + print('Warning: Unrecognized Layer Type: ' + mdef['type']) + + # Register module list and number of output filters + module_list.append(modules) + output_filters.append(filters) + + routs_binary = [False] * (i + 1) + for i in routs: + routs_binary[i] = True + return module_list, routs_binary + + +class YOLOLayer(nn.Module): + def __init__(self, anchors, nc, img_size, yolo_index, layers, stride): + super(YOLOLayer, self).__init__() + self.anchors = torch.Tensor(anchors) + self.index = yolo_index # index of this layer in layers + self.layers = layers # model output layer indices + self.stride = stride # layer stride + self.nl = len(layers) # number of output layers (3) + self.na = len(anchors) # number of anchors (3) + self.nc = nc # number of classes (80) + self.no = nc + 5 # number of outputs (85) + self.nx, self.ny, self.ng = 0, 0, 0 # initialize number of x, y gridpoints + self.anchor_vec = self.anchors / self.stride + self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2) + + if ONNX_EXPORT: + self.training = False + self.create_grids((img_size[1] // stride, img_size[0] // stride)) # number x, y grid points + + def create_grids(self, ng=(13, 13), device='cpu'): + self.nx, self.ny = ng # x and y grid size + self.ng = torch.tensor(ng, dtype=torch.float) + + # build xy offsets + if not self.training: + yv, xv = torch.meshgrid([torch.arange(self.ny, device=device), torch.arange(self.nx, device=device)]) + self.grid = torch.stack((xv, yv), 2).view((1, 1, self.ny, self.nx, 2)).float() + + if self.anchor_vec.device != device: + self.anchor_vec = self.anchor_vec.to(device) + self.anchor_wh = self.anchor_wh.to(device) + + def forward(self, p, out): + ASFF = False # https://arxiv.org/abs/1911.09516 + if ASFF: + i, n = self.index, self.nl # index in layers, number of layers + p = out[self.layers[i]] + bs, _, ny, nx = p.shape # bs, 255, 13, 13 + if (self.nx, self.ny) != (nx, ny): + self.create_grids((nx, ny), p.device) + + # outputs and weights + # w = F.softmax(p[:, -n:], 1) # normalized weights + w = torch.sigmoid(p[:, -n:]) * (2 / n) # sigmoid weights (faster) + # w = w / w.sum(1).unsqueeze(1) # normalize across layer dimension + + # weighted ASFF sum + p = out[self.layers[i]][:, :-n] * w[:, i:i + 1] + for j in range(n): + if j != i: + p += w[:, j:j + 1] * \ + F.interpolate(out[self.layers[j]][:, :-n], size=[ny, nx], mode='bilinear', align_corners=False) + + elif ONNX_EXPORT: + bs = 1 # batch size + else: + bs, _, ny, nx = p.shape # bs, 255, 13, 13 + if (self.nx, self.ny) != (nx, ny): + self.create_grids((nx, ny), p.device) + + # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, classes + xywh) + p = p.view(bs, self.na, self.no, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous() # prediction + + if self.training: + return p + + elif ONNX_EXPORT: + # Avoid 
broadcasting for ANE operations + m = self.na * self.nx * self.ny + ng = 1. / self.ng.repeat(m, 1) + grid = self.grid.repeat(1, self.na, 1, 1, 1).view(m, 2) + anchor_wh = self.anchor_wh.repeat(1, 1, self.nx, self.ny, 1).view(m, 2) * ng + + p = p.view(m, self.no) + xy = torch.sigmoid(p[:, 0:2]) + grid # x, y + wh = torch.exp(p[:, 2:4]) * anchor_wh # width, height + p_cls = torch.sigmoid(p[:, 4:5]) if self.nc == 1 else \ + torch.sigmoid(p[:, 5:self.no]) * torch.sigmoid(p[:, 4:5]) # conf + return p_cls, xy * ng, wh + + else: # inference + io = p.sigmoid() + io[..., :2] = (io[..., :2] * 2. - 0.5 + self.grid) + io[..., 2:4] = (io[..., 2:4] * 2) ** 2 * self.anchor_wh + io[..., :4] *= self.stride + #io = p.clone() # inference output + #io[..., :2] = torch.sigmoid(io[..., :2]) + self.grid # xy + #io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh # wh yolo method + #io[..., :4] *= self.stride + #torch.sigmoid_(io[..., 4:]) + return io.view(bs, -1, self.no), p # view [1, 3, 13, 13, 85] as [1, 507, 85] + + +class JDELayer(nn.Module): + def __init__(self, anchors, nc, img_size, yolo_index, layers, stride): + super(JDELayer, self).__init__() + self.anchors = torch.Tensor(anchors) + self.index = yolo_index # index of this layer in layers + self.layers = layers # model output layer indices + self.stride = stride # layer stride + self.nl = len(layers) # number of output layers (3) + self.na = len(anchors) # number of anchors (3) + self.nc = nc # number of classes (80) + self.no = nc + 5 # number of outputs (85) + self.nx, self.ny, self.ng = 0, 0, 0 # initialize number of x, y gridpoints + self.anchor_vec = self.anchors / self.stride + self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2) + + if ONNX_EXPORT: + self.training = False + self.create_grids((img_size[1] // stride, img_size[0] // stride)) # number x, y grid points + + def create_grids(self, ng=(13, 13), device='cpu'): + self.nx, self.ny = ng # x and y grid size + self.ng = torch.tensor(ng, dtype=torch.float) + + # build xy offsets + if not self.training: + yv, xv = torch.meshgrid([torch.arange(self.ny, device=device), torch.arange(self.nx, device=device)]) + self.grid = torch.stack((xv, yv), 2).view((1, 1, self.ny, self.nx, 2)).float() + + if self.anchor_vec.device != device: + self.anchor_vec = self.anchor_vec.to(device) + self.anchor_wh = self.anchor_wh.to(device) + + def forward(self, p, out): + ASFF = False # https://arxiv.org/abs/1911.09516 + if ASFF: + i, n = self.index, self.nl # index in layers, number of layers + p = out[self.layers[i]] + bs, _, ny, nx = p.shape # bs, 255, 13, 13 + if (self.nx, self.ny) != (nx, ny): + self.create_grids((nx, ny), p.device) + + # outputs and weights + # w = F.softmax(p[:, -n:], 1) # normalized weights + w = torch.sigmoid(p[:, -n:]) * (2 / n) # sigmoid weights (faster) + # w = w / w.sum(1).unsqueeze(1) # normalize across layer dimension + + # weighted ASFF sum + p = out[self.layers[i]][:, :-n] * w[:, i:i + 1] + for j in range(n): + if j != i: + p += w[:, j:j + 1] * \ + F.interpolate(out[self.layers[j]][:, :-n], size=[ny, nx], mode='bilinear', align_corners=False) + + elif ONNX_EXPORT: + bs = 1 # batch size + else: + bs, _, ny, nx = p.shape # bs, 255, 13, 13 + if (self.nx, self.ny) != (nx, ny): + self.create_grids((nx, ny), p.device) + + # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, classes + xywh) + p = p.view(bs, self.na, self.no, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous() # prediction + + if self.training: + return p + + elif ONNX_EXPORT: + # Avoid 
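(Reference note: the YOLOLayer inference branch above decodes raw predictions with the scale-sigmoid parameterisation xy = (2*sigmoid(t_xy) - 0.5 + grid) * stride and wh = (2*sigmoid(t_wh))^2 * anchor. A self-contained sketch of that decoding for one feature map; the shapes, anchor values, and the `decode_yolo` helper name are illustrative, pixel-space anchors are used directly (equivalent to the layer's anchors/stride followed by *= stride), and a recent PyTorch with meshgrid indexing="ij" is assumed.)

import torch

def decode_yolo(p, anchors, stride):
    # p: raw predictions of shape (bs, na, ny, nx, no)
    bs, na, ny, nx, no = p.shape
    yv, xv = torch.meshgrid(torch.arange(ny), torch.arange(nx), indexing="ij")
    grid = torch.stack((xv, yv), 2).view(1, 1, ny, nx, 2).float()
    anchor_wh = torch.tensor(anchors, dtype=torch.float32).view(1, na, 1, 1, 2)

    io = p.sigmoid()
    io[..., :2] = (io[..., :2] * 2.0 - 0.5 + grid) * stride     # centre x, y in pixels
    io[..., 2:4] = (io[..., 2:4] * 2) ** 2 * anchor_wh          # width, height in pixels
    return io.view(bs, -1, no)                                   # (bs, na*ny*nx, no)

# example: 3 anchors, 13x13 grid, 80 classes + 5 box/objectness terms
preds = torch.randn(1, 3, 13, 13, 85)
boxes = decode_yolo(preds, anchors=[(116, 90), (156, 198), (373, 326)], stride=32)
print(boxes.shape)   # torch.Size([1, 507, 85])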
broadcasting for ANE operations + m = self.na * self.nx * self.ny + ng = 1. / self.ng.repeat(m, 1) + grid = self.grid.repeat(1, self.na, 1, 1, 1).view(m, 2) + anchor_wh = self.anchor_wh.repeat(1, 1, self.nx, self.ny, 1).view(m, 2) * ng + + p = p.view(m, self.no) + xy = torch.sigmoid(p[:, 0:2]) + grid # x, y + wh = torch.exp(p[:, 2:4]) * anchor_wh # width, height + p_cls = torch.sigmoid(p[:, 4:5]) if self.nc == 1 else \ + torch.sigmoid(p[:, 5:self.no]) * torch.sigmoid(p[:, 4:5]) # conf + return p_cls, xy * ng, wh + + else: # inference + #io = p.sigmoid() + #io[..., :2] = (io[..., :2] * 2. - 0.5 + self.grid) + #io[..., 2:4] = (io[..., 2:4] * 2) ** 2 * self.anchor_wh + #io[..., :4] *= self.stride + io = p.clone() # inference output + io[..., :2] = torch.sigmoid(io[..., :2]) * 2. - 0.5 + self.grid # xy + io[..., 2:4] = (torch.sigmoid(io[..., 2:4]) * 2) ** 2 * self.anchor_wh # wh yolo method + io[..., :4] *= self.stride + io[..., 4:] = F.softmax(io[..., 4:]) + return io.view(bs, -1, self.no), p # view [1, 3, 13, 13, 85] as [1, 507, 85] + +class Darknet(nn.Module): + # YOLOv3 object detection model + + def __init__(self, cfg, img_size=(416, 416), verbose=False): + super(Darknet, self).__init__() + + self.module_defs = parse_model_cfg(cfg) + self.module_list, self.routs = create_modules(self.module_defs, img_size, cfg) + self.yolo_layers = get_yolo_layers(self) + # torch_utils.initialize_weights(self) + + # Darknet Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 + self.version = np.array([0, 2, 5], dtype=np.int32) # (int32) version info: major, minor, revision + self.seen = np.array([0], dtype=np.int64) # (int64) number of images seen during training + self.info(verbose) if not ONNX_EXPORT else None # print model description + + def forward(self, x, augment=False, verbose=False): + + if not augment: + return self.forward_once(x) + else: # Augment images (inference and test only) https://github.com/ultralytics/yolov3/issues/931 + img_size = x.shape[-2:] # height, width + s = [0.83, 0.67] # scales + y = [] + for i, xi in enumerate((x, + torch_utils.scale_img(x.flip(3), s[0], same_shape=False), # flip-lr and scale + torch_utils.scale_img(x, s[1], same_shape=False), # scale + )): + # cv2.imwrite('img%g.jpg' % i, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1]) + y.append(self.forward_once(xi)[0]) + + y[1][..., :4] /= s[0] # scale + y[1][..., 0] = img_size[1] - y[1][..., 0] # flip lr + y[2][..., :4] /= s[1] # scale + + # for i, yi in enumerate(y): # coco small, medium, large = < 32**2 < 96**2 < + # area = yi[..., 2:4].prod(2)[:, :, None] + # if i == 1: + # yi *= (area < 96. ** 2).float() + # elif i == 2: + # yi *= (area > 32. 
** 2).float() + # y[i] = yi + + y = torch.cat(y, 1) + return y, None + + def forward_once(self, x, augment=False, verbose=False): + img_size = x.shape[-2:] # height, width + yolo_out, out = [], [] + if verbose: + print('0', x.shape) + str = '' + + # Augment images (inference and test only) + if augment: # https://github.com/ultralytics/yolov3/issues/931 + nb = x.shape[0] # batch size + s = [0.83, 0.67] # scales + x = torch.cat((x, + torch_utils.scale_img(x.flip(3), s[0]), # flip-lr and scale + torch_utils.scale_img(x, s[1]), # scale + ), 0) + + for i, module in enumerate(self.module_list): + name = module.__class__.__name__ + #print(name) + if name in ['WeightedFeatureFusion', 'FeatureConcat', 'FeatureConcat2', 'FeatureConcat3', 'FeatureConcat_l', 'ScaleChannel', 'ShiftChannel', 'ShiftChannel2D', 'ControlChannel', 'ControlChannel2D', 'AlternateChannel', 'AlternateChannel2D', 'SelectChannel', 'SelectChannel2D', 'ScaleSpatial']: # sum, concat + if verbose: + l = [i - 1] + module.layers # layers + sh = [list(x.shape)] + [list(out[i].shape) for i in module.layers] # shapes + str = ' >> ' + ' + '.join(['layer %g %s' % x for x in zip(l, sh)]) + x = module(x, out) # WeightedFeatureFusion(), FeatureConcat() + elif name in ['ImplicitA', 'ImplicitM', 'ImplicitC', 'Implicit2DA', 'Implicit2DM', 'Implicit2DC']: + x = module() + elif name == 'YOLOLayer': + yolo_out.append(module(x, out)) + elif name == 'JDELayer': + yolo_out.append(module(x, out)) + else: # run module directly, i.e. mtype = 'convolutional', 'upsample', 'maxpool', 'batchnorm2d' etc. + #print(module) + #print(x.shape) + x = module(x) + + out.append(x if self.routs[i] else []) + if verbose: + print('%g/%g %s -' % (i, len(self.module_list), name), list(x.shape), str) + str = '' + + if self.training: # train + return yolo_out + elif ONNX_EXPORT: # export + x = [torch.cat(x, 0) for x in zip(*yolo_out)] + return x[0], torch.cat(x[1:3], 1) # scores, boxes: 3780x80, 3780x4 + else: # inference or test + x, p = zip(*yolo_out) # inference output, training output + x = torch.cat(x, 1) # cat yolo outputs + if augment: # de-augment results + x = torch.split(x, nb, dim=0) + x[1][..., :4] /= s[0] # scale + x[1][..., 0] = img_size[1] - x[1][..., 0] # flip lr + x[2][..., :4] /= s[1] # scale + x = torch.cat(x, 1) + return x, p + + def fuse(self): + # Fuse Conv2d + BatchNorm2d layers throughout model + print('Fusing layers...') + fused_list = nn.ModuleList() + for a in list(self.children())[0]: + if isinstance(a, nn.Sequential): + for i, b in enumerate(a): + if isinstance(b, nn.modules.batchnorm.BatchNorm2d): + # fuse this bn layer with the previous conv2d layer + conv = a[i - 1] + fused = torch_utils.fuse_conv_and_bn(conv, b) + a = nn.Sequential(fused, *list(a.children())[i + 1:]) + break + fused_list.append(a) + self.module_list = fused_list + self.info() if not ONNX_EXPORT else None # yolov3-spp reduced from 225 to 152 layers + + def info(self, verbose=False): + torch_utils.model_info(self, verbose) + + +def get_yolo_layers(model): + return [i for i, m in enumerate(model.module_list) if m.__class__.__name__ in ['YOLOLayer', 'JDELayer']] # [89, 101, 113] + + +def load_darknet_weights(self, weights, cutoff=-1): + # Parses and loads the weights stored in 'weights' + + # Establish cutoffs (load layers between 0 and cutoff. 
if cutoff = -1 all are loaded) + file = Path(weights).name + if file == 'darknet53.conv.74': + cutoff = 75 + elif file == 'yolov3-tiny.conv.15': + cutoff = 15 + + # Read weights file + with open(weights, 'rb') as f: + # Read Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 + self.version = np.fromfile(f, dtype=np.int32, count=3) # (int32) version info: major, minor, revision + self.seen = np.fromfile(f, dtype=np.int64, count=1) # (int64) number of images seen during training + + weights = np.fromfile(f, dtype=np.float32) # the rest are weights + + ptr = 0 + for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): + if mdef['type'] == 'convolutional': + conv = module[0] + if mdef['batch_normalize']: + # Load BN bias, weights, running mean and running variance + bn = module[1] + nb = bn.bias.numel() # number of biases + # Bias + bn.bias.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.bias)) + ptr += nb + # Weight + bn.weight.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.weight)) + ptr += nb + # Running Mean + bn.running_mean.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.running_mean)) + ptr += nb + # Running Var + bn.running_var.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.running_var)) + ptr += nb + else: + # Load conv. bias + nb = conv.bias.numel() + conv_b = torch.from_numpy(weights[ptr:ptr + nb]).view_as(conv.bias) + conv.bias.data.copy_(conv_b) + ptr += nb + # Load conv. weights + nw = conv.weight.numel() # number of weights + conv.weight.data.copy_(torch.from_numpy(weights[ptr:ptr + nw]).view_as(conv.weight)) + ptr += nw + + +def save_weights(self, path='model.weights', cutoff=-1): + # Converts a PyTorch model to Darket format (*.pt to *.weights) + # Note: Does not work if model.fuse() is applied + with open(path, 'wb') as f: + # Write Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 + self.version.tofile(f) # (int32) version info: major, minor, revision + self.seen.tofile(f) # (int64) number of images seen during training + + # Iterate through layers + for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): + if mdef['type'] == 'convolutional': + conv_layer = module[0] + # If batch norm, load bn first + if mdef['batch_normalize']: + bn_layer = module[1] + bn_layer.bias.data.cpu().numpy().tofile(f) + bn_layer.weight.data.cpu().numpy().tofile(f) + bn_layer.running_mean.data.cpu().numpy().tofile(f) + bn_layer.running_var.data.cpu().numpy().tofile(f) + # Load conv bias + else: + conv_layer.bias.data.cpu().numpy().tofile(f) + # Load conv weights + conv_layer.weight.data.cpu().numpy().tofile(f) + + +def convert(cfg='cfg/yolov3-spp.cfg', weights='weights/yolov3-spp.weights', saveto='converted.weights'): + # Converts between PyTorch and Darknet format per extension (i.e. 
*.weights convert to *.pt and vice versa) + # from models import *; convert('cfg/yolov3-spp.cfg', 'weights/yolov3-spp.weights') + + # Initialize model + model = Darknet(cfg) + ckpt = torch.load(weights) # load checkpoint + try: + ckpt['model'] = {k: v for k, v in ckpt['model'].items() if model.state_dict()[k].numel() == v.numel()} + model.load_state_dict(ckpt['model'], strict=False) + save_weights(model, path=saveto, cutoff=-1) + except KeyError as e: + print(e) + +def attempt_download(weights): + # Attempt to download pretrained weights if not found locally + weights = weights.strip() + msg = weights + ' missing, try downloading from https://drive.google.com/open?id=1LezFG5g3BCW6iYaV89B2i64cqEUZD7e0' + + if len(weights) > 0 and not os.path.isfile(weights): + d = {''} + + file = Path(weights).name + if file in d: + r = gdrive_download(id=d[file], name=weights) + else: # download from pjreddie.com + url = 'https://pjreddie.com/media/files/' + file + print('Downloading ' + url) + r = os.system('curl -f ' + url + ' -o ' + weights) + + # Error check + if not (r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6): # weights exist and > 1MB + os.system('rm ' + weights) # remove partial downloads + raise Exception(msg) diff --git a/asone/detectors/yolor/utils/__init__.py b/asone/detectors/yolor/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/asone/detectors/yolor/utils/__init__.py @@ -0,0 +1 @@ + diff --git a/asone/detectors/yolor/utils/activations.py b/asone/detectors/yolor/utils/activations.py new file mode 100644 index 0000000000000000000000000000000000000000..ba6b854ddcc4ba2004440b8e2c946911d37f0af1 --- /dev/null +++ b/asone/detectors/yolor/utils/activations.py @@ -0,0 +1,72 @@ +# Activation functions + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +# Swish https://arxiv.org/pdf/1905.02244.pdf --------------------------------------------------------------------------- +class Swish(nn.Module): # + @staticmethod + def forward(x): + return x * torch.sigmoid(x) + + +class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() + @staticmethod + def forward(x): + # return x * F.hardsigmoid(x) # for torchscript and CoreML + return x * F.hardtanh(x + 3, 0., 6.) / 6. 
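(Reference note: the Hardswish variant defined below is the export-friendly rewrite x * hardtanh(x + 3, 0, 6) / 6, which is algebraically identical to PyTorch's built-in hardswish. A quick numerical check of that equivalence; only torch is assumed.)

import torch
import torch.nn.functional as F

x = torch.linspace(-5, 5, steps=101)
export_friendly = x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0   # the ONNX/CoreML-safe form
builtin = F.hardswish(x)                                   # reference implementation
print(torch.allclose(export_friendly, builtin, atol=1e-6))  # True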
# for torchscript, CoreML and ONNX + + +class MemoryEfficientSwish(nn.Module): + class F(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return x * torch.sigmoid(x) + + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + sx = torch.sigmoid(x) + return grad_output * (sx * (1 + x * (1 - sx))) + + def forward(self, x): + return self.F.apply(x) + + +# Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- +class Mish(nn.Module): + @staticmethod + def forward(x): + return x * F.softplus(x).tanh() + + +class MemoryEfficientMish(nn.Module): + class F(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) + + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + sx = torch.sigmoid(x) + fx = F.softplus(x).tanh() + return grad_output * (fx + x * sx * (1 - fx * fx)) + + def forward(self, x): + return self.F.apply(x) + + +# FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- +class FReLU(nn.Module): + def __init__(self, c1, k=3): # ch_in, kernel + super().__init__() + self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1) + self.bn = nn.BatchNorm2d(c1) + + def forward(self, x): + return torch.max(x, self.bn(self.conv(x))) diff --git a/asone/detectors/yolor/utils/autoanchor.py b/asone/detectors/yolor/utils/autoanchor.py new file mode 100644 index 0000000000000000000000000000000000000000..1e82492bf09050013cb1bee6fbec6baef5ff22a5 --- /dev/null +++ b/asone/detectors/yolor/utils/autoanchor.py @@ -0,0 +1,152 @@ +# Auto-anchor utils + +import numpy as np +import torch +import yaml +from scipy.cluster.vq import kmeans +from tqdm import tqdm + + +def check_anchor_order(m): + # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary + a = m.anchor_grid.prod(-1).view(-1) # anchor area + da = a[-1] - a[0] # delta a + ds = m.stride[-1] - m.stride[0] # delta s + if da.sign() != ds.sign(): # same order + print('Reversing anchor order') + m.anchors[:] = m.anchors.flip(0) + m.anchor_grid[:] = m.anchor_grid.flip(0) + + +def check_anchors(dataset, model, thr=4.0, imgsz=640): + # Check anchor fit to data, recompute if necessary + print('\nAnalyzing anchors... ', end='') + m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() + shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) + scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale + wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh + + def metric(k): # compute metric + r = wh[:, None] / k[None] + x = torch.min(r, 1. / r).min(2)[0] # ratio metric + best = x.max(1)[0] # best_x + aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold + bpr = (best > 1. / thr).float().mean() # best possible recall + return bpr, aat + + bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2)) + print('anchors/target = %.2f, Best Possible Recall (BPR) = %.4f' % (aat, bpr), end='') + if bpr < 0.98: # threshold to recompute + print('. 
Attempting to improve anchors, please wait...') + na = m.anchor_grid.numel() // 2 # number of anchors + new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) + new_bpr = metric(new_anchors.reshape(-1, 2))[0] + if new_bpr > bpr: # replace anchors + new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors) + m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid) # for inference + m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss + check_anchor_order(m) + print('New anchors saved to model. Update model *.yaml to use these anchors in the future.') + else: + print('Original anchors better than new anchors. Proceeding with original anchors.') + print('') # newline + + +def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): + """ Creates kmeans-evolved anchors from training dataset + + Arguments: + path: path to dataset *.yaml, or a loaded dataset + n: number of anchors + img_size: image size used for training + thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 + gen: generations to evolve anchors using genetic algorithm + verbose: print all results + + Return: + k: kmeans evolved anchors + + Usage: + from utils.general import *; _ = kmean_anchors() + """ + thr = 1. / thr + + def metric(k, wh): # compute metrics + r = wh[:, None] / k[None] + x = torch.min(r, 1. / r).min(2)[0] # ratio metric + # x = wh_iou(wh, torch.tensor(k)) # iou metric + return x, x.max(1)[0] # x, best_x + + def anchor_fitness(k): # mutation fitness + _, best = metric(torch.tensor(k, dtype=torch.float32), wh) + return (best * (best > thr).float()).mean() # fitness + + def print_results(k): + k = k[np.argsort(k.prod(1))] # sort small to large + x, best = metric(k, wh0) + bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr + print('thr=%.2f: %.4f best possible recall, %.2f anchors past thr' % (thr, bpr, aat)) + print('n=%g, img_size=%s, metric_all=%.3f/%.3f-mean/best, past_thr=%.3f-mean: ' % + (n, img_size, x.mean(), best.mean(), x[x > thr].mean()), end='') + for i, x in enumerate(k): + print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg + return k + + if isinstance(path, str): # *.yaml file + with open(path) as f: + data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict + from utils.datasets import LoadImagesAndLabels + dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) + else: + dataset = path # dataset + + # Get label wh + shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) + wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh + + # Filter + i = (wh0 < 3.0).any(1).sum() + if i: + print('WARNING: Extremely small objects found. ' + '%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0))) + wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels + + # Kmeans calculation + print('Running kmeans for %g anchors on %g points...' 
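(Reference note: the anchor-fit metric above takes, for every label, the worst width/height ratio against each anchor, keeps the best anchor per label, and counts the fraction falling within 1/thr — the "best possible recall". A compact standalone version of that metric; the random wh values and the `anchor_metric` helper name are placeholders for real dataset labels.)

import torch

def anchor_metric(wh, anchors, thr=4.0):
    # wh: (n, 2) label widths/heights in pixels; anchors: (k, 2) anchor widths/heights
    r = wh[:, None] / anchors[None]                  # (n, k, 2) per-dimension ratios
    x = torch.min(r, 1.0 / r).min(2)[0]              # worst-dimension ratio per label/anchor
    best = x.max(1)[0]                               # best anchor per label
    bpr = (best > 1.0 / thr).float().mean()          # best possible recall
    aat = (x > 1.0 / thr).float().sum(1).mean()      # anchors above threshold per label
    return bpr.item(), aat.item()

wh = torch.rand(1000, 2) * 300 + 10                  # placeholder label sizes
anchors = torch.tensor([[10, 13], [16, 30], [33, 23],
                        [30, 61], [62, 45], [59, 119],
                        [116, 90], [156, 198], [373, 326]], dtype=torch.float32)
print(anchor_metric(wh, anchors))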
% (n, len(wh))) + s = wh.std(0) # sigmas for whitening + k, dist = kmeans(wh / s, n, iter=30) # points, mean distance + k *= s + wh = torch.tensor(wh, dtype=torch.float32) # filtered + wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered + k = print_results(k) + + # Plot + # k, d = [None] * 20, [None] * 20 + # for i in tqdm(range(1, 21)): + # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance + # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) + # ax = ax.ravel() + # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') + # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh + # ax[0].hist(wh[wh[:, 0]<100, 0],400) + # ax[1].hist(wh[wh[:, 1]<100, 1],400) + # fig.tight_layout() + # fig.savefig('wh.png', dpi=200) + + # Evolve + npr = np.random + f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma + pbar = tqdm(range(gen), desc='Evolving anchors with Genetic Algorithm') # progress bar + for _ in pbar: + v = np.ones(sh) + while (v == 1).all(): # mutate until a change occurs (prevent duplicates) + v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) + kg = (k.copy() * v).clip(min=2.0) + fg = anchor_fitness(kg) + if fg > f: + f, k = fg, kg.copy() + pbar.desc = 'Evolving anchors with Genetic Algorithm: fitness = %.4f' % f + if verbose: + print_results(k) + + return print_results(k) diff --git a/asone/detectors/yolor/utils/datasets.py b/asone/detectors/yolor/utils/datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..641b05966a42c81945b1f303fb75af45b3b86026 --- /dev/null +++ b/asone/detectors/yolor/utils/datasets.py @@ -0,0 +1,1297 @@ +# Dataset utils and dataloaders + +import glob +import math +import os +import random +import shutil +import time +from itertools import repeat +from multiprocessing.pool import ThreadPool +from pathlib import Path +from threading import Thread + +import cv2 +import numpy as np +import torch +from PIL import Image, ExifTags +from torch.utils.data import Dataset +from tqdm import tqdm + +import pickle +from copy import deepcopy +from pycocotools import mask as maskUtils +from torchvision.utils import save_image + +from asone.detectors.yolor.utils.general import xyxy2xywh, xywh2xyxy +from asone.detectors.yolor.utils.torch_utils import torch_distributed_zero_first + +# Parameters +help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data' +img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng'] # acceptable image suffixes +vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes + +# Get orientation exif tag +for orientation in ExifTags.TAGS.keys(): + if ExifTags.TAGS[orientation] == 'Orientation': + break + + +def get_hash(files): + # Returns a single hash value of a list of files + return sum(os.path.getsize(f) for f in files if os.path.isfile(f)) + + +def exif_size(img): + # Returns exif-corrected PIL size + s = img.size # (width, height) + try: + rotation = dict(img._getexif().items())[orientation] + if rotation == 6: # rotation 270 + s = (s[1], s[0]) + elif rotation == 8: # rotation 90 + s = (s[1], s[0]) + except: + pass + + return s + + +def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False, + rank=-1, world_size=1, workers=8): + # Make sure only the first process in DDP process the dataset first, and the following others can use the cache + with torch_distributed_zero_first(rank): + dataset = 
LoadImagesAndLabels(path, imgsz, batch_size, + augment=augment, # augment images + hyp=hyp, # augmentation hyperparameters + rect=rect, # rectangular training + cache_images=cache, + single_cls=opt.single_cls, + stride=int(stride), + pad=pad, + rank=rank) + + batch_size = min(batch_size, len(dataset)) + nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers + sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None + dataloader = InfiniteDataLoader(dataset, + batch_size=batch_size, + num_workers=nw, + sampler=sampler, + pin_memory=True, + collate_fn=LoadImagesAndLabels.collate_fn) # torch.utils.data.DataLoader() + return dataloader, dataset + + +def create_dataloader9(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False, + rank=-1, world_size=1, workers=8): + # Make sure only the first process in DDP process the dataset first, and the following others can use the cache + with torch_distributed_zero_first(rank): + dataset = LoadImagesAndLabels9(path, imgsz, batch_size, + augment=augment, # augment images + hyp=hyp, # augmentation hyperparameters + rect=rect, # rectangular training + cache_images=cache, + single_cls=opt.single_cls, + stride=int(stride), + pad=pad, + rank=rank) + + batch_size = min(batch_size, len(dataset)) + nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers + sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None + dataloader = InfiniteDataLoader(dataset, + batch_size=batch_size, + num_workers=nw, + sampler=sampler, + pin_memory=True, + collate_fn=LoadImagesAndLabels9.collate_fn) # torch.utils.data.DataLoader() + return dataloader, dataset + + +class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader): + """ Dataloader that reuses workers + + Uses same syntax as vanilla DataLoader + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler)) + self.iterator = super().__iter__() + + def __len__(self): + return len(self.batch_sampler.sampler) + + def __iter__(self): + for i in range(len(self)): + yield next(self.iterator) + + +class _RepeatSampler(object): + """ Sampler that repeats forever + + Args: + sampler (Sampler) + """ + + def __init__(self, sampler): + self.sampler = sampler + + def __iter__(self): + while True: + yield from iter(self.sampler) + + +class LoadImages: # for inference + def __init__(self, path, img_size=640, auto_size=32): + p = str(Path(path)) # os-agnostic + p = os.path.abspath(p) # absolute path + if '*' in p: + files = sorted(glob.glob(p, recursive=True)) # glob + elif os.path.isdir(p): + files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir + elif os.path.isfile(p): + files = [p] # files + else: + raise Exception('ERROR: %s does not exist' % p) + + images = [x for x in files if x.split('.')[-1].lower() in img_formats] + videos = [x for x in files if x.split('.')[-1].lower() in vid_formats] + ni, nv = len(images), len(videos) + + self.img_size = img_size + self.auto_size = auto_size + self.files = images + videos + self.nf = ni + nv # number of files + self.video_flag = [False] * ni + [True] * nv + self.mode = 'images' + if any(videos): + self.new_video(videos[0]) # new video + else: + self.cap = None + assert self.nf > 0, 'No images or videos found in %s. 
Supported formats are:\nimages: %s\nvideos: %s' % \ + (p, img_formats, vid_formats) + + def __iter__(self): + self.count = 0 + return self + + def __next__(self): + if self.count == self.nf: + raise StopIteration + path = self.files[self.count] + + if self.video_flag[self.count]: + # Read video + self.mode = 'video' + ret_val, img0 = self.cap.read() + if not ret_val: + self.count += 1 + self.cap.release() + if self.count == self.nf: # last video + raise StopIteration + else: + path = self.files[self.count] + self.new_video(path) + ret_val, img0 = self.cap.read() + + self.frame += 1 + print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nf, self.frame, self.nframes, path), end='') + + else: + # Read image + self.count += 1 + img0 = cv2.imread(path) # BGR + assert img0 is not None, 'Image Not Found ' + path + print('image %g/%g %s: ' % (self.count, self.nf, path), end='') + + # Padded resize + img = letterbox(img0, new_shape=self.img_size, auto_size=self.auto_size)[0] + + # Convert + img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 + img = np.ascontiguousarray(img) + + return path, img, img0, self.cap + + def new_video(self, path): + self.frame = 0 + self.cap = cv2.VideoCapture(path) + self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) + + def __len__(self): + return self.nf # number of files + + +class LoadWebcam: # for inference + def __init__(self, pipe='0', img_size=640): + self.img_size = img_size + + if pipe.isnumeric(): + pipe = eval(pipe) # local camera + # pipe = 'rtsp://192.168.1.64/1' # IP camera + # pipe = 'rtsp://username:password@192.168.1.64/1' # IP camera with login + # pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg' # IP golf camera + + self.pipe = pipe + self.cap = cv2.VideoCapture(pipe) # video capture object + self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3) # set buffer size + + def __iter__(self): + self.count = -1 + return self + + def __next__(self): + self.count += 1 + if cv2.waitKey(1) == ord('q'): # q to quit + self.cap.release() + cv2.destroyAllWindows() + raise StopIteration + + # Read frame + if self.pipe == 0: # local camera + ret_val, img0 = self.cap.read() + img0 = cv2.flip(img0, 1) # flip left-right + else: # IP camera + n = 0 + while True: + n += 1 + self.cap.grab() + if n % 30 == 0: # skip frames + ret_val, img0 = self.cap.retrieve() + if ret_val: + break + + # Print + assert ret_val, 'Camera Error %s' % self.pipe + img_path = 'webcam.jpg' + print('webcam %g: ' % self.count, end='') + + # Padded resize + img = letterbox(img0, new_shape=self.img_size)[0] + + # Convert + img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 + img = np.ascontiguousarray(img) + + return img_path, img, img0, None + + def __len__(self): + return 0 + + +class LoadStreams: # multiple IP or RTSP cameras + def __init__(self, sources='streams.txt', img_size=640): + self.mode = 'images' + self.img_size = img_size + + if os.path.isfile(sources): + with open(sources, 'r') as f: + sources = [x.strip() for x in f.read().splitlines() if len(x.strip())] + else: + sources = [sources] + + n = len(sources) + self.imgs = [None] * n + self.sources = sources + for i, s in enumerate(sources): + # Start the thread to read frames from the video stream + print('%g/%g: %s... 
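(Reference note: each frame returned by the loaders above goes through the same preprocessing: aspect-preserving resize, padding to a stride multiple with grey value 114, BGR-to-RGB, and HWC-to-CHW. A simplified sketch of that pipeline — unlike the real letterbox it pads only bottom/right rather than centring, and the final 0-1 scaling is shown here only as a typical last step before inference; the `preprocess` helper name is an assumption.)

import cv2
import numpy as np
import torch

def preprocess(path, img_size=640, stride=32):
    img0 = cv2.imread(path)                           # BGR, HWC
    h0, w0 = img0.shape[:2]
    r = img_size / max(h0, w0)                        # resize ratio
    img = cv2.resize(img0, (int(round(w0 * r)), int(round(h0 * r))))
    h, w = img.shape[:2]
    pad_h = (stride - h % stride) % stride            # pad to a stride multiple
    pad_w = (stride - w % stride) % stride
    img = cv2.copyMakeBorder(img, 0, pad_h, 0, pad_w,
                             cv2.BORDER_CONSTANT, value=(114, 114, 114))
    img = img[:, :, ::-1].transpose(2, 0, 1)          # BGR->RGB, HWC->CHW
    img = np.ascontiguousarray(img)
    return torch.from_numpy(img).float() / 255.0, img0   # scaled tensor + original frame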
' % (i + 1, n, s), end='') + cap = cv2.VideoCapture(eval(s) if s.isnumeric() else s) + assert cap.isOpened(), 'Failed to open %s' % s + w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = cap.get(cv2.CAP_PROP_FPS) % 100 + _, self.imgs[i] = cap.read() # guarantee first frame + thread = Thread(target=self.update, args=([i, cap]), daemon=True) + print(' success (%gx%g at %.2f FPS).' % (w, h, fps)) + thread.start() + print('') # newline + + # check for common shapes + s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0) # inference shapes + self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal + if not self.rect: + print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.') + + def update(self, index, cap): + # Read next stream frame in a daemon thread + n = 0 + while cap.isOpened(): + n += 1 + # _, self.imgs[index] = cap.read() + cap.grab() + if n == 4: # read every 4th frame + _, self.imgs[index] = cap.retrieve() + n = 0 + time.sleep(0.01) # wait time + + def __iter__(self): + self.count = -1 + return self + + def __next__(self): + self.count += 1 + img0 = self.imgs.copy() + if cv2.waitKey(1) == ord('q'): # q to quit + cv2.destroyAllWindows() + raise StopIteration + + # Letterbox + img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0] + + # Stack + img = np.stack(img, 0) + + # Convert + img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416 + img = np.ascontiguousarray(img) + + return self.sources, img, img0, None + + def __len__(self): + return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years + + +class LoadImagesAndLabels(Dataset): # for training/testing + def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, + cache_images=False, single_cls=False, stride=32, pad=0.0, rank=-1): + self.img_size = img_size + self.augment = augment + self.hyp = hyp + self.image_weights = image_weights + self.rect = False if image_weights else rect + self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training) + self.mosaic_border = [-img_size // 2, -img_size // 2] + self.stride = stride + + def img2label_paths(img_paths): + # Define label paths as a function of image paths + sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings + return [x.replace(sa, sb, 1).replace(x.split('.')[-1], 'txt') for x in img_paths] + + try: + f = [] # image files + for p in path if isinstance(path, list) else [path]: + p = Path(p) # os-agnostic + if p.is_dir(): # dir + f += glob.glob(str(p / '**' / '*.*'), recursive=True) + elif p.is_file(): # file + with open(p, 'r') as t: + t = t.read().splitlines() + parent = str(p.parent) + os.sep + f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path + else: + raise Exception('%s does not exist' % p) + self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats]) + assert self.img_files, 'No images found' + except Exception as e: + raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url)) + + # Check cache + self.label_files = img2label_paths(self.img_files) # labels + cache_path = str(Path(self.label_files[0]).parent) + '.cache3' # cached labels + if os.path.isfile(cache_path): + cache = torch.load(cache_path) # load + if cache['hash'] != 
get_hash(self.label_files + self.img_files): # dataset changed + cache = self.cache_labels(cache_path) # re-cache + else: + cache = self.cache_labels(cache_path) # cache + + # Read cache + cache.pop('hash') # remove hash + labels, shapes = zip(*cache.values()) + self.labels = list(labels) + self.shapes = np.array(shapes, dtype=np.float64) + self.img_files = list(cache.keys()) # update + self.label_files = img2label_paths(cache.keys()) # update + + n = len(shapes) # number of images + bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index + nb = bi[-1] + 1 # number of batches + self.batch = bi # batch index of image + self.n = n + + # Rectangular Training + if self.rect: + # Sort by aspect ratio + s = self.shapes # wh + ar = s[:, 1] / s[:, 0] # aspect ratio + irect = ar.argsort() + self.img_files = [self.img_files[i] for i in irect] + self.label_files = [self.label_files[i] for i in irect] + self.labels = [self.labels[i] for i in irect] + self.shapes = s[irect] # wh + ar = ar[irect] + + # Set training image shapes + shapes = [[1, 1]] * nb + for i in range(nb): + ari = ar[bi == i] + mini, maxi = ari.min(), ari.max() + if maxi < 1: + shapes[i] = [maxi, 1] + elif mini > 1: + shapes[i] = [1, 1 / mini] + + self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride + + # Check labels + create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False + nm, nf, ne, ns, nd = 0, 0, 0, 0, 0 # number missing, found, empty, datasubset, duplicate + pbar = enumerate(self.label_files) + if rank in [-1, 0]: + pbar = tqdm(pbar) + for i, file in pbar: + l = self.labels[i] # label + if l is not None and l.shape[0]: + assert l.shape[1] == 5, '> 5 label columns: %s' % file + assert (l >= 0).all(), 'negative labels: %s' % file + assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file + if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows + nd += 1 # print('WARNING: duplicate rows in %s' % self.label_files[i]) # duplicate rows + if single_cls: + l[:, 0] = 0 # force dataset into single-class mode + self.labels[i] = l + nf += 1 # file found + + # Create subdataset (a smaller dataset) + if create_datasubset and ns < 1E4: + if ns == 0: + create_folder(path='./datasubset') + os.makedirs('./datasubset/images') + exclude_classes = 43 + if exclude_classes not in l[:, 0]: + ns += 1 + # shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image + with open('./datasubset/images.txt', 'a') as f: + f.write(self.img_files[i] + '\n') + + # Extract object detection boxes for a second stage classifier + if extract_bounding_boxes: + p = Path(self.img_files[i]) + img = cv2.imread(str(p)) + h, w = img.shape[:2] + for j, x in enumerate(l): + f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name) + if not os.path.exists(Path(f).parent): + os.makedirs(Path(f).parent) # make new output folder + + b = x[1:] * [w, h, w, h] # box + b[2:] = b[2:].max() # rectangle to square + b[2:] = b[2:] * 1.3 + 30 # pad + b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) + + b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image + b[[1, 3]] = np.clip(b[[1, 3]], 0, h) + assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes' + else: + ne += 1 # print('empty labels for image %s' % self.img_files[i]) # file empty + # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i])) # remove + + if rank in [-1, 0]: + pbar.desc = 'Scanning labels %s 
(%g found, %g missing, %g empty, %g duplicate, for %g images)' % ( + cache_path, nf, nm, ne, nd, n) + if nf == 0: + s = 'WARNING: No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url) + print(s) + assert not augment, '%s. Can not train without labels.' % s + + # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM) + self.imgs = [None] * n + if cache_images: + gb = 0 # Gigabytes of cached images + self.img_hw0, self.img_hw = [None] * n, [None] * n + results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads + pbar = tqdm(enumerate(results), total=n) + for i, x in pbar: + self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i) + gb += self.imgs[i].nbytes + pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9) + + def cache_labels(self, path='labels.cache3'): + # Cache dataset labels, check images and read shapes + x = {} # dict + pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files)) + for (img, label) in pbar: + try: + l = [] + im = Image.open(img) + im.verify() # PIL verify + shape = exif_size(im) # image size + assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels' + if os.path.isfile(label): + with open(label, 'r') as f: + l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) # labels + if len(l) == 0: + l = np.zeros((0, 5), dtype=np.float32) + x[img] = [l, shape] + except Exception as e: + print('WARNING: Ignoring corrupted image and/or label %s: %s' % (img, e)) + + x['hash'] = get_hash(self.label_files + self.img_files) + torch.save(x, path) # save for next time + return x + + def __len__(self): + return len(self.img_files) + + # def __iter__(self): + # self.count = -1 + # print('ran dataset iter') + # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) + # return self + + def __getitem__(self, index): + if self.image_weights: + index = self.indices[index] + + hyp = self.hyp + mosaic = self.mosaic and random.random() < hyp['mosaic'] + if mosaic: + # Load mosaic + img, labels = load_mosaic(self, index) + #img, labels = load_mosaic9(self, index) + shapes = None + + # MixUp https://arxiv.org/pdf/1710.09412.pdf + if random.random() < hyp['mixup']: + img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1)) + #img2, labels2 = load_mosaic9(self, random.randint(0, len(self.labels) - 1)) + r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0 + img = (img * r + img2 * (1 - r)).astype(np.uint8) + labels = np.concatenate((labels, labels2), 0) + + else: + # Load image + img, (h0, w0), (h, w) = load_image(self, index) + + # Letterbox + shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape + img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) + shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + + # Load labels + labels = [] + x = self.labels[index] + if x.size > 0: + # Normalized xywh to pixel xyxy format + labels = x.copy() + labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width + labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height + labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0] + labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1] + + if self.augment: + # Augment imagespace + if not mosaic: + img, labels = random_perspective(img, labels, + 
degrees=hyp['degrees'], + translate=hyp['translate'], + scale=hyp['scale'], + shear=hyp['shear'], + perspective=hyp['perspective']) + + # Augment colorspace + augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) + + # Apply cutouts + # if random.random() < 0.9: + # labels = cutout(img, labels) + + nL = len(labels) # number of labels + if nL: + labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh + labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1 + labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1 + + if self.augment: + # flip up-down + if random.random() < hyp['flipud']: + img = np.flipud(img) + if nL: + labels[:, 2] = 1 - labels[:, 2] + + # flip left-right + if random.random() < hyp['fliplr']: + img = np.fliplr(img) + if nL: + labels[:, 1] = 1 - labels[:, 1] + + labels_out = torch.zeros((nL, 6)) + if nL: + labels_out[:, 1:] = torch.from_numpy(labels) + + # Convert + img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 + img = np.ascontiguousarray(img) + + return torch.from_numpy(img), labels_out, self.img_files[index], shapes + + @staticmethod + def collate_fn(batch): + img, label, path, shapes = zip(*batch) # transposed + for i, l in enumerate(label): + l[:, 0] = i # add target image index for build_targets() + return torch.stack(img, 0), torch.cat(label, 0), path, shapes + + +class LoadImagesAndLabels9(Dataset): # for training/testing + def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, + cache_images=False, single_cls=False, stride=32, pad=0.0, rank=-1): + self.img_size = img_size + self.augment = augment + self.hyp = hyp + self.image_weights = image_weights + self.rect = False if image_weights else rect + self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training) + self.mosaic_border = [-img_size // 2, -img_size // 2] + self.stride = stride + + def img2label_paths(img_paths): + # Define label paths as a function of image paths + sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings + return [x.replace(sa, sb, 1).replace(x.split('.')[-1], 'txt') for x in img_paths] + + try: + f = [] # image files + for p in path if isinstance(path, list) else [path]: + p = Path(p) # os-agnostic + if p.is_dir(): # dir + f += glob.glob(str(p / '**' / '*.*'), recursive=True) + elif p.is_file(): # file + with open(p, 'r') as t: + t = t.read().splitlines() + parent = str(p.parent) + os.sep + f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path + else: + raise Exception('%s does not exist' % p) + self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats]) + assert self.img_files, 'No images found' + except Exception as e: + raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url)) + + # Check cache + self.label_files = img2label_paths(self.img_files) # labels + cache_path = str(Path(self.label_files[0]).parent) + '.cache3' # cached labels + if os.path.isfile(cache_path): + cache = torch.load(cache_path) # load + if cache['hash'] != get_hash(self.label_files + self.img_files): # dataset changed + cache = self.cache_labels(cache_path) # re-cache + else: + cache = self.cache_labels(cache_path) # cache + + # Read cache + cache.pop('hash') # remove hash + labels, shapes = zip(*cache.values()) + self.labels = list(labels) + self.shapes = np.array(shapes, dtype=np.float64) + self.img_files = 
list(cache.keys()) # update + self.label_files = img2label_paths(cache.keys()) # update + + n = len(shapes) # number of images + bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index + nb = bi[-1] + 1 # number of batches + self.batch = bi # batch index of image + self.n = n + + # Rectangular Training + if self.rect: + # Sort by aspect ratio + s = self.shapes # wh + ar = s[:, 1] / s[:, 0] # aspect ratio + irect = ar.argsort() + self.img_files = [self.img_files[i] for i in irect] + self.label_files = [self.label_files[i] for i in irect] + self.labels = [self.labels[i] for i in irect] + self.shapes = s[irect] # wh + ar = ar[irect] + + # Set training image shapes + shapes = [[1, 1]] * nb + for i in range(nb): + ari = ar[bi == i] + mini, maxi = ari.min(), ari.max() + if maxi < 1: + shapes[i] = [maxi, 1] + elif mini > 1: + shapes[i] = [1, 1 / mini] + + self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride + + # Check labels + create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False + nm, nf, ne, ns, nd = 0, 0, 0, 0, 0 # number missing, found, empty, datasubset, duplicate + pbar = enumerate(self.label_files) + if rank in [-1, 0]: + pbar = tqdm(pbar) + for i, file in pbar: + l = self.labels[i] # label + if l is not None and l.shape[0]: + assert l.shape[1] == 5, '> 5 label columns: %s' % file + assert (l >= 0).all(), 'negative labels: %s' % file + assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file + if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows + nd += 1 # print('WARNING: duplicate rows in %s' % self.label_files[i]) # duplicate rows + if single_cls: + l[:, 0] = 0 # force dataset into single-class mode + self.labels[i] = l + nf += 1 # file found + + # Create subdataset (a smaller dataset) + if create_datasubset and ns < 1E4: + if ns == 0: + create_folder(path='./datasubset') + os.makedirs('./datasubset/images') + exclude_classes = 43 + if exclude_classes not in l[:, 0]: + ns += 1 + # shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image + with open('./datasubset/images.txt', 'a') as f: + f.write(self.img_files[i] + '\n') + + # Extract object detection boxes for a second stage classifier + if extract_bounding_boxes: + p = Path(self.img_files[i]) + img = cv2.imread(str(p)) + h, w = img.shape[:2] + for j, x in enumerate(l): + f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name) + if not os.path.exists(Path(f).parent): + os.makedirs(Path(f).parent) # make new output folder + + b = x[1:] * [w, h, w, h] # box + b[2:] = b[2:].max() # rectangle to square + b[2:] = b[2:] * 1.3 + 30 # pad + b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) + + b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image + b[[1, 3]] = np.clip(b[[1, 3]], 0, h) + assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes' + else: + ne += 1 # print('empty labels for image %s' % self.img_files[i]) # file empty + # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i])) # remove + + if rank in [-1, 0]: + pbar.desc = 'Scanning labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % ( + cache_path, nf, nm, ne, nd, n) + if nf == 0: + s = 'WARNING: No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url) + print(s) + assert not augment, '%s. Can not train without labels.' 
% s + + # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM) + self.imgs = [None] * n + if cache_images: + gb = 0 # Gigabytes of cached images + self.img_hw0, self.img_hw = [None] * n, [None] * n + results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads + pbar = tqdm(enumerate(results), total=n) + for i, x in pbar: + self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i) + gb += self.imgs[i].nbytes + pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9) + + def cache_labels(self, path='labels.cache3'): + # Cache dataset labels, check images and read shapes + x = {} # dict + pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files)) + for (img, label) in pbar: + try: + l = [] + im = Image.open(img) + im.verify() # PIL verify + shape = exif_size(im) # image size + assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels' + if os.path.isfile(label): + with open(label, 'r') as f: + l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) # labels + if len(l) == 0: + l = np.zeros((0, 5), dtype=np.float32) + x[img] = [l, shape] + except Exception as e: + print('WARNING: Ignoring corrupted image and/or label %s: %s' % (img, e)) + + x['hash'] = get_hash(self.label_files + self.img_files) + torch.save(x, path) # save for next time + return x + + def __len__(self): + return len(self.img_files) + + # def __iter__(self): + # self.count = -1 + # print('ran dataset iter') + # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) + # return self + + def __getitem__(self, index): + if self.image_weights: + index = self.indices[index] + + hyp = self.hyp + mosaic = self.mosaic and random.random() < hyp['mosaic'] + if mosaic: + # Load mosaic + #img, labels = load_mosaic(self, index) + img, labels = load_mosaic9(self, index) + shapes = None + + # MixUp https://arxiv.org/pdf/1710.09412.pdf + if random.random() < hyp['mixup']: + #img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1)) + img2, labels2 = load_mosaic9(self, random.randint(0, len(self.labels) - 1)) + r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0 + img = (img * r + img2 * (1 - r)).astype(np.uint8) + labels = np.concatenate((labels, labels2), 0) + + else: + # Load image + img, (h0, w0), (h, w) = load_image(self, index) + + # Letterbox + shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape + img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) + shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + + # Load labels + labels = [] + x = self.labels[index] + if x.size > 0: + # Normalized xywh to pixel xyxy format + labels = x.copy() + labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width + labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height + labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0] + labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1] + + if self.augment: + # Augment imagespace + if not mosaic: + img, labels = random_perspective(img, labels, + degrees=hyp['degrees'], + translate=hyp['translate'], + scale=hyp['scale'], + shear=hyp['shear'], + perspective=hyp['perspective']) + + # Augment colorspace + augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) + + # Apply cutouts + # if random.random() < 0.9: + # 
labels = cutout(img, labels) + + nL = len(labels) # number of labels + if nL: + labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh + labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1 + labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1 + + if self.augment: + # flip up-down + if random.random() < hyp['flipud']: + img = np.flipud(img) + if nL: + labels[:, 2] = 1 - labels[:, 2] + + # flip left-right + if random.random() < hyp['fliplr']: + img = np.fliplr(img) + if nL: + labels[:, 1] = 1 - labels[:, 1] + + labels_out = torch.zeros((nL, 6)) + if nL: + labels_out[:, 1:] = torch.from_numpy(labels) + + # Convert + img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 + img = np.ascontiguousarray(img) + + return torch.from_numpy(img), labels_out, self.img_files[index], shapes + + @staticmethod + def collate_fn(batch): + img, label, path, shapes = zip(*batch) # transposed + for i, l in enumerate(label): + l[:, 0] = i # add target image index for build_targets() + return torch.stack(img, 0), torch.cat(label, 0), path, shapes + + +# Ancillary functions -------------------------------------------------------------------------------------------------- +def load_image(self, index): + # loads 1 image from dataset, returns img, original hw, resized hw + img = self.imgs[index] + if img is None: # not cached + path = self.img_files[index] + img = cv2.imread(path) # BGR + assert img is not None, 'Image Not Found ' + path + h0, w0 = img.shape[:2] # orig hw + r = self.img_size / max(h0, w0) # resize image to img_size + if r != 1: # always resize down, only resize up if training with augmentation + interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR + img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) + return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized + else: + return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized + + +def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5): + r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains + hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV)) + dtype = img.dtype # uint8 + + x = np.arange(0, 256, dtype=np.int16) + lut_hue = ((x * r[0]) % 180).astype(dtype) + lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) + lut_val = np.clip(x * r[2], 0, 255).astype(dtype) + + img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype) + cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed + + # Histogram equalization + # if random.random() < 0.2: + # for i in range(3): + # img[:, :, i] = cv2.equalizeHist(img[:, :, i]) + + +def load_mosaic(self, index): + # loads images in a mosaic + + labels4 = [] + s = self.img_size + yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y + indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices + for i, index in enumerate(indices): + # Load image + img, _, (h, w) = load_image(self, index) + + # place img in img4 + if i == 0: # top left + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) + elif i == 1: # top right + x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc + x1b, 
y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h + elif i == 2: # bottom left + x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) + x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) + elif i == 3: # bottom right + x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) + x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) + + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + padw = x1a - x1b + padh = y1a - y1b + + # Labels + x = self.labels[index] + labels = x.copy() + if x.size > 0: # Normalized xywh to pixel xyxy format + labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw + labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh + labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw + labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh + labels4.append(labels) + + # Concat/clip labels + if len(labels4): + labels4 = np.concatenate(labels4, 0) + np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:]) # use with random_perspective + # img4, labels4 = replicate(img4, labels4) # replicate + + # Augment + img4, labels4 = random_perspective(img4, labels4, + degrees=self.hyp['degrees'], + translate=self.hyp['translate'], + scale=self.hyp['scale'], + shear=self.hyp['shear'], + perspective=self.hyp['perspective'], + border=self.mosaic_border) # border to remove + + return img4, labels4 + + +def load_mosaic9(self, index): + # loads images in a 9-mosaic + + labels9 = [] + s = self.img_size + indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(8)] # 8 additional image indices + for i, index in enumerate(indices): + # Load image + img, _, (h, w) = load_image(self, index) + + # place img in img9 + if i == 0: # center + img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + h0, w0 = h, w + c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates + elif i == 1: # top + c = s, s - h, s + w, s + elif i == 2: # top right + c = s + wp, s - h, s + wp + w, s + elif i == 3: # right + c = s + w0, s, s + w0 + w, s + h + elif i == 4: # bottom right + c = s + w0, s + hp, s + w0 + w, s + hp + h + elif i == 5: # bottom + c = s + w0 - w, s + h0, s + w0, s + h0 + h + elif i == 6: # bottom left + c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h + elif i == 7: # left + c = s - w, s + h0 - h, s, s + h0 + elif i == 8: # top left + c = s - w, s + h0 - hp - h, s, s + h0 - hp + + padx, pady = c[:2] + x1, y1, x2, y2 = [max(x, 0) for x in c] # allocate coords + + # Labels + x = self.labels[index] + labels = x.copy() + if x.size > 0: # Normalized xywh to pixel xyxy format + labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padx + labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + pady + labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padx + labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + pady + labels9.append(labels) + + # Image + img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax] + hp, wp = h, w # height, width previous + + # Offset + yc, xc = [int(random.uniform(0, s)) for x in self.mosaic_border] # mosaic center x, y + img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s] + + # Concat/clip labels + if len(labels9): + labels9 = np.concatenate(labels9, 0) + labels9[:, [1, 3]] -= xc + labels9[:, [2, 4]] -= yc + + np.clip(labels9[:, 1:], 0, 2 * s, out=labels9[:, 1:]) # use with random_perspective + # img9, labels9 = replicate(img9, labels9) # replicate + + # Augment + img9, labels9 = random_perspective(img9, labels9, + degrees=self.hyp['degrees'], + translate=self.hyp['translate'], + 
scale=self.hyp['scale'], + shear=self.hyp['shear'], + perspective=self.hyp['perspective'], + border=self.mosaic_border) # border to remove + + return img9, labels9 + + +def replicate(img, labels): + # Replicate labels + h, w = img.shape[:2] + boxes = labels[:, 1:].astype(int) + x1, y1, x2, y2 = boxes.T + s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels) + for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices + x1b, y1b, x2b, y2b = boxes[i] + bh, bw = y2b - y1b, x2b - x1b + yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y + x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh] + img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0) + + return img, labels + + +def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, auto_size=32): + # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232 + shape = img.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better test mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, auto_size), np.mod(dh, auto_size) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return img, ratio, (dw, dh) + + +def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)): + # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) + # targets = [cls, xyxy] + + height = img.shape[0] + border[0] * 2 # shape(h,w,c) + width = img.shape[1] + border[1] * 2 + + # Center + C = np.eye(3) + C[0, 2] = -img.shape[1] / 2 # x translation (pixels) + C[1, 2] = -img.shape[0] / 2 # y translation (pixels) + + # Perspective + P = np.eye(3) + P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) + P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) + + # Rotation and Scale + R = np.eye(3) + a = random.uniform(-degrees, degrees) + # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations + s = random.uniform(1 - scale, 1 + scale) + # s = 2 ** random.uniform(-scale, scale) + R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) + + # Shear + S = np.eye(3) + S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) + + # Translation + T = np.eye(3) + T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation 
(pixels) + T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels) + + # Combined rotation matrix + M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT + if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed + if perspective: + img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114)) + else: # affine + img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) + + # Visualize + # import matplotlib.pyplot as plt + # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() + # ax[0].imshow(img[:, :, ::-1]) # base + # ax[1].imshow(img2[:, :, ::-1]) # warped + + # Transform label coordinates + n = len(targets) + if n: + # warp points + xy = np.ones((n * 4, 3)) + xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 + xy = xy @ M.T # transform + if perspective: + xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale + else: # affine + xy = xy[:, :2].reshape(n, 8) + + # create new boxes + x = xy[:, [0, 2, 4, 6]] + y = xy[:, [1, 3, 5, 7]] + xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T + + # # apply angle-based reduction of bounding boxes + # radians = a * math.pi / 180 + # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5 + # x = (xy[:, 2] + xy[:, 0]) / 2 + # y = (xy[:, 3] + xy[:, 1]) / 2 + # w = (xy[:, 2] - xy[:, 0]) * reduction + # h = (xy[:, 3] - xy[:, 1]) * reduction + # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T + + # clip boxes + xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) + xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) + + # filter candidates + i = box_candidates(box1=targets[:, 1:5].T * s, box2=xy.T) + targets = targets[i] + targets[:, 1:5] = xy[i] + + return img, targets + + +def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1): # box1(4,n), box2(4,n) + # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio + w1, h1 = box1[2] - box1[0], box1[3] - box1[1] + w2, h2 = box2[2] - box2[0], box2[3] - box2[1] + ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16)) # aspect ratio + return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr) & (ar < ar_thr) # candidates + + +def cutout(image, labels): + # Applies image cutout augmentation https://arxiv.org/abs/1708.04552 + h, w = image.shape[:2] + + def bbox_ioa(box1, box2): + # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. 
boxes are x1y1x2y2
+        box2 = box2.transpose()
+
+        # Get the coordinates of bounding boxes
+        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
+        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
+
+        # Intersection area
+        inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
+                     (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
+
+        # box2 area
+        box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
+
+        # Intersection over box2 area
+        return inter_area / box2_area
+
+    # create random masks
+    scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
+    for s in scales:
+        mask_h = random.randint(1, int(h * s))
+        mask_w = random.randint(1, int(w * s))
+
+        # box
+        xmin = max(0, random.randint(0, w) - mask_w // 2)
+        ymin = max(0, random.randint(0, h) - mask_h // 2)
+        xmax = min(w, xmin + mask_w)
+        ymax = min(h, ymin + mask_h)
+
+        # apply random color mask
+        image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
+
+        # return unobscured labels
+        if len(labels) and s > 0.03:
+            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
+            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
+            labels = labels[ioa < 0.60]  # remove >60% obscured labels
+
+    return labels
+
+
+def create_folder(path='./new'):
+    # Create folder
+    if os.path.exists(path):
+        shutil.rmtree(path)  # delete output folder
+    os.makedirs(path)  # make new output folder
+
+
+def flatten_recursive(path='../coco128'):
+    # Flatten a recursive directory by bringing all files to top level
+    new_path = Path(path + '_flat')
+    create_folder(new_path)
+    for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
+        shutil.copyfile(file, new_path / Path(file).name)
+
+
diff --git a/asone/detectors/yolor/utils/export.py b/asone/detectors/yolor/utils/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..a49c578bfe538306cd8370cd24c8f3edd63bf00d
--- /dev/null
+++ b/asone/detectors/yolor/utils/export.py
@@ -0,0 +1,80 @@
+import argparse
+
+import torch
+from asone.detectors.yolor.models.models import *
+from asone.detectors.yolor.utils.google_utils import attempt_download
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--weights', type=str, default='./yolov4.pt', help='weights path')
+    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
+    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
+    parser.add_argument('--cfg', type=str, default='cfg/yolor_p6.cfg', help='*.cfg path')
+    opt = parser.parse_args()
+    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
+    # print(opt)
+
+    # Input
+    img = torch.zeros((opt.batch_size, 3, *opt.img_size))  # image size(1,3,320,192) iDetection
+
+    # Load PyTorch model
+    attempt_download(opt.weights)
+    device = torch.device('cpu')  # export runs on CPU
+    # build the Darknet graph from the cfg, then load the checkpoint weights into it
+    model = Darknet(opt.cfg, opt.img_size)
+    ckpt = torch.load(opt.weights, map_location=device)
+    model.load_state_dict(ckpt['model'])
+    model.to(device)
+    model.eval()
+    model.model[-1].export = True  # set Detect() layer export=True
+    y = model(img)  # dry run
+
+    # print("-------------------")
+    # model = Darknet(cfg, imgsz).cuda()
+    # model.load_state_dict(torch.load(weights[0], map_location=device)['model'])
+    #model = attempt_load(weights, map_location=device)  # load FP32 model
+    #imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
+    # model.to(device).eval()
+    # TorchScript export
+    try:
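+        # Trace the eval-mode model with the dummy input and serialize it; the output
+        # file reuses the weights path with a .torchscript.pt suffix (see below).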
print('\nStarting TorchScript export with torch %s...' % torch.__version__) + f = opt.weights.replace('.pt', '.torchscript.pt') # filename + ts = torch.jit.trace(model, img) + ts.save(f) + print('TorchScript export success, saved as %s' % f) + except Exception as e: + print('TorchScript export failure: %s' % e) + + # ONNX export + try: + import onnx + + print('\nStarting ONNX export with onnx %s...' % onnx.__version__) + f = opt.weights.replace('.pt', '.onnx') # filename + model.fuse() # only for ONNX + torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], + output_names=['classes', 'boxes'] if y is None else ['output']) + + # Checks + onnx_model = onnx.load(f) # load onnx model + onnx.checker.check_model(onnx_model) # check onnx model + print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model + print('ONNX export success, saved as %s' % f) + except Exception as e: + print('ONNX export failure: %s' % e) + + # CoreML export + try: + import coremltools as ct + + print('\nStarting CoreML export with coremltools %s...' % ct.__version__) + # convert model from torchscript and apply pixel scaling as per detect.py + model = ct.convert(ts, inputs=[ct.ImageType(name='images', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) + f = opt.weights.replace('.pt', '.mlmodel') # filename + model.save(f) + print('CoreML export success, saved as %s' % f) + except Exception as e: + print('CoreML export failure: %s' % e) + + # Finish + print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.') diff --git a/asone/detectors/yolor/utils/general.py b/asone/detectors/yolor/utils/general.py new file mode 100644 index 0000000000000000000000000000000000000000..99c9b1d430b5cf312c953402a73a44f4f0840ba3 --- /dev/null +++ b/asone/detectors/yolor/utils/general.py @@ -0,0 +1,449 @@ +# General utils + +import glob +import logging +import math +import os +import platform +import random +import re +import subprocess +import time +from pathlib import Path + +import cv2 +import matplotlib +import numpy as np +import torch +import yaml + +from asone.detectors.yolor.utils.google_utils import gsutil_getsize +from asone.detectors.yolor.utils.metrics import fitness +from asone.detectors.yolor.utils.torch_utils import init_torch_seeds + +# Set printoptions +torch.set_printoptions(linewidth=320, precision=5, profile='long') +np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5 +matplotlib.rc('font', **{'size': 11}) + +# Prevent OpenCV from multithreading (to use PyTorch DataLoader) +cv2.setNumThreads(0) + + +def set_logging(rank=-1): + logging.basicConfig( + format="%(message)s", + level=logging.INFO if rank in [-1, 0] else logging.WARN) + + +def init_seeds(seed=0): + random.seed(seed) + np.random.seed(seed) + init_torch_seeds(seed) + + +def get_latest_run(search_dir='.'): + # Return path to most recent 'last.pt' in /runs (i.e. 
to --resume from) + last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True) + return max(last_list, key=os.path.getctime) if last_list else '' + + +def check_git_status(): + # Suggest 'git pull' if repo is out of date + if platform.system() in ['Linux', 'Darwin'] and not os.path.isfile('/.dockerenv'): + s = subprocess.check_output('if [ -d .git ]; then git fetch && git status -uno; fi', shell=True).decode('utf-8') + if 'Your branch is behind' in s: + print(s[s.find('Your branch is behind'):s.find('\n\n')] + '\n') + + +def check_img_size(img_size, s=32): + # Verify img_size is a multiple of stride s + new_size = make_divisible(img_size, int(s)) # ceil gs-multiple + if new_size != img_size: + print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size)) + return new_size + + +def check_file(file): + # Search for file if not found + if os.path.isfile(file) or file == '': + return file + else: + files = glob.glob('./**/' + file, recursive=True) # find file + assert len(files), 'File Not Found: %s' % file # assert file was found + assert len(files) == 1, "Multiple files match '%s', specify exact path: %s" % (file, files) # assert unique + return files[0] # return file + + +def check_dataset(dict): + # Download dataset if not found locally + val, s = dict.get('val'), dict.get('download') + if val and len(val): + val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path + if not all(x.exists() for x in val): + print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()]) + if s and len(s): # download script + print('Downloading %s ...' % s) + if s.startswith('http') and s.endswith('.zip'): # URL + f = Path(s).name # filename + torch.hub.download_url_to_file(s, f) + r = os.system('unzip -q %s -d ../ && rm %s' % (f, f)) # unzip + else: # bash script + r = os.system(s) + print('Dataset autodownload %s\n' % ('success' if r == 0 else 'failure')) # analyze return value + else: + raise Exception('Dataset not found.') + + +def make_divisible(x, divisor): + # Returns x evenly divisible by divisor + return math.ceil(x / divisor) * divisor + + +def labels_to_class_weights(labels, nc=80): + # Get class weights (inverse frequency) from training labels + if labels[0] is None: # no labels loaded + return torch.Tensor() + + labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO + classes = labels[:, 0].astype(np.int) # labels = [class xywh] + weights = np.bincount(classes, minlength=nc) # occurrences per class + + # Prepend gridpoint count (for uCE training) + # gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum() # gridpoints per image + # weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5 # prepend gridpoints to start + + weights[weights == 0] = 1 # replace empty bins with 1 + weights = 1 / weights # number of targets per class + weights /= weights.sum() # normalize + return torch.from_numpy(weights) + + +def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)): + # Produces image weights based on class mAPs + n = len(labels) + class_counts = np.array([np.bincount(labels[i][:, 0].astype(np.int), minlength=nc) for i in range(n)]) + image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1) + # index = random.choices(range(n), weights=image_weights, k=1) # weight image sample + return image_weights + + +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + # 
https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ + # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n') + # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n') + # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco + # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet + x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + return x + + +def xyxy2xywh(x): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center + y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center + y[:, 2] = x[:, 2] - x[:, 0] # width + y[:, 3] = x[:, 3] - x[:, 1] # height + return y + + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + + +def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): + # Rescale coords (xyxy) from img1_shape to img0_shape + if ratio_pad is None: # calculate from img0_shape + gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new + pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding + else: + gain = ratio_pad[0][0] + pad = ratio_pad[1] + + coords[:, [0, 2]] -= pad[0] # x padding + coords[:, [1, 3]] -= pad[1] # y padding + coords[:, :4] /= gain + clip_coords(coords, img0_shape) + return coords + + +def clip_coords(boxes, img_shape): + # Clip bounding xyxy bounding boxes to image shape (height, width) + boxes[:, 0].clamp_(0, img_shape[1]) # x1 + boxes[:, 1].clamp_(0, img_shape[0]) # y1 + boxes[:, 2].clamp_(0, img_shape[1]) # x2 + boxes[:, 3].clamp_(0, img_shape[0]) # y2 + + +def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, EIoU=False, ECIoU=False, eps=1e-9): + # Returns the IoU of box1 to box2. 
box1 is 4, box2 is nx4 + box2 = box2.T + + # Get the coordinates of bounding boxes + if x1y1x2y2: # x1, y1, x2, y2 = box1 + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + else: # transform from xywh to xyxy + b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 + b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 + b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 + b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 + + # Intersection area + inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ + (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) + + # Union Area + w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps + w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps + union = w1 * h1 + w2 * h2 - inter + eps + + iou = inter / union + if GIoU or DIoU or CIoU or EIoU or ECIoU: + cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width + ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height + if CIoU or DIoU or EIoU or ECIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 + c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared + rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared + if DIoU: + return iou - rho2 / c2 # DIoU + elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) + with torch.no_grad(): + alpha = v / ((1 + eps) - iou + v) + return iou - (rho2 / c2 + v * alpha) # CIoU + elif EIoU: # Efficient IoU https://arxiv.org/abs/2101.08158 + rho3 = (w1-w2) **2 + c3 = cw ** 2 + eps + rho4 = (h1-h2) **2 + c4 = ch ** 2 + eps + return iou - rho2 / c2 - rho3 / c3 - rho4 / c4 # EIoU + elif ECIoU: + v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) + with torch.no_grad(): + alpha = v / ((1 + eps) - iou + v) + rho3 = (w1-w2) **2 + c3 = cw ** 2 + eps + rho4 = (h1-h2) **2 + c4 = ch ** 2 + eps + return iou - v * alpha - rho2 / c2 - rho3 / c3 - rho4 / c4 # ECIoU + else: # GIoU https://arxiv.org/pdf/1902.09630.pdf + c_area = cw * ch + eps # convex area + return iou - (c_area - union) / c_area # GIoU + else: + return iou # IoU + + +def box_iou(box1, box2): + # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py + """ + Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: + box1 (Tensor[N, 4]) + box2 (Tensor[M, 4]) + Returns: + iou (Tensor[N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + + def box_area(box): + # box = 4xn + return (box[2] - box[0]) * (box[3] - box[1]) + + area1 = box_area(box1.T) + area2 = box_area(box2.T) + + # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) + inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) + return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter) + + +def wh_iou(wh1, wh2): + # Returns the nxm IoU matrix. 
wh1 is nx2, wh2 is mx2 + wh1 = wh1[:, None] # [N,1,2] + wh2 = wh2[None] # [1,M,2] + inter = torch.min(wh1, wh2).prod(2) # [N,M] + return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter) + + +def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, merge=False, classes=None, agnostic=False): + """Performs Non-Maximum Suppression (NMS) on inference results + + Returns: + detections with shape: nx6 (x1, y1, x2, y2, conf, cls) + """ + + nc = prediction[0].shape[1] - 5 # number of classes + xc = prediction[..., 4] > conf_thres # candidates + + # Settings + min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height + max_det = 300 # maximum number of detections per image + time_limit = 10.0 # seconds to quit after + redundant = True # require redundant detections + multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img) + + t = time.time() + output = [torch.zeros(0, 6)] * prediction.shape[0] + for xi, x in enumerate(prediction): # image index, image inference + # Apply constraints + # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height + x = x[xc[xi]] # confidence + + # If none remain process next image + if not x.shape[0]: + continue + + # Compute conf + x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf + + # Box (center x, center y, width, height) to (x1, y1, x2, y2) + box = xywh2xyxy(x[:, :4]) + + # Detections matrix nx6 (xyxy, conf, cls) + if multi_label: + i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T + x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) + else: # best class only + conf, j = x[:, 5:].max(1, keepdim=True) + x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] + + # Filter by class + if classes: + x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] + + # Apply finite constraint + # if not torch.isfinite(x).all(): + # x = x[torch.isfinite(x).all(1)] + + # If none remain process next image + n = x.shape[0] # number of boxes + if not n: + continue + + # Sort by confidence + # x = x[x[:, 4].argsort(descending=True)] + + # Batched NMS + c = x[:, 5:6] * (0 if agnostic else max_wh) # classes + boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores + i = torch.ops.torchvision.nms(boxes, scores, iou_thres) + if i.shape[0] > max_det: # limit detections + i = i[:max_det] + if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) + # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) + iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix + weights = iou * scores[None] # box weights + x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes + if redundant: + i = i[iou.sum(1) > 1] # require redundancy + + output[xi] = x[i] + if (time.time() - t) > time_limit: + break # time limit exceeded + + return output + + +def strip_optimizer(f='weights/best.pt', s=''): # from utils.general import *; strip_optimizer() + # Strip optimizer from 'f' to finalize training, optionally save as 's' + x = torch.load(f, map_location=torch.device('cpu')) + x['optimizer'] = None + x['training_results'] = None + x['epoch'] = -1 + #x['model'].half() # to FP16 + #for p in x['model'].parameters(): + # p.requires_grad = False + torch.save(x, s or f) + mb = os.path.getsize(s or f) / 1E6 # filesize + print('Optimizer stripped from %s,%s %.1fMB' % (f, (' saved as %s,' % s) if s else '', mb)) + + +def print_mutation(hyp, results, yaml_file='hyp_evolved.yaml', bucket=''): + # Print mutation 
results to evolve.txt (for use with train.py --evolve) + a = '%10s' * len(hyp) % tuple(hyp.keys()) # hyperparam keys + b = '%10.3g' * len(hyp) % tuple(hyp.values()) # hyperparam values + c = '%10.4g' * len(results) % results # results (P, R, mAP@0.5, mAP@0.5:0.95, val_losses x 3) + print('\n%s\n%s\nEvolved fitness: %s\n' % (a, b, c)) + + if bucket: + url = 'gs://%s/evolve.txt' % bucket + if gsutil_getsize(url) > (os.path.getsize('evolve.txt') if os.path.exists('evolve.txt') else 0): + os.system('gsutil cp %s .' % url) # download evolve.txt if larger than local + + with open('evolve.txt', 'a') as f: # append result + f.write(c + b + '\n') + x = np.unique(np.loadtxt('evolve.txt', ndmin=2), axis=0) # load unique rows + x = x[np.argsort(-fitness(x))] # sort + np.savetxt('evolve.txt', x, '%10.3g') # save sort by fitness + + # Save yaml + for i, k in enumerate(hyp.keys()): + hyp[k] = float(x[0, i + 7]) + with open(yaml_file, 'w') as f: + results = tuple(x[0, :7]) + c = '%10.4g' * len(results) % results # results (P, R, mAP@0.5, mAP@0.5:0.95, val_losses x 3) + f.write('# Hyperparameter Evolution Results\n# Generations: %g\n# Metrics: ' % len(x) + c + '\n\n') + yaml.dump(hyp, f, sort_keys=False) + + if bucket: + os.system('gsutil cp evolve.txt %s gs://%s' % (yaml_file, bucket)) # upload + + +def apply_classifier(x, model, img, im0): + # applies a second stage classifier to yolo outputs + im0 = [im0] if isinstance(im0, np.ndarray) else im0 + for i, d in enumerate(x): # per image + if d is not None and len(d): + d = d.clone() + + # Reshape and pad cutouts + b = xyxy2xywh(d[:, :4]) # boxes + b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # rectangle to square + b[:, 2:] = b[:, 2:] * 1.3 + 30 # pad + d[:, :4] = xywh2xyxy(b).long() + + # Rescale boxes from img_size to im0 size + scale_coords(img.shape[2:], d[:, :4], im0[i].shape) + + # Classes + pred_cls1 = d[:, 5].long() + ims = [] + for j, a in enumerate(d): # per item + cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])] + im = cv2.resize(cutout, (224, 224)) # BGR + # cv2.imwrite('test%i.jpg' % j, cutout) + + im = im[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 + im = np.ascontiguousarray(im, dtype=np.float32) # uint8 to float32 + im /= 255.0 # 0 - 255 to 0.0 - 1.0 + ims.append(im) + + pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1) # classifier prediction + x[i] = x[i][pred_cls1 == pred_cls2] # retain matching class detections + + return x + + +def increment_path(path, exist_ok=True, sep=''): + # Increment path, i.e. runs/exp --> runs/exp{sep}0, runs/exp{sep}1 etc. 
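+    # e.g. with exist_ok=False: 'runs/exp' is returned while it does not exist yet,
+    # then 'runs/exp2', 'runs/exp3', ... as numbered run directories accumulate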
+ path = Path(path) # os-agnostic + if (path.exists() and exist_ok) or (not path.exists()): + return str(path) + else: + dirs = glob.glob(f"{path}{sep}*") # similar paths + matches = [re.search(rf"%s{sep}(\d+)" % path.stem, d) for d in dirs] + i = [int(m.groups()[0]) for m in matches if m] # indices + n = max(i) + 1 if i else 2 # increment number + return f"{path}{sep}{n}" # update path diff --git a/asone/detectors/yolor/utils/google_utils.py b/asone/detectors/yolor/utils/google_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7f476a1949f5868b31afb319fdc7b8d1977dc37b --- /dev/null +++ b/asone/detectors/yolor/utils/google_utils.py @@ -0,0 +1,132 @@ +# Google utils: https://cloud.google.com/storage/docs/reference/libraries + +import os +import platform +import subprocess +import time +from pathlib import Path + +import torch +import torch.nn as nn + + +def gsutil_getsize(url=''): + # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du + s = subprocess.check_output('gsutil du %s' % url, shell=True).decode('utf-8') + return eval(s.split(' ')[0]) if len(s) else 0 # bytes + + +def attempt_download(weights): + # Attempt to download pretrained weights if not found locally + weights = weights.strip().replace("'", '') + file = Path(weights).name + + msg = weights + ' missing, try downloading from https://github.com/WongKinYiu/yolor/releases/' + models = ['yolor_p6.pt', 'yolor_w6.pt'] # available models + + if file in models and not os.path.isfile(weights): + + try: # GitHub + url = 'https://github.com/WongKinYiu/yolor/releases/download/v1.0/' + file + print('Downloading %s to %s...' % (url, weights)) + torch.hub.download_url_to_file(url, weights) + assert os.path.exists(weights) and os.path.getsize(weights) > 1E6 # check + except Exception as e: # GCP + print('ERROR: Download failure.') + print('') + + +def attempt_load(weights, map_location=None): + # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a + model = Ensemble() + for w in weights if isinstance(weights, list) else [weights]: + attempt_download(w) + model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model + + if len(model) == 1: + return model[-1] # return model + else: + print('Ensemble created with %s\n' % weights) + for k in ['names', 'stride']: + setattr(model, k, getattr(model[-1], k)) + return model # return ensemble + + +def gdrive_download(id='1n_oKgR81BJtqk75b00eAjdv03qVCQn2f', name='coco128.zip'): + # Downloads a file from Google Drive. from utils.google_utils import *; gdrive_download() + t = time.time() + + print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... 
' % (id, name), end='') + os.remove(name) if os.path.exists(name) else None # remove existing + os.remove('cookie') if os.path.exists('cookie') else None + + # Attempt file download + out = "NUL" if platform.system() == "Windows" else "/dev/null" + os.system('curl -c ./cookie -s -L "drive.google.com/uc?export=download&id=%s" > %s ' % (id, out)) + if os.path.exists('cookie'): # large file + s = 'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm=%s&id=%s" -o %s' % (get_token(), id, name) + else: # small file + s = 'curl -s -L -o %s "drive.google.com/uc?export=download&id=%s"' % (name, id) + r = os.system(s) # execute, capture return + os.remove('cookie') if os.path.exists('cookie') else None + + # Error check + if r != 0: + os.remove(name) if os.path.exists(name) else None # remove partial + print('Download error ') # raise Exception('Download error') + return r + + # Unzip if archive + if name.endswith('.zip'): + print('unzipping... ', end='') + os.system('unzip -q %s' % name) # unzip + os.remove(name) # remove zip to free space + + print('Done (%.1fs)' % (time.time() - t)) + return r + + +def get_token(cookie="./cookie"): + with open(cookie) as f: + for line in f: + if "download" in line: + return line.split()[-1] + return "" + +class Ensemble(nn.ModuleList): + # Ensemble of models + def __init__(self): + super().__init__() + + def forward(self, x, augment=False, profile=False, visualize=False): + y = [module(x, augment, profile, visualize)[0] for module in self] + # y = torch.stack(y).max(0)[0] # max ensemble + # y = torch.stack(y).mean(0) # mean ensemble + y = torch.cat(y, 1) # nms ensemble + return y, None # inference, train output +# def upload_blob(bucket_name, source_file_name, destination_blob_name): +# # Uploads a file to a bucket +# # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python +# +# storage_client = storage.Client() +# bucket = storage_client.get_bucket(bucket_name) +# blob = bucket.blob(destination_blob_name) +# +# blob.upload_from_filename(source_file_name) +# +# print('File {} uploaded to {}.'.format( +# source_file_name, +# destination_blob_name)) +# +# +# def download_blob(bucket_name, source_blob_name, destination_file_name): +# # Uploads a blob from a bucket +# storage_client = storage.Client() +# bucket = storage_client.get_bucket(bucket_name) +# blob = bucket.blob(source_blob_name) +# +# blob.download_to_filename(destination_file_name) +# +# print('Blob {} downloaded to {}.'.format( +# source_blob_name, +# destination_file_name)) diff --git a/asone/detectors/yolor/utils/layers.py b/asone/detectors/yolor/utils/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..c0665b55852b8ccfe08ddf28e1cfe8cc2b7fb746 --- /dev/null +++ b/asone/detectors/yolor/utils/layers.py @@ -0,0 +1,532 @@ +from asone.detectors.yolor.utils.general import * + +import torch +from torch import nn +import torch.nn.functional as F +try: + from mish_cuda import MishCuda as Mish + +except: + class Mish(nn.Module): # https://github.com/digantamisra98/Mish + def forward(self, x): + return x * F.softplus(x).tanh() + +try: + from pytorch_wavelets import DWTForward, DWTInverse + + class DWT(nn.Module): + def __init__(self): + super(DWT, self).__init__() + self.xfm = DWTForward(J=1, wave='db1', mode='zero') + + def forward(self, x): + b,c,w,h = x.shape + yl, yh = self.xfm(x) + return torch.cat([yl/2., yh[0].view(b,-1,w//2,h//2)/2.+.5], 1) + +except: # using Reorg instead + class DWT(nn.Module): + def forward(self, x): + return 
torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1) + + +class Reorg(nn.Module): + def forward(self, x): + return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1) + + +def make_divisible(v, divisor): + # Function ensures all layers have a channel number that is divisible by 8 + # https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + return math.ceil(v / divisor) * divisor + + +class Flatten(nn.Module): + # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions + def forward(self, x): + return x.view(x.size(0), -1) + + +class Concat(nn.Module): + # Concatenate a list of tensors along dimension + def __init__(self, dimension=1): + super(Concat, self).__init__() + self.d = dimension + + def forward(self, x): + return torch.cat(x, self.d) + + +class FeatureConcat(nn.Module): + def __init__(self, layers): + super(FeatureConcat, self).__init__() + self.layers = layers # layer indices + self.multiple = len(layers) > 1 # multiple layers flag + + def forward(self, x, outputs): + return torch.cat([outputs[i] for i in self.layers], 1) if self.multiple else outputs[self.layers[0]] + + +class FeatureConcat2(nn.Module): + def __init__(self, layers): + super(FeatureConcat2, self).__init__() + self.layers = layers # layer indices + self.multiple = len(layers) > 1 # multiple layers flag + + def forward(self, x, outputs): + return torch.cat([outputs[self.layers[0]], outputs[self.layers[1]].detach()], 1) + + +class FeatureConcat3(nn.Module): + def __init__(self, layers): + super(FeatureConcat3, self).__init__() + self.layers = layers # layer indices + self.multiple = len(layers) > 1 # multiple layers flag + + def forward(self, x, outputs): + return torch.cat([outputs[self.layers[0]], outputs[self.layers[1]].detach(), outputs[self.layers[2]].detach()], 1) + + +class FeatureConcat_l(nn.Module): + def __init__(self, layers): + super(FeatureConcat_l, self).__init__() + self.layers = layers # layer indices + self.multiple = len(layers) > 1 # multiple layers flag + + def forward(self, x, outputs): + return torch.cat([outputs[i][:,:outputs[i].shape[1]//2,:,:] for i in self.layers], 1) if self.multiple else outputs[self.layers[0]][:,:outputs[self.layers[0]].shape[1]//2,:,:] + + +class WeightedFeatureFusion(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers, weight=False): + super(WeightedFeatureFusion, self).__init__() + self.layers = layers # layer indices + self.weight = weight # apply weights boolean + self.n = len(layers) + 1 # number of layers + if weight: + self.w = nn.Parameter(torch.zeros(self.n), requires_grad=True) # layer weights + + def forward(self, x, outputs): + # Weights + if self.weight: + w = torch.sigmoid(self.w) * (2 / self.n) # sigmoid weights (0-1) + x = x * w[0] + + # Fusion + nx = x.shape[1] # input channels + for i in range(self.n - 1): + a = outputs[self.layers[i]] * w[i + 1] if self.weight else outputs[self.layers[i]] # feature to add + na = a.shape[1] # feature channels + + # Adjust channels + if nx == na: # same shape + x = x + a + elif nx > na: # slice input + x[:, :na] = x[:, :na] + a # or a = nn.ZeroPad2d((0, 0, 0, 0, 0, dc))(a); x = x + a + else: # slice feature + x = x + a[:, :nx] + + return x + + +class MixConv2d(nn.Module): # MixConv: Mixed Depthwise Convolutional Kernels https://arxiv.org/abs/1907.09595 + def __init__(self, in_ch, out_ch, k=(3, 5, 7), stride=1, dilation=1, bias=True, 
method='equal_params'): + super(MixConv2d, self).__init__() + + groups = len(k) + if method == 'equal_ch': # equal channels per group + i = torch.linspace(0, groups - 1E-6, out_ch).floor() # out_ch indices + ch = [(i == g).sum() for g in range(groups)] + else: # 'equal_params': equal parameter count per group + b = [out_ch] + [0] * groups + a = np.eye(groups + 1, groups, k=-1) + a -= np.roll(a, 1, axis=1) + a *= np.array(k) ** 2 + a[0] = 1 + ch = np.linalg.lstsq(a, b, rcond=None)[0].round().astype(int) # solve for equal weight indices, ax = b + + self.m = nn.ModuleList([nn.Conv2d(in_channels=in_ch, + out_channels=ch[g], + kernel_size=k[g], + stride=stride, + padding=k[g] // 2, # 'same' pad + dilation=dilation, + bias=bias) for g in range(groups)]) + + def forward(self, x): + return torch.cat([m(x) for m in self.m], 1) + + +# Activation functions below ------------------------------------------------------------------------------------------- +class SwishImplementation(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return x * torch.sigmoid(x) + + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + sx = torch.sigmoid(x) # sigmoid(ctx) + return grad_output * (sx * (1 + x * (1 - sx))) + + +class MishImplementation(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) + + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + sx = torch.sigmoid(x) + fx = F.softplus(x).tanh() + return grad_output * (fx + x * sx * (1 - fx * fx)) + + +class MemoryEfficientSwish(nn.Module): + def forward(self, x): + return SwishImplementation.apply(x) + + +class MemoryEfficientMish(nn.Module): + def forward(self, x): + return MishImplementation.apply(x) + + +class Swish(nn.Module): + def forward(self, x): + return x * torch.sigmoid(x) + + +class HardSwish(nn.Module): # https://arxiv.org/pdf/1905.02244.pdf + def forward(self, x): + return x * F.hardtanh(x + 3, 0., 6., True) / 6. + + +class DeformConv2d(nn.Module): + def __init__(self, inc, outc, kernel_size=3, padding=1, stride=1, bias=None, modulation=False): + """ + Args: + modulation (bool, optional): If True, Modulated Defomable Convolution (Deformable ConvNets v2). 
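+ If modulation is False this reduces to the original (v1) deformable convolution, using only the offsets predicted by self.p_conv; when True, self.m_conv additionally predicts per-location modulation masks in [0, 1] (see forward below).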
+ """ + super(DeformConv2d, self).__init__() + self.kernel_size = kernel_size + self.padding = padding + self.stride = stride + self.zero_padding = nn.ZeroPad2d(padding) + self.conv = nn.Conv2d(inc, outc, kernel_size=kernel_size, stride=kernel_size, bias=bias) + + self.p_conv = nn.Conv2d(inc, 2*kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride) + nn.init.constant_(self.p_conv.weight, 0) + self.p_conv.register_backward_hook(self._set_lr) + + self.modulation = modulation + if modulation: + self.m_conv = nn.Conv2d(inc, kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride) + nn.init.constant_(self.m_conv.weight, 0) + self.m_conv.register_backward_hook(self._set_lr) + + @staticmethod + def _set_lr(module, grad_input, grad_output): + grad_input = (grad_input[i] * 0.1 for i in range(len(grad_input))) + grad_output = (grad_output[i] * 0.1 for i in range(len(grad_output))) + + def forward(self, x): + offset = self.p_conv(x) + if self.modulation: + m = torch.sigmoid(self.m_conv(x)) + + dtype = offset.data.type() + ks = self.kernel_size + N = offset.size(1) // 2 + + if self.padding: + x = self.zero_padding(x) + + # (b, 2N, h, w) + p = self._get_p(offset, dtype) + + # (b, h, w, 2N) + p = p.contiguous().permute(0, 2, 3, 1) + q_lt = p.detach().floor() + q_rb = q_lt + 1 + + q_lt = torch.cat([torch.clamp(q_lt[..., :N], 0, x.size(2)-1), torch.clamp(q_lt[..., N:], 0, x.size(3)-1)], dim=-1).long() + q_rb = torch.cat([torch.clamp(q_rb[..., :N], 0, x.size(2)-1), torch.clamp(q_rb[..., N:], 0, x.size(3)-1)], dim=-1).long() + q_lb = torch.cat([q_lt[..., :N], q_rb[..., N:]], dim=-1) + q_rt = torch.cat([q_rb[..., :N], q_lt[..., N:]], dim=-1) + + # clip p + p = torch.cat([torch.clamp(p[..., :N], 0, x.size(2)-1), torch.clamp(p[..., N:], 0, x.size(3)-1)], dim=-1) + + # bilinear kernel (b, h, w, N) + g_lt = (1 + (q_lt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_lt[..., N:].type_as(p) - p[..., N:])) + g_rb = (1 - (q_rb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_rb[..., N:].type_as(p) - p[..., N:])) + g_lb = (1 + (q_lb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_lb[..., N:].type_as(p) - p[..., N:])) + g_rt = (1 - (q_rt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_rt[..., N:].type_as(p) - p[..., N:])) + + # (b, c, h, w, N) + x_q_lt = self._get_x_q(x, q_lt, N) + x_q_rb = self._get_x_q(x, q_rb, N) + x_q_lb = self._get_x_q(x, q_lb, N) + x_q_rt = self._get_x_q(x, q_rt, N) + + # (b, c, h, w, N) + x_offset = g_lt.unsqueeze(dim=1) * x_q_lt + \ + g_rb.unsqueeze(dim=1) * x_q_rb + \ + g_lb.unsqueeze(dim=1) * x_q_lb + \ + g_rt.unsqueeze(dim=1) * x_q_rt + + # modulation + if self.modulation: + m = m.contiguous().permute(0, 2, 3, 1) + m = m.unsqueeze(dim=1) + m = torch.cat([m for _ in range(x_offset.size(1))], dim=1) + x_offset *= m + + x_offset = self._reshape_x_offset(x_offset, ks) + out = self.conv(x_offset) + + return out + + def _get_p_n(self, N, dtype): + p_n_x, p_n_y = torch.meshgrid( + torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1), + torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1)) + # (2N, 1) + p_n = torch.cat([torch.flatten(p_n_x), torch.flatten(p_n_y)], 0) + p_n = p_n.view(1, 2*N, 1, 1).type(dtype) + + return p_n + + def _get_p_0(self, h, w, N, dtype): + p_0_x, p_0_y = torch.meshgrid( + torch.arange(1, h*self.stride+1, self.stride), + torch.arange(1, w*self.stride+1, self.stride)) + p_0_x = torch.flatten(p_0_x).view(1, 1, h, w).repeat(1, N, 1, 1) + p_0_y = torch.flatten(p_0_y).view(1, 1, h, w).repeat(1, N, 1, 1) + p_0 = torch.cat([p_0_x, p_0_y], 
1).type(dtype) + + return p_0 + + def _get_p(self, offset, dtype): + N, h, w = offset.size(1)//2, offset.size(2), offset.size(3) + + # (1, 2N, 1, 1) + p_n = self._get_p_n(N, dtype) + # (1, 2N, h, w) + p_0 = self._get_p_0(h, w, N, dtype) + p = p_0 + p_n + offset + return p + + def _get_x_q(self, x, q, N): + b, h, w, _ = q.size() + padded_w = x.size(3) + c = x.size(1) + # (b, c, h*w) + x = x.contiguous().view(b, c, -1) + + # (b, h, w, N) + index = q[..., :N]*padded_w + q[..., N:] # offset_x*w + offset_y + # (b, c, h*w*N) + index = index.contiguous().unsqueeze(dim=1).expand(-1, c, -1, -1, -1).contiguous().view(b, c, -1) + + x_offset = x.gather(dim=-1, index=index).contiguous().view(b, c, h, w, N) + + return x_offset + + @staticmethod + def _reshape_x_offset(x_offset, ks): + b, c, h, w, N = x_offset.size() + x_offset = torch.cat([x_offset[..., s:s+ks].contiguous().view(b, c, h, w*ks) for s in range(0, N, ks)], dim=-1) + x_offset = x_offset.contiguous().view(b, c, h*ks, w*ks) + + return x_offset + + +class GAP(nn.Module): + def __init__(self): + super(GAP, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + def forward(self, x): + #b, c, _, _ = x.size() + return self.avg_pool(x)#.view(b, c) + + +class Silence(nn.Module): + def __init__(self): + super(Silence, self).__init__() + def forward(self, x): + return x + + +class ScaleChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(ScaleChannel, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]] + return x.expand_as(a) * a + + +class ShiftChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(ShiftChannel, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]] + return a.expand_as(x) + x + + +class ShiftChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(ShiftChannel2D, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]].view(1,-1,1,1) + return a.expand_as(x) + x + + +class ControlChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(ControlChannel, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]] + return a.expand_as(x) * x + + +class ControlChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(ControlChannel2D, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]].view(1,-1,1,1) + return a.expand_as(x) * x + + +class AlternateChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(AlternateChannel, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]] + return torch.cat([a.expand_as(x), x], dim=1) + + +class AlternateChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(AlternateChannel2D, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]].view(1,-1,1,1) + return 
torch.cat([a.expand_as(x), x], dim=1) + + +class SelectChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(SelectChannel, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]] + return a.sigmoid().expand_as(x) * x + + +class SelectChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(SelectChannel2D, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]].view(1,-1,1,1) + return a.sigmoid().expand_as(x) * x + + +class ScaleSpatial(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(ScaleSpatial, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]] + return x * a + + +class ImplicitA(nn.Module): + def __init__(self, channel): + super(ImplicitA, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) + nn.init.normal_(self.implicit, std=.02) + + def forward(self): + return self.implicit + + +class ImplicitC(nn.Module): + def __init__(self, channel): + super(ImplicitC, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) + nn.init.normal_(self.implicit, std=.02) + + def forward(self): + return self.implicit + + +class ImplicitM(nn.Module): + def __init__(self, channel): + super(ImplicitM, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1)) + nn.init.normal_(self.implicit, mean=1., std=.02) + + def forward(self): + return self.implicit + + + +class Implicit2DA(nn.Module): + def __init__(self, atom, channel): + super(Implicit2DA, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.zeros(1, atom, channel, 1)) + nn.init.normal_(self.implicit, std=.02) + + def forward(self): + return self.implicit + + +class Implicit2DC(nn.Module): + def __init__(self, atom, channel): + super(Implicit2DC, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.zeros(1, atom, channel, 1)) + nn.init.normal_(self.implicit, std=.02) + + def forward(self): + return self.implicit + + +class Implicit2DM(nn.Module): + def __init__(self, atom, channel): + super(Implicit2DM, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.ones(1, atom, channel, 1)) + nn.init.normal_(self.implicit, mean=1., std=.02) + + def forward(self): + return self.implicit + + + \ No newline at end of file diff --git a/asone/detectors/yolor/utils/loss.py b/asone/detectors/yolor/utils/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..0701646c238f4357ab471e6afb75bf2e935a74d6 --- /dev/null +++ b/asone/detectors/yolor/utils/loss.py @@ -0,0 +1,173 @@ +# Loss functions + +import torch +import torch.nn as nn + +from asone.detectors.yolor.utils.general import bbox_iou +from asone.detectors.yolor.utils.torch_utils import is_parallel + + +def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 + # return positive, negative label smoothing BCE targets + return 1.0 - 0.5 * eps, 0.5 * eps + + +class BCEBlurWithLogitsLoss(nn.Module): + # BCEwithLogitLoss() with reduced missing label effects. 
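+ # Example usage (illustrative): criterion = BCEBlurWithLogitsLoss(alpha=0.05); loss = criterion(pred_logits, targets) + # The alpha_factor in forward() shrinks the loss when pred >> true, i.e. confident predictions on zero labels that are likely missing annotations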
+ def __init__(self, alpha=0.05): + super(BCEBlurWithLogitsLoss, self).__init__() + self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss() + self.alpha = alpha + + def forward(self, pred, true): + loss = self.loss_fcn(pred, true) + pred = torch.sigmoid(pred) # prob from logits + dx = pred - true # reduce only missing label effects + # dx = (pred - true).abs() # reduce missing label and false label effects + alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4)) + loss *= alpha_factor + return loss.mean() + + +class FocalLoss(nn.Module): + # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) + def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): + super(FocalLoss, self).__init__() + self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() + self.gamma = gamma + self.alpha = alpha + self.reduction = loss_fcn.reduction + self.loss_fcn.reduction = 'none' # required to apply FL to each element + + def forward(self, pred, true): + loss = self.loss_fcn(pred, true) + # p_t = torch.exp(-loss) + # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability + + # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py + pred_prob = torch.sigmoid(pred) # prob from logits + p_t = true * pred_prob + (1 - true) * (1 - pred_prob) + alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) + modulating_factor = (1.0 - p_t) ** self.gamma + loss *= alpha_factor * modulating_factor + + if self.reduction == 'mean': + return loss.mean() + elif self.reduction == 'sum': + return loss.sum() + else: # 'none' + return loss + + +def compute_loss(p, targets, model): # predictions, targets, model + device = targets.device + #print(device) + lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device) + tcls, tbox, indices, anchors = build_targets(p, targets, model) # targets + h = model.hyp # hyperparameters + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([h['cls_pw']])).to(device) + BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([h['obj_pw']])).to(device) + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + cp, cn = smooth_BCE(eps=0.0) + + # Focal loss + g = h['fl_gamma'] # focal loss gamma + if g > 0: + BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) + + # Losses + nt = 0 # number of targets + no = len(p) # number of outputs + balance = [4.0, 1.0, 0.4] if no == 3 else [4.0, 1.0, 0.4, 0.1] # P3-5 or P3-6 + balance = [4.0, 1.0, 0.5, 0.4, 0.1] if no == 5 else balance + for i, pi in enumerate(p): # layer index, layer predictions + b, a, gj, gi = indices[i] # image, anchor, gridy, gridx + tobj = torch.zeros_like(pi[..., 0], device=device) # target obj + + n = b.shape[0] # number of targets + if n: + nt += n # cumulative targets + ps = pi[b, a, gj, gi] # prediction subset corresponding to targets + + # Regression + pxy = ps[:, :2].sigmoid() * 2. 
- 0.5 + pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] + pbox = torch.cat((pxy, pwh), 1).to(device) # predicted box + iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) + lbox += (1.0 - iou).mean() # iou loss + + # Objectness + tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio + + # Classification + if model.nc > 1: # cls loss (only if multiple classes) + t = torch.full_like(ps[:, 5:], cn, device=device) # targets + t[range(n), tcls[i]] = cp + lcls += BCEcls(ps[:, 5:], t) # BCE + + # Append targets to text file + # with open('targets.txt', 'a') as file: + # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] + + lobj += BCEobj(pi[..., 4], tobj) * balance[i] # obj loss + + s = 3 / no # output count scaling + lbox *= h['box'] * s + lobj *= h['obj'] * s * (1.4 if no >= 4 else 1.) + lcls *= h['cls'] * s + bs = tobj.shape[0] # batch size + + loss = lbox + lobj + lcls + return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach() + + +def build_targets(p, targets, model): + nt = targets.shape[0] # number of anchors, targets + tcls, tbox, indices, anch = [], [], [], [] + gain = torch.ones(6, device=targets.device) # normalized to gridspace gain + off = torch.tensor([[1, 0], [0, 1], [-1, 0], [0, -1]], device=targets.device).float() # overlap offsets + + g = 0.5 # offset + multi_gpu = is_parallel(model) + for i, jj in enumerate(model.module.yolo_layers if multi_gpu else model.yolo_layers): + # get number of grid points and anchor vec for this yolo layer + anchors = model.module.module_list[jj].anchor_vec if multi_gpu else model.module_list[jj].anchor_vec + gain[2:] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain + + # Match targets to anchors + a, t, offsets = [], targets * gain, 0 + if nt: + na = anchors.shape[0] # number of anchors + at = torch.arange(na).view(na, 1).repeat(1, nt) # anchor tensor, same as .repeat_interleave(nt) + r = t[None, :, 4:6] / anchors[:, None] # wh ratio + j = torch.max(r, 1. / r).max(2)[0] < model.hyp['anchor_t'] # compare + # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n) = wh_iou(anchors(3,2), gwh(n,2)) + a, t = at[j], t.repeat(na, 1, 1)[j] # filter + + # overlaps + gxy = t[:, 2:4] # grid xy + z = torch.zeros_like(gxy) + j, k = ((gxy % 1. < g) & (gxy > 1.)).T + l, m = ((gxy % 1. 
> (1 - g)) & (gxy < (gain[[2, 3]] - 1.))).T + a, t = torch.cat((a, a[j], a[k], a[l], a[m]), 0), torch.cat((t, t[j], t[k], t[l], t[m]), 0) + offsets = torch.cat((z, z[j] + off[0], z[k] + off[1], z[l] + off[2], z[m] + off[3]), 0) * g + + # Define + b, c = t[:, :2].long().T # image, class + gxy = t[:, 2:4] # grid xy + gwh = t[:, 4:6] # grid wh + gij = (gxy - offsets).long() + gi, gj = gij.T # grid xy indices + + # Append + #indices.append((b, a, gj, gi)) # image, anchor, grid indices + indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices + tbox.append(torch.cat((gxy - gij, gwh), 1)) # box + anch.append(anchors[a]) # anchors + tcls.append(c) # class + + return tcls, tbox, indices, anch + diff --git a/asone/detectors/yolor/utils/metrics.py b/asone/detectors/yolor/utils/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..004090b0b583d3786811c1766e11a4748c86522d --- /dev/null +++ b/asone/detectors/yolor/utils/metrics.py @@ -0,0 +1,140 @@ +# Model validation metrics + +import matplotlib.pyplot as plt +import numpy as np + + +def fitness(x): + # Model fitness as a weighted combination of metrics + w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + return (x[:, :4] * w).sum(1) + + +def fitness_p(x): + # Model fitness as a weighted combination of metrics + w = [1.0, 0.0, 0.0, 0.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + return (x[:, :4] * w).sum(1) + + +def fitness_r(x): + # Model fitness as a weighted combination of metrics + w = [0.0, 1.0, 0.0, 0.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + return (x[:, :4] * w).sum(1) + + +def fitness_ap50(x): + # Model fitness as a weighted combination of metrics + w = [0.0, 0.0, 1.0, 0.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + return (x[:, :4] * w).sum(1) + + +def fitness_ap(x): + # Model fitness as a weighted combination of metrics + w = [0.0, 0.0, 0.0, 1.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + return (x[:, :4] * w).sum(1) + + +def fitness_f(x): + # Model fitness as a weighted combination of metrics + #w = [0.0, 0.0, 0.0, 1.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + return ((x[:, 0]*x[:, 1])/(x[:, 0]+x[:, 1])) + + +def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, fname='precision-recall_curve.png'): + """ Compute the average precision, given the recall and precision curves. + Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. + # Arguments + tp: True positives (nparray, nx1 or nx10). + conf: Objectness value from 0-1 (nparray). + pred_cls: Predicted object classes (nparray). + target_cls: True object classes (nparray). + plot: Plot precision-recall curve at mAP@0.5 + fname: Plot filename + # Returns + The average precision as computed in py-faster-rcnn. + """ + + # Sort by objectness + i = np.argsort(-conf) + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] + + # Find unique classes + unique_classes = np.unique(target_cls) + + # Create Precision-Recall curve and compute AP for each class + px, py = np.linspace(0, 1, 1000), [] # for plotting + pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898 + s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 
10 for mAP0.5...0.95) + ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s) + for ci, c in enumerate(unique_classes): + i = pred_cls == c + n_l = (target_cls == c).sum() # number of labels + n_p = i.sum() # number of predictions + + if n_p == 0 or n_l == 0: + continue + else: + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum(0) + tpc = tp[i].cumsum(0) + + # Recall + recall = tpc / (n_l + 1e-16) # recall curve + r[ci] = np.interp(-pr_score, -conf[i], recall[:, 0]) # r at pr_score, negative x, xp because xp decreases + + # Precision + precision = tpc / (tpc + fpc) # precision curve + p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0]) # p at pr_score + + # AP from recall-precision curve + for j in range(tp.shape[1]): + ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) + if j == 0: + py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 + + # Compute F1 score (harmonic mean of precision and recall) + f1 = 2 * p * r / (p + r + 1e-16) + + if plot: + py = np.stack(py, axis=1) + fig, ax = plt.subplots(1, 1, figsize=(5, 5)) + ax.plot(px, py, linewidth=0.5, color='grey') # plot(recall, precision) + ax.plot(px, py.mean(1), linewidth=2, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) + ax.set_xlabel('Recall') + ax.set_ylabel('Precision') + ax.set_xlim(0, 1) + ax.set_ylim(0, 1) + plt.legend() + fig.tight_layout() + fig.savefig(fname, dpi=200) + + return p, r, ap, f1, unique_classes.astype('int32') + + +def compute_ap(recall, precision): + """ Compute the average precision, given the recall and precision curves. + Source: https://github.com/rbgirshick/py-faster-rcnn. + # Arguments + recall: The recall curve (list). + precision: The precision curve (list). + # Returns + The average precision as computed in py-faster-rcnn. 
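+ Note: the 'method' variable below selects 101-point interpolation (the COCO convention, default) or the exact continuous area under the precision envelope.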
+ """ + + # Append sentinel values to beginning and end + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre = np.concatenate(([1.0], precision, [0.0])) + + # Compute the precision envelope + mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) + + # Integrate area under curve + method = 'interp' # methods: 'continuous', 'interp' + if method == 'interp': + x = np.linspace(0, 1, 101) # 101-point interp (COCO) + ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate + else: # 'continuous' + i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve + + return ap, mpre, mrec diff --git a/asone/detectors/yolor/utils/parse_config.py b/asone/detectors/yolor/utils/parse_config.py new file mode 100644 index 0000000000000000000000000000000000000000..d6cbfdd81f54c7017bcd35bfeccca7f6578f25ae --- /dev/null +++ b/asone/detectors/yolor/utils/parse_config.py @@ -0,0 +1,71 @@ +import os + +import numpy as np + + +def parse_model_cfg(path): + # Parse the yolo *.cfg file and return module definitions path may be 'cfg/yolov3.cfg', 'yolov3.cfg', or 'yolov3' + if not path.endswith('.cfg'): # add .cfg suffix if omitted + path += '.cfg' + if not os.path.exists(path) and os.path.exists('cfg' + os.sep + path): # add cfg/ prefix if omitted + path = 'cfg' + os.sep + path + + with open(path, 'r') as f: + lines = f.read().split('\n') + lines = [x for x in lines if x and not x.startswith('#')] + lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces + mdefs = [] # module definitions + for line in lines: + if line.startswith('['): # This marks the start of a new block + mdefs.append({}) + mdefs[-1]['type'] = line[1:-1].rstrip() + if mdefs[-1]['type'] == 'convolutional': + mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later) + + else: + key, val = line.split("=") + key = key.rstrip() + + if key == 'anchors': # return nparray + mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors + elif (key in ['from', 'layers', 'mask']) or (key == 'size' and ',' in val): # return array + mdefs[-1][key] = [int(x) for x in val.split(',')] + else: + val = val.strip() + if val.isnumeric(): # return int or float + mdefs[-1][key] = int(val) if (int(val) - float(val)) == 0 else float(val) + else: + mdefs[-1][key] = val # return string + + # Check all fields are supported + supported = ['type', 'batch_normalize', 'filters', 'size', 'stride', 'pad', 'activation', 'layers', 'groups', + 'from', 'mask', 'anchors', 'classes', 'num', 'jitter', 'ignore_thresh', 'truth_thresh', 'random', + 'stride_x', 'stride_y', 'weights_type', 'weights_normalization', 'scale_x_y', 'beta_nms', 'nms_kind', + 'iou_loss', 'iou_normalizer', 'cls_normalizer', 'iou_thresh', 'atoms', 'na', 'nc'] + + f = [] # fields + for x in mdefs[1:]: + [f.append(k) for k in x if k not in f] + u = [x for x in f if x not in supported] # unsupported fields + assert not any(u), "Unsupported fields %s in %s. 
See https://github.com/ultralytics/yolov3/issues/631" % (u, path) + + return mdefs + + +def parse_data_cfg(path): + # Parses the data configuration file + if not os.path.exists(path) and os.path.exists('data' + os.sep + path): # add data/ prefix if omitted + path = 'data' + os.sep + path + + with open(path, 'r') as f: + lines = f.readlines() + + options = dict() + for line in lines: + line = line.strip() + if line == '' or line.startswith('#'): + continue + key, val = line.split('=') + options[key.strip()] = val.strip() + + return options diff --git a/asone/detectors/yolor/utils/plots.py b/asone/detectors/yolor/utils/plots.py new file mode 100644 index 0000000000000000000000000000000000000000..88b9fc802b9b0dccfee7a2759b1f368800a0e150 --- /dev/null +++ b/asone/detectors/yolor/utils/plots.py @@ -0,0 +1,380 @@ +# Plotting utils + +import glob +import math +import os +import random +from copy import copy +from pathlib import Path + +import cv2 +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import torch +import yaml +from PIL import Image +from scipy.signal import butter, filtfilt + +from asone.detectors.yolor.utils.general import xywh2xyxy, xyxy2xywh +from asone.detectors.yolor.utils.metrics import fitness + +# Settings +matplotlib.use('Agg') # for writing to files only + + +def color_list(): + # Return first 10 plt colors as (r,g,b) https://stackoverflow.com/questions/51350872/python-from-color-name-to-rgb + def hex2rgb(h): + return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)) + + return [hex2rgb(h) for h in plt.rcParams['axes.prop_cycle'].by_key()['color']] + + +def hist2d(x, y, n=100): + # 2d histogram used in labels.png and evolve.png + xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n) + hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges)) + xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1) + yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1) + return np.log(hist[xidx, yidx]) + + +def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5): + # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy + def butter_lowpass(cutoff, fs, order): + nyq = 0.5 * fs + normal_cutoff = cutoff / nyq + return butter(order, normal_cutoff, btype='low', analog=False) + + b, a = butter_lowpass(cutoff, fs, order=order) + return filtfilt(b, a, data) # forward-backward filter + + +def plot_one_box(x, img, color=None, label=None, line_thickness=None): + # Plots one bounding box on image img + tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness + color = color or [random.randint(0, 255) for _ in range(3)] + c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) + cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) + if label: + tf = max(tl - 1, 1) # font thickness + t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] + c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 + cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled + cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) + + +def plot_wh_methods(): # from utils.general import *; plot_wh_methods() + # Compares the two methods for width-height anchor multiplication + # https://github.com/ultralytics/yolov3/issues/168 + x = np.arange(-4.0, 4.0, .1) + ya = np.exp(x) + yb = torch.sigmoid(torch.from_numpy(x)).numpy() * 2 + + fig = plt.figure(figsize=(6, 3), dpi=150) + plt.plot(x, ya, 
'.-', label='YOLO') + plt.plot(x, yb ** 2, '.-', label='YOLO ^2') + plt.plot(x, yb ** 1.6, '.-', label='YOLO ^1.6') + plt.xlim(left=-4, right=4) + plt.ylim(bottom=0, top=6) + plt.xlabel('input') + plt.ylabel('output') + plt.grid() + plt.legend() + fig.tight_layout() + fig.savefig('comparison.png', dpi=200) + + +def output_to_target(output, width, height): + # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] + if isinstance(output, torch.Tensor): + output = output.cpu().numpy() + + targets = [] + for i, o in enumerate(output): + if o is not None: + for pred in o: + box = pred[:4] + w = (box[2] - box[0]) / width + h = (box[3] - box[1]) / height + x = box[0] / width + w / 2 + y = box[1] / height + h / 2 + conf = pred[4] + cls = int(pred[5]) + + targets.append([i, cls, x, y, w, h, conf]) + + return np.array(targets) + + +def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16): + # Plot image grid with labels + + if isinstance(images, torch.Tensor): + images = images.cpu().float().numpy() + if isinstance(targets, torch.Tensor): + targets = targets.cpu().numpy() + + # un-normalise + if np.max(images[0]) <= 1: + images *= 255 + + tl = 3 # line thickness + tf = max(tl - 1, 1) # font thickness + bs, _, h, w = images.shape # batch size, _, height, width + bs = min(bs, max_subplots) # limit plot images + ns = np.ceil(bs ** 0.5) # number of subplots (square) + + # Check if we should resize + scale_factor = max_size / max(h, w) + if scale_factor < 1: + h = math.ceil(scale_factor * h) + w = math.ceil(scale_factor * w) + + colors = color_list() # list of colors + mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init + for i, img in enumerate(images): + if i == max_subplots: # if last batch has fewer images than we expect + break + + block_x = int(w * (i // ns)) + block_y = int(h * (i % ns)) + + img = img.transpose(1, 2, 0) + if scale_factor < 1: + img = cv2.resize(img, (w, h)) + + mosaic[block_y:block_y + h, block_x:block_x + w, :] = img + if len(targets) > 0: + image_targets = targets[targets[:, 0] == i] + boxes = xywh2xyxy(image_targets[:, 2:6]).T + classes = image_targets[:, 1].astype('int') + labels = image_targets.shape[1] == 6 # labels if no conf column + conf = None if labels else image_targets[:, 6] # check for confidence presence (label vs pred) + + boxes[[0, 2]] *= w + boxes[[0, 2]] += block_x + boxes[[1, 3]] *= h + boxes[[1, 3]] += block_y + for j, box in enumerate(boxes.T): + cls = int(classes[j]) + color = colors[cls % len(colors)] + cls = names[cls] if names else cls + if labels or conf[j] > 0.25: # 0.25 conf thresh + label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j]) + plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) + + # Draw image filename labels + if paths: + label = Path(paths[i]).name[:40] # trim to 40 char + t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] + cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf, + lineType=cv2.LINE_AA) + + # Image border + cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3) + + if fname: + r = min(1280. 
/ max(h, w) / ns, 1.0) # ratio to limit image size + mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA) + # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save + Image.fromarray(mosaic).save(fname) # PIL save + return mosaic + + +def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''): + # Plot LR simulating training for full epochs + optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals + y = [] + for _ in range(epochs): + scheduler.step() + y.append(optimizer.param_groups[0]['lr']) + plt.plot(y, '.-', label='LR') + plt.xlabel('epoch') + plt.ylabel('LR') + plt.grid() + plt.xlim(0, epochs) + plt.ylim(0) + plt.tight_layout() + plt.savefig(Path(save_dir) / 'LR.png', dpi=200) + + +def plot_test_txt(): # from utils.general import *; plot_test() + # Plot test.txt histograms + x = np.loadtxt('test.txt', dtype=np.float32) + box = xyxy2xywh(x[:, :4]) + cx, cy = box[:, 0], box[:, 1] + + fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True) + ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0) + ax.set_aspect('equal') + plt.savefig('hist2d.png', dpi=300) + + fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True) + ax[0].hist(cx, bins=600) + ax[1].hist(cy, bins=600) + plt.savefig('hist1d.png', dpi=200) + + +def plot_targets_txt(): # from utils.general import *; plot_targets_txt() + # Plot targets.txt histograms + x = np.loadtxt('targets.txt', dtype=np.float32).T + s = ['x targets', 'y targets', 'width targets', 'height targets'] + fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) + ax = ax.ravel() + for i in range(4): + ax[i].hist(x[i], bins=100, label='%.3g +/- %.3g' % (x[i].mean(), x[i].std())) + ax[i].legend() + ax[i].set_title(s[i]) + plt.savefig('targets.jpg', dpi=200) + + +def plot_study_txt(f='study.txt', x=None): # from utils.general import *; plot_study_txt() + # Plot study.txt generated by test.py + fig, ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True) + ax = ax.ravel() + + fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True) + for f in ['study/study_coco_yolo%s.txt' % x for x in ['s', 'm', 'l', 'x']]: + y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T + x = np.arange(y.shape[1]) if x is None else np.array(x) + s = ['P', 'R', 'mAP@.5', 'mAP@.5:.95', 't_inference (ms/img)', 't_NMS (ms/img)', 't_total (ms/img)'] + for i in range(7): + ax[i].plot(x, y[i], '.-', linewidth=2, markersize=8) + ax[i].set_title(s[i]) + + j = y[3].argmax() + 1 + ax2.plot(y[6, :j], y[3, :j] * 1E2, '.-', linewidth=2, markersize=8, + label=Path(f).stem.replace('study_coco_', '').replace('yolo', 'YOLO')) + + ax2.plot(1E3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5], + 'k.-', linewidth=2, markersize=8, alpha=.25, label='EfficientDet') + + ax2.grid() + ax2.set_xlim(0, 30) + ax2.set_ylim(28, 50) + ax2.set_yticks(np.arange(30, 55, 5)) + ax2.set_xlabel('GPU Speed (ms/img)') + ax2.set_ylabel('COCO AP val') + ax2.legend(loc='lower right') + plt.savefig('study_mAP_latency.png', dpi=300) + plt.savefig(f.replace('.txt', '.png'), dpi=300) + + +def plot_labels(labels, save_dir=''): + # plot dataset labels + c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes + nc = int(c.max() + 1) # number of classes + + fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) + ax = ax.ravel() + ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8) + ax[0].set_xlabel('classes') + ax[1].scatter(b[0], b[1], 
c=hist2d(b[0], b[1], 90), cmap='jet') + ax[1].set_xlabel('x') + ax[1].set_ylabel('y') + ax[2].scatter(b[2], b[3], c=hist2d(b[2], b[3], 90), cmap='jet') + ax[2].set_xlabel('width') + ax[2].set_ylabel('height') + plt.savefig(Path(save_dir) / 'labels.png', dpi=200) + plt.close() + + # seaborn correlogram + try: + import seaborn as sns + import pandas as pd + x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height']) + sns.pairplot(x, corner=True, diag_kind='hist', kind='scatter', markers='o', + plot_kws=dict(s=3, edgecolor=None, linewidth=1, alpha=0.02), + diag_kws=dict(bins=50)) + plt.savefig(Path(save_dir) / 'labels_correlogram.png', dpi=200) + plt.close() + except Exception as e: + pass + + +def plot_evolution(yaml_file='data/hyp.finetune.yaml'): # from utils.general import *; plot_evolution() + # Plot hyperparameter evolution results in evolve.txt + with open(yaml_file) as f: + hyp = yaml.load(f, Loader=yaml.FullLoader) + x = np.loadtxt('evolve.txt', ndmin=2) + f = fitness(x) + # weights = (f - f.min()) ** 2 # for weighted results + plt.figure(figsize=(10, 12), tight_layout=True) + matplotlib.rc('font', **{'size': 8}) + for i, (k, v) in enumerate(hyp.items()): + y = x[:, i + 7] + # mu = (y * weights).sum() / weights.sum() # best weighted result + mu = y[f.argmax()] # best single result + plt.subplot(6, 5, i + 1) + plt.scatter(y, f, c=hist2d(y, f, 20), cmap='viridis', alpha=.8, edgecolors='none') + plt.plot(mu, f.max(), 'k+', markersize=15) + plt.title('%s = %.3g' % (k, mu), fontdict={'size': 9}) # limit to 40 characters + if i % 5 != 0: + plt.yticks([]) + print('%15s: %.3g' % (k, mu)) + plt.savefig('evolve.png', dpi=200) + print('\nPlot saved as evolve.png') + + +def plot_results_overlay(start=0, stop=0): # from utils.general import *; plot_results_overlay() + # Plot training 'results*.txt', overlaying train and val losses + s = ['train', 'train', 'train', 'Precision', 'mAP@0.5', 'val', 'val', 'val', 'Recall', 'mAP@0.5:0.95'] # legends + t = ['Box', 'Objectness', 'Classification', 'P-R', 'mAP-F1'] # titles + for f in sorted(glob.glob('results*.txt') + glob.glob('../../Downloads/results*.txt')): + results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T + n = results.shape[1] # number of rows + x = range(start, min(stop, n) if stop else n) + fig, ax = plt.subplots(1, 5, figsize=(14, 3.5), tight_layout=True) + ax = ax.ravel() + for i in range(5): + for j in [i, i + 5]: + y = results[j, x] + ax[i].plot(x, y, marker='.', label=s[j]) + # y_smooth = butter_lowpass_filtfilt(y) + # ax[i].plot(x, np.gradient(y_smooth), marker='.', label=s[j]) + + ax[i].set_title(t[i]) + ax[i].legend() + ax[i].set_ylabel(f) if i == 0 else None # add filename + fig.savefig(f.replace('.txt', '.png'), dpi=200) + + +def plot_results(start=0, stop=0, bucket='', id=(), labels=(), save_dir=''): + # from utils.general import *; plot_results(save_dir='runs/train/exp0') + # Plot training 'results*.txt' + fig, ax = plt.subplots(2, 5, figsize=(12, 6)) + ax = ax.ravel() + s = ['Box', 'Objectness', 'Classification', 'Precision', 'Recall', + 'val Box', 'val Objectness', 'val Classification', 'mAP@0.5', 'mAP@0.5:0.95'] + if bucket: + # os.system('rm -rf storage.googleapis.com') + # files = ['https://storage.googleapis.com/%s/results%g.txt' % (bucket, x) for x in id] + files = ['%g.txt' % x for x in id] + c = ('gsutil cp ' + '%s ' * len(files) + '.') % tuple('gs://%s/%g.txt' % (bucket, x) for x in id) + os.system(c) + else: + files = glob.glob(str(Path(save_dir) / '*.txt')) + 
glob.glob('../../Downloads/results*.txt') + assert len(files), 'No results.txt files found in %s, nothing to plot.' % os.path.abspath(save_dir) + for fi, f in enumerate(files): + try: + results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T + n = results.shape[1] # number of rows + x = range(start, min(stop, n) if stop else n) + for i in range(10): + y = results[i, x] + if i in [0, 1, 2, 5, 6, 7]: + y[y == 0] = np.nan # don't show zero loss values + # y /= y[0] # normalize + label = labels[fi] if len(labels) else Path(f).stem + ax[i].plot(x, y, marker='.', label=label, linewidth=1, markersize=6) + ax[i].set_title(s[i]) + # if i in [5, 6, 7]: # share train and val loss y axes + # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) + except Exception as e: + print('Warning: Plotting error for %s; %s' % (f, e)) + + fig.tight_layout() + ax[1].legend() + fig.savefig(Path(save_dir) / 'results.png', dpi=200) diff --git a/asone/detectors/yolor/utils/torch_utils.py b/asone/detectors/yolor/utils/torch_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..4d07baa9f06de6b32eb79ab5034f16d094aa6d67 --- /dev/null +++ b/asone/detectors/yolor/utils/torch_utils.py @@ -0,0 +1,240 @@ +# PyTorch utils + +import logging +import math +import os +import time +from contextlib import contextmanager +from copy import deepcopy + +import torch +import torch.backends.cudnn as cudnn +import torch.nn as nn +import torch.nn.functional as F +import torchvision + +logger = logging.getLogger(__name__) + + +@contextmanager +def torch_distributed_zero_first(local_rank: int): + """ + Decorator to make all processes in distributed training wait for each local_master to do something. + """ + if local_rank not in [-1, 0]: + torch.distributed.barrier() + yield + if local_rank == 0: + torch.distributed.barrier() + + +def init_torch_seeds(seed=0): + # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html + torch.manual_seed(seed) + if seed == 0: # slower, more reproducible + cudnn.deterministic = True + cudnn.benchmark = False + else: # faster, less reproducible + cudnn.deterministic = False + cudnn.benchmark = True + + +def select_device(device='', batch_size=None): + # device = 'cpu' or '0' or '0,1,2,3' + cpu_request = device.lower() == 'cpu' + if device and not cpu_request: # if device requested other than 'cpu' + os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable + assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity + + cuda = False if cpu_request else torch.cuda.is_available() + if cuda: + c = 1024 ** 2 # bytes to MB + ng = torch.cuda.device_count() + if ng > 1 and batch_size: # check that batch_size is compatible with device_count + assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng) + x = [torch.cuda.get_device_properties(i) for i in range(ng)] + s = f'Using torch {torch.__version__} ' + for i in range(0, ng): + if i == 1: + s = ' ' * len(s) + logger.info("%sCUDA:%g (%s, %dMB)" % (s, i, x[i].name, x[i].total_memory / c)) + else: + logger.info(f'Using torch {torch.__version__} CPU') + + logger.info('') # skip a line + return torch.device('cuda:0' if cuda else 'cpu') + + +def time_synchronized(): + torch.cuda.synchronize() if torch.cuda.is_available() else None + return time.time() + + +def is_parallel(model): + return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) + + +def intersect_dicts(da, db, 
exclude=()): + # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values + return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} + + +def initialize_weights(model): + for m in model.modules(): + t = type(m) + if t is nn.Conv2d: + pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif t is nn.BatchNorm2d: + m.eps = 1e-3 + m.momentum = 0.03 + elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: + m.inplace = True + + +def find_modules(model, mclass=nn.Conv2d): + # Finds layer indices matching module class 'mclass' + return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] + + +def sparsity(model): + # Return global model sparsity + a, b = 0., 0. + for p in model.parameters(): + a += p.numel() + b += (p == 0).sum() + return b / a + + +def prune(model, amount=0.3): + # Prune model to requested global sparsity + import torch.nn.utils.prune as prune + print('Pruning model... ', end='') + for name, m in model.named_modules(): + if isinstance(m, nn.Conv2d): + prune.l1_unstructured(m, name='weight', amount=amount) # prune + prune.remove(m, 'weight') # make permanent + print(' %.3g global sparsity' % sparsity(model)) + + +def fuse_conv_and_bn(conv, bn): + # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ + fusedconv = nn.Conv2d(conv.in_channels, + conv.out_channels, + kernel_size=conv.kernel_size, + stride=conv.stride, + padding=conv.padding, + groups=conv.groups, + bias=True).requires_grad_(False).to(conv.weight.device) + + # prepare filters + w_conv = conv.weight.clone().view(conv.out_channels, -1) + w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) + fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) + + # prepare spatial bias + b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias + b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) + fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) + + return fusedconv + + +def model_info(model, verbose=False, img_size=640): + # Model information. img_size may be int or list, i.e. 
img_size=640 or img_size=[640, 320] + n_p = sum(x.numel() for x in model.parameters()) # number parameters + n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients + if verbose: + print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) + for i, (name, p) in enumerate(model.named_parameters()): + name = name.replace('module_list.', '') + print('%5g %40s %9s %12g %20s %10.3g %10.3g' % + (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) + + try: # FLOPS + from thop import profile + flops = profile(deepcopy(model), inputs=(torch.zeros(1, 3, img_size, img_size),), verbose=False)[0] / 1E9 * 2 + img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float + fs = ', %.9f GFLOPS' % (flops) # 640x640 FLOPS + except (ImportError, Exception): + fs = '' + + logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") + + +def load_classifier(name='resnet101', n=2): + # Loads a pretrained model reshaped to n-class output + model = torchvision.models.__dict__[name](pretrained=True) + + # ResNet model properties + # input_size = [3, 224, 224] + # input_space = 'RGB' + # input_range = [0, 1] + # mean = [0.485, 0.456, 0.406] + # std = [0.229, 0.224, 0.225] + + # Reshape output to n classes + filters = model.fc.weight.shape[1] + model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True) + model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True) + model.fc.out_features = n + return model + + +def scale_img(img, ratio=1.0, same_shape=False): # img(16,3,256,416), r=ratio + # scales img(bs,3,y,x) by ratio + if ratio == 1.0: + return img + else: + h, w = img.shape[2:] + s = (int(h * ratio), int(w * ratio)) # new size + img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize + if not same_shape: # pad/crop img + gs = 32 # (pixels) grid size + h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)] + return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean + + +def copy_attr(a, b, include=(), exclude=()): + # Copy attributes from b to a, options to only include [...] and to exclude [...] + for k, v in b.__dict__.items(): + if (len(include) and k not in include) or k.startswith('_') or k in exclude: + continue + else: + setattr(a, k, v) + + +class ModelEMA: + """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models + Keep a moving average of everything in the model state_dict (parameters and buffers). + This is intended to allow functionality like + https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage + A smoothed version of the weights is necessary for some training schemes to perform well. + This class is sensitive where it is initialized in the sequence of model init, + GPU assignment and distributed training wrappers. 
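+ Typical usage (illustrative): build ema = ModelEMA(model) once the model is on its final device, call ema.update(model) after every optimizer step, and run validation or save checkpoints from ema.ema.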
+ """ + + def __init__(self, model, decay=0.9999, updates=0): + # Create EMA + self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA + # if next(model.parameters()).device.type != 'cpu': + # self.ema.half() # FP16 EMA + self.updates = updates # number of EMA updates + self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs) + for p in self.ema.parameters(): + p.requires_grad_(False) + + def update(self, model): + # Update EMA parameters + with torch.no_grad(): + self.updates += 1 + d = self.decay(self.updates) + + msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict + for k, v in self.ema.state_dict().items(): + if v.dtype.is_floating_point: + v *= d + v += (1. - d) * msd[k].detach() + + def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): + # Update EMA attributes + copy_attr(self.ema, model, include, exclude) diff --git a/asone/detectors/yolor/utils/yolor_utils.py b/asone/detectors/yolor/utils/yolor_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7de2d855b5f5d01d97f8e455978f8667d500e477 --- /dev/null +++ b/asone/detectors/yolor/utils/yolor_utils.py @@ -0,0 +1,206 @@ +import torch +import torchvision +import time +import numpy as np +import cv2 + + +class_names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] + +# Create a list of colors for each class where each color is a tuple of 3 integer values +rng = np.random.default_rng(3) +colors = rng.uniform(0, 255, size=(len(class_names), 3)) + +def box_area(box): + # box = xyxy(4,n) + return (box[2] - box[0]) * (box[3] - box[1]) + + +def box_iou(box1, box2, eps=1e-7): + # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py + """ + Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
+ Arguments: + box1 (Tensor[N, 4]) + box2 (Tensor[M, 4]) + Returns: + iou (Tensor[N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + + # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) + (a1, a2), (b1, b2) = box1[:, None].chunk(2, 2), box2.chunk(2, 1) + inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2) + + # IoU = inter / (area1 + area2 - inter) + return inter / (box_area(box1.T)[:, None] + box_area(box2.T) - inter + eps) + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + +def non_max_suppression(prediction, + conf_thres=0.25, + iou_thres=0.45, + classes=None, + agnostic=False, + multi_label=False, + labels=(), + max_det=300): + """Non-Maximum Suppression (NMS) on inference results to reject overlapping bounding boxes + Returns: + list of detections, on (n,6) tensor per image [xyxy, conf, cls] + """ + # prediction = torch.Tensor(prediction) + bs = prediction.shape[0] # batch size + nc = prediction.shape[2] - 5 # number of classes + xc = prediction[..., 4] > conf_thres # candidates + # Checks + assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' + assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' + + # Settings + # min_wh = 2 # (pixels) minimum box width and height + max_wh = 7680 # (pixels) maximum box width and height + max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() + time_limit = 0.3 + 0.03 * bs # seconds to quit after + redundant = True # require redundant detections + multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) + merge = False # use merge-NMS + + t = time.time() + output = [torch.zeros((0, 6), device=prediction.device)] * bs + for xi, x in enumerate(prediction): # image index, image inference + # Apply constraints + # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height + x = x[xc[xi]] # confidence + + # Cat apriori labels if autolabelling + if labels and len(labels[xi]): + lb = labels[xi] + v = torch.zeros((len(lb), nc + 5), device=x.device) + v[:, :4] = lb[:, 1:5] # box + v[:, 4] = 1.0 # conf + v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls + x = torch.cat((x, v), 0) + + # If none remain process next image + if not x.shape[0]: + continue + + # Compute conf + x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf + + # Box (center x, center y, width, height) to (x1, y1, x2, y2) + # print(type(x)) + box = xywh2xyxy(x[:, :4]) + + # Detections matrix nx6 (xyxy, conf, cls) + if multi_label: + i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T + x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) + else: # best class only + conf, j = x[:, 5:].max(1, keepdim=True) + x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] + + # Filter by class + if classes is not None: + x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] + + # Apply finite constraint + # if not torch.isfinite(x).all(): + # x = x[torch.isfinite(x).all(1)] + + # Check shape + n = x.shape[0] # number of boxes + if not n: # no boxes + continue + elif n > max_nms: # excess boxes + x = x[x[:, 
4].argsort(descending=True)[:max_nms]] # sort by confidence + + # Batched NMS + c = x[:, 5:6] * (0 if agnostic else max_wh) # classes + boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores + i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS + if i.shape[0] > max_det: # limit detections + i = i[:max_det] + if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) + # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) + iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix + weights = iou * scores[None] # box weights + x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes + if redundant: + i = i[iou.sum(1) > 1] # require redundancy + + output[xi] = x[i] + if (time.time() - t) > time_limit: + # LOGGER.warning(f'WARNING: NMS time limit {time_limit:.3f}s exceeded') + break # time limit exceeded + + return output + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): + # Rescale coords (xyxy) from img1_shape to img0_shape + if ratio_pad is None: # calculate from img0_shape + gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new + pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding + else: + gain = ratio_pad[0][0] + pad = ratio_pad[1] + + coords[:, [0, 2]] -= pad[0] # x padding + coords[:, [1, 3]] -= pad[1] # y padding + coords[:, :4] /= gain + clip_coords(coords, img0_shape) + return coords + + +def clip_coords(boxes, shape): + # Clip bounding xyxy bounding boxes to image shape (height, width) + if isinstance(boxes, torch.Tensor): # faster individually + boxes[:, 0].clamp_(0, shape[1]) # x1 + boxes[:, 1].clamp_(0, shape[0]) # y1 + boxes[:, 2].clamp_(0, shape[1]) # x2 + boxes[:, 3].clamp_(0, shape[0]) # y2 + else: # np.array (faster grouped) + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 + diff --git a/asone/detectors/yolor/yolor_detector.py b/asone/detectors/yolor/yolor_detector.py new file mode 100644 index 
0000000000000000000000000000000000000000..bea92db8c023f9aa01bbd35d3e121a698dd9da42
--- /dev/null
+++ b/asone/detectors/yolor/yolor_detector.py
@@ -0,0 +1,138 @@
+
+import os
+from asone.utils import get_names
+import numpy as np
+import warnings
+import torch
+import onnxruntime
+
+from .models.models import *
+from asone import utils
+from asone.detectors.yolor.utils.yolor_utils import (non_max_suppression,
+                                                     scale_coords,
+                                                     letterbox)
+
+
+class YOLOrDetector:
+    def __init__(self,
+                 weights=None,
+                 cfg=None,
+                 use_onnx=True,
+                 use_cuda=True,
+                 ):
+
+        self.use_onnx = use_onnx
+        self.device = 'cuda' if use_cuda else 'cpu'
+
+        if not os.path.exists(weights):
+            utils.download_weights(weights)
+
+        if cfg is None:
+            cfg = os.path.join("cfg", "yolor_p6.cfg")
+        # If weights is a list of paths, select the path at the first index
+        weights = str(weights[0] if isinstance(weights, list) else weights)
+        # Load Model
+        self.model = self.load_model(use_cuda, weights, cfg=cfg, img_size=640)
+
+    def load_model(self, use_cuda, weights, cfg, img_size, fp16=False):
+        # Half precision (fp16) is only used when running on a CUDA device
+        self.fp16 = fp16 and self.device != 'cpu'
+        # Load onnx
+        if self.use_onnx:
+            if use_cuda:
+                providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
+            else:
+                providers = ['CPUExecutionProvider']
+            model = onnxruntime.InferenceSession(weights, providers=providers)
+        # Load Pytorch
+        else:
+            model = Darknet(cfg, img_size).to(self.device)
+            model.load_state_dict(torch.load(
+                weights, map_location=self.device)['model'])
+            model.to(self.device).eval()
+            model.half() if self.fp16 else model.float()
+        return model
+
+    def image_preprocessing(self,
+                            image: list,
+                            input_shape=(640, 640)) -> list:
+
+        original_image = image.copy()
+        image = letterbox(image, input_shape, stride=32, auto=False)[0]
+        image = image.transpose((2, 0, 1))[::-1]
+        image = np.ascontiguousarray(image, dtype=np.float32)
+        image /= 255  # 0 - 255 to 0.0 - 1.0
+        if len(image.shape) == 3:
+            image = image[None]  # expand for batch dim
+        return original_image, image
+
+    def detect(self, image: list,
+               input_shape: tuple = (640, 640),
+               conf_thres: float = 0.25,
+               iou_thres: float = 0.45,
+               max_det: int = 1000,
+               filter_classes: list = None,
+               agnostic_nms: bool = True,
+               with_p6: bool = False) -> list:
+
+        # Image Preprocessing
+        original_image, processed_image = self.image_preprocessing(
+            image, input_shape)
+
+        # Inference
+        if self.use_onnx:
+            # Input name of the exported ONNX model
+            input_name = self.model.get_inputs()[0].name
+            # Run onnx model
+            pred = self.model.run([self.model.get_outputs()[0].name], {
+                input_name: processed_image})[0]
+        # Run Pytorch model
+        else:
+            processed_image = torch.from_numpy(processed_image).to(self.device)
+            # Use half precision if fp16 is enabled
+            processed_image = processed_image.half() if self.fp16 else processed_image.float()
+            pred = self.model(processed_image, augment=False)[0]
+            pred = pred.detach().cpu().numpy()
+
+        if isinstance(pred, np.ndarray):
+            pred = torch.tensor(pred, device=self.device)
+        predictions = non_max_suppression(
+            pred, conf_thres,
+            iou_thres,
+            agnostic=agnostic_nms,
+            max_det=max_det)
+
+        for i, prediction in enumerate(predictions):  # per image
+            if len(prediction):
+                prediction[:, :4] = scale_coords(
+                    processed_image.shape[2:], prediction[:, :4], original_image.shape).round()
+                predictions[i] = prediction
+
+        predictions = predictions[0].cpu().numpy()
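+        # predictions is an (n, 6) array: [x1, y1, x2, y2, score, class_id] in original-image pixel coordinates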
+        image_info = {
+            'width': original_image.shape[1],
+            'height': original_image.shape[0],
+        }
+
+        self.boxes = predictions[:, :4]
+        self.scores = predictions[:, 4:5]
+        self.class_ids = predictions[:, 5:6]
+
+        if filter_classes:
+            class_names = get_names()
+
+            filter_class_idx = []
+            for _class in filter_classes:
+                if _class.lower() in class_names:
+                    filter_class_idx.append(
+                        class_names.index(_class.lower()))
+                else:
+                    warnings.warn(
+                        f"class {_class} not found in model classes list.")
+
+            predictions = predictions[np.in1d(
+                predictions[:, 5].astype(int), filter_class_idx)]
+
+        return predictions, image_info
diff --git a/asone/detectors/yolov5/__init__.py b/asone/detectors/yolov5/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..980a931610d78fa409eec20d3591a57ef7f0c7d0
--- /dev/null
+++ b/asone/detectors/yolov5/__init__.py
@@ -0,0 +1,2 @@
+from .yolov5_detector import YOLOv5Detector
+__all__ = ['YOLOv5Detector']
\ No newline at end of file
diff --git a/asone/detectors/yolov5/yolov5/__init__.py b/asone/detectors/yolov5/yolov5/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/asone/detectors/yolov5/yolov5/models/__init__.py b/asone/detectors/yolov5/yolov5/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4955e7efc290cc4d8188ce1c84093ad5c5feabe5
--- /dev/null
+++ b/asone/detectors/yolov5/yolov5/models/__init__.py
@@ -0,0 +1,3 @@
+import os
+import sys
+sys.path.append(os.path.dirname(__file__))
diff --git a/asone/detectors/yolov5/yolov5/models/common.py b/asone/detectors/yolov5/yolov5/models/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..7fff4ed7de79bdf1871d2602a784c2bd4b3436e8
--- /dev/null
+++ b/asone/detectors/yolov5/yolov5/models/common.py
@@ -0,0 +1,756 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Common modules
+"""
+
+import json
+import math
+import platform
+import warnings
+from collections import OrderedDict, namedtuple
+from copy import copy
+from pathlib import Path
+
+import cv2
+import numpy as np
+import pandas as pd
+import requests
+import torch
+import torch.nn as nn
+import yaml
+from PIL import Image
+from torch.cuda import amp
+
+from asone.detectors.yolov5.yolov5.models.general import (LOGGER, check_requirements,
+                                                          check_suffix, check_version,
+                                                          colorstr, increment_path)
+
+def autopad(k, p=None):  # kernel, padding
+    # Pad to 'same'
+    if p is None:
+        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
+    return p
+
+
+class Conv(nn.Module):
+    # Standard convolution
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+        super().__init__()
+        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
+        self.bn = nn.BatchNorm2d(c2)
+        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
+
+    def forward(self, x):
+        return self.act(self.bn(self.conv(x)))
+
+    def forward_fuse(self, x):
+        return self.act(self.conv(x))
+
+
+class DWConv(Conv):
+    # Depth-wise convolution class
+    def __init__(self, c1, c2, k=1, s=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
+
+
+class DWConvTranspose2d(nn.ConvTranspose2d):
+    # Depth-wise transpose convolution class
+    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
+
super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2)) + + +class TransformerLayer(nn.Module): + # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance) + def __init__(self, c, num_heads): + super().__init__() + self.q = nn.Linear(c, c, bias=False) + self.k = nn.Linear(c, c, bias=False) + self.v = nn.Linear(c, c, bias=False) + self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads) + self.fc1 = nn.Linear(c, c, bias=False) + self.fc2 = nn.Linear(c, c, bias=False) + + def forward(self, x): + x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x + x = self.fc2(self.fc1(x)) + x + return x + + +class TransformerBlock(nn.Module): + # Vision Transformer https://arxiv.org/abs/2010.11929 + def __init__(self, c1, c2, num_heads, num_layers): + super().__init__() + self.conv = None + if c1 != c2: + self.conv = Conv(c1, c2) + self.linear = nn.Linear(c2, c2) # learnable position embedding + self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers))) + self.c2 = c2 + + def forward(self, x): + if self.conv is not None: + x = self.conv(x) + b, _, w, h = x.shape + p = x.flatten(2).permute(2, 0, 1) + return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h) + + +class Bottleneck(nn.Module): + # Standard bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_, c2, 3, 1, g=g) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + + +class BottleneckCSP(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv4 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) + self.act = nn.SiLU() + self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n))) + + def forward(self, x): + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1)))) + + +class CrossConv(nn.Module): + # Cross Convolution Downsample + def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): + # ch_in, ch_out, kernel, stride, groups, expansion, shortcut + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, (1, k), (1, s)) + self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + + +class C3(nn.Module): + # CSP Bottleneck with 3 convolutions + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c1, c_, 1, 1) + self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2) + self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n))) + + def forward(self, x): + return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1)) + + +class C3x(C3): + # C3 module with cross-convolutions + def __init__(self, 
c1, c2, n=1, shortcut=True, g=1, e=0.5): + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) + self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n))) + + +class C3TR(C3): + # C3 module with TransformerBlock() + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) + self.m = TransformerBlock(c_, c_, 4, n) + + +class C3SPP(C3): + # C3 module with SPP() + def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5): + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) + self.m = SPP(c_, c_, k) + + +class C3Ghost(C3): + # C3 module with GhostBottleneck() + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) # hidden channels + self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n))) + + +class SPP(nn.Module): + # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729 + def __init__(self, c1, c2, k=(5, 9, 13)): + super().__init__() + c_ = c1 // 2 # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) + self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) + + def forward(self, x): + x = self.cv1(x) + with warnings.catch_warnings(): + warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning + return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) + + +class SPPF(nn.Module): + # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher + def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13)) + super().__init__() + c_ = c1 // 2 # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_ * 4, c2, 1, 1) + self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2) + + def forward(self, x): + x = self.cv1(x) + with warnings.catch_warnings(): + warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning + y1 = self.m(x) + y2 = self.m(y1) + return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) + + +class Focus(nn.Module): + # Focus wh information into c-space + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + super().__init__() + self.conv = Conv(c1 * 4, c2, k, s, p, g, act) + # self.contract = Contract(gain=2) + + def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) + return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1)) + # return self.conv(self.contract(x)) + + +class GhostConv(nn.Module): + # Ghost Convolution https://github.com/huawei-noah/ghostnet + def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups + super().__init__() + c_ = c2 // 2 # hidden channels + self.cv1 = Conv(c1, c_, k, s, None, g, act) + self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) + + def forward(self, x): + y = self.cv1(x) + return torch.cat((y, self.cv2(y)), 1) + + +class GhostBottleneck(nn.Module): + # Ghost Bottleneck https://github.com/huawei-noah/ghostnet + def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride + super().__init__() + c_ = c2 // 2 + self.conv = nn.Sequential( + GhostConv(c1, c_, 1, 1), # pw + DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw + GhostConv(c_, c2, 1, 1, act=False)) # pw-linear + self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, + act=False)) if s == 2 else nn.Identity() + + def 
forward(self, x): + return self.conv(x) + self.shortcut(x) + + +class Contract(nn.Module): + # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40) + def __init__(self, gain=2): + super().__init__() + self.gain = gain + + def forward(self, x): + b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain' + s = self.gain + x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2) + x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40) + return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40) + + +class Expand(nn.Module): + # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160) + def __init__(self, gain=2): + super().__init__() + self.gain = gain + + def forward(self, x): + b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain' + s = self.gain + x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80) + x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2) + return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160) + + +class Concat(nn.Module): + # Concatenate a list of tensors along dimension + def __init__(self, dimension=1): + super().__init__() + self.d = dimension + + def forward(self, x): + return torch.cat(x, self.d) + + +class DetectMultiBackend(nn.Module): + # YOLOv5 MultiBackend class for python inference on various backends + def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True): + # Usage: + # PyTorch: weights = *.pt + # TorchScript: *.torchscript + # ONNX Runtime: *.onnx + # ONNX OpenCV DNN: *.onnx with --dnn + # OpenVINO: *.xml + # CoreML: *.mlmodel + # TensorRT: *.engine + # TensorFlow SavedModel: *_saved_model + # TensorFlow GraphDef: *.pb + # TensorFlow Lite: *.tflite + # TensorFlow Edge TPU: *_edgetpu.tflite + from asone.detectors.yolov5.utils.experimental import attempt_download, attempt_load # scoped to avoid circular import + + super().__init__() + w = str(weights[0] if isinstance(weights, list) else weights) + pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type(w) # get backend + w = attempt_download(w) # download if not local + fp16 &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 + stride, names = 32, [f'class{i}' for i in range(1000)] # assign defaults + if data: # assign class names (optional) + with open(data, errors='ignore') as f: + names = yaml.safe_load(f)['names'] + + if pt: # PyTorch + model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse) + stride = max(int(model.stride.max()), 32) # model stride + names = model.module.names if hasattr(model, 'module') else model.names # get class names + model.half() if fp16 else model.float() + self.model = model # explicitly assign for to(), cpu(), cuda(), half() + elif jit: # TorchScript + LOGGER.info(f'Loading {w} for TorchScript inference...') + extra_files = {'config.txt': ''} # model metadata + model = torch.jit.load(w, _extra_files=extra_files) + model.half() if fp16 else model.float() + if extra_files['config.txt']: + d = json.loads(extra_files['config.txt']) # extra_files dict + stride, names = int(d['stride']), d['names'] + elif dnn: # ONNX OpenCV DNN + LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...') + check_requirements(('opencv-python>=4.5.4',)) + net = cv2.dnn.readNetFromONNX(w) + elif onnx: # ONNX Runtime + LOGGER.info(f'Loading {w} for ONNX Runtime inference...') + cuda = torch.cuda.is_available() + 
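+            # ONNX Runtime tries execution providers in order, so CUDA is preferred and CPU is the fallback when a GPU is available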
check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime')) + import onnxruntime + providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider'] + session = onnxruntime.InferenceSession(w, providers=providers) + meta = session.get_modelmeta().custom_metadata_map # metadata + if 'stride' in meta: + stride, names = int(meta['stride']), eval(meta['names']) + elif xml: # OpenVINO + LOGGER.info(f'Loading {w} for OpenVINO inference...') + check_requirements(('openvino',)) # requires openvino-dev: https://pypi.org/project/openvino-dev/ + import openvino + from openvino.runtime import Core, Layout, get_batch + ie = Core() + if not Path(w).is_file(): # if not *.xml + w = next(Path(w).glob('*.xml')) # get *.xml file from *_openvino_model dir + network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin')) + if network.get_parameters()[0].get_layout().empty: + network.get_parameters()[0].set_layout(Layout("NCHW")) + batch_dim = get_batch(network) + if batch_dim.is_static: + batch_size = batch_dim.get_length() + executable_network = ie.compile_model(network, device_name="CPU") # device_name="MYRIAD" for Intel NCS2 + output_layer = next(iter(executable_network.outputs)) + meta = Path(w).with_suffix('.yaml') + if meta.exists(): + stride, names = self._load_metadata(meta) # load metadata + elif engine: # TensorRT + LOGGER.info(f'Loading {w} for TensorRT inference...') + import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download + check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0 + Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr')) + logger = trt.Logger(trt.Logger.INFO) + with open(w, 'rb') as f, trt.Runtime(logger) as runtime: + model = runtime.deserialize_cuda_engine(f.read()) + context = model.create_execution_context() + bindings = OrderedDict() + fp16 = False # default updated below + dynamic_input = False + for index in range(model.num_bindings): + name = model.get_binding_name(index) + dtype = trt.nptype(model.get_binding_dtype(index)) + if model.binding_is_input(index): + if -1 in tuple(model.get_binding_shape(index)): # dynamic + dynamic_input = True + context.set_binding_shape(index, tuple(model.get_profile_shape(0, index)[2])) + if dtype == np.float16: + fp16 = True + shape = tuple(context.get_binding_shape(index)) + data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device) + bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr())) + binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items()) + batch_size = bindings['images'].shape[0] # if dynamic, this is instead max batch size + elif coreml: # CoreML + LOGGER.info(f'Loading {w} for CoreML inference...') + import coremltools as ct + model = ct.models.MLModel(w) + else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU) + if saved_model: # SavedModel + LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...') + import tensorflow as tf + keras = False # assume TF1 saved_model + model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w) + elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt + LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...') + import tensorflow as tf + + def wrap_frozen_graph(gd, inputs, outputs): + x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped + ge = x.graph.as_graph_element + return x.prune(tf.nest.map_structure(ge, inputs), 
tf.nest.map_structure(ge, outputs)) + + gd = tf.Graph().as_graph_def() # graph_def + with open(w, 'rb') as f: + gd.ParseFromString(f.read()) + frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs="Identity:0") + elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python + try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu + from tflite_runtime.interpreter import Interpreter, load_delegate + except ImportError: + import tensorflow as tf + Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate, + if edgetpu: # Edge TPU https://coral.ai/software/#edgetpu-runtime + LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...') + delegate = { + 'Linux': 'libedgetpu.so.1', + 'Darwin': 'libedgetpu.1.dylib', + 'Windows': 'edgetpu.dll'}[platform.system()] + interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)]) + else: # Lite + LOGGER.info(f'Loading {w} for TensorFlow Lite inference...') + interpreter = Interpreter(model_path=w) # load TFLite model + interpreter.allocate_tensors() # allocate + input_details = interpreter.get_input_details() # inputs + output_details = interpreter.get_output_details() # outputs + elif tfjs: + raise Exception('ERROR: YOLOv5 TF.js inference is not supported') + else: + raise Exception(f'ERROR: {w} is not a supported format') + self.__dict__.update(locals()) # assign all variables to self + + def forward(self, im, augment=False, visualize=False, val=False): + # YOLOv5 MultiBackend inference + b, ch, h, w = im.shape # batch, channel, height, width + if self.fp16 and im.dtype != torch.float16: + im = im.half() # to FP16 + + if self.pt: # PyTorch + y = self.model(im, augment=augment, visualize=visualize)[0] + elif self.jit: # TorchScript + y = self.model(im)[0] + elif self.dnn: # ONNX OpenCV DNN + im = im.cpu().numpy() # torch to numpy + self.net.setInput(im) + y = self.net.forward() + elif self.onnx: # ONNX Runtime + im = im.cpu().numpy() # torch to numpy + y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0] + elif self.xml: # OpenVINO + im = im.cpu().numpy() # FP32 + y = self.executable_network([im])[self.output_layer] + elif self.engine: # TensorRT + if im.shape != self.bindings['images'].shape and self.dynamic_input: + self.context.set_binding_shape(self.model.get_binding_index('images'), im.shape) # reshape if dynamic + self.bindings['images'] = self.bindings['images']._replace(shape=im.shape) + assert im.shape == self.bindings['images'].shape, ( + f"image shape {im.shape} exceeds model max shape {self.bindings['images'].shape}" if self.dynamic_input + else f"image shape {im.shape} does not match model shape {self.bindings['images'].shape}") + self.binding_addrs['images'] = int(im.data_ptr()) + self.context.execute_v2(list(self.binding_addrs.values())) + y = self.bindings['output'].data + elif self.coreml: # CoreML + im = im.permute(0, 2, 3, 1).cpu().numpy() # torch BCHW to numpy BHWC shape(1,320,192,3) + im = Image.fromarray((im[0] * 255).astype('uint8')) + # im = im.resize((192, 320), Image.ANTIALIAS) + y = self.model.predict({'image': im}) # coordinates are xywh normalized + if 'confidence' in y: + box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels + conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float) + y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1) + else: + k = 'var_' + 
str(sorted(int(k.replace('var_', '')) for k in y)[-1]) # output key + y = y[k] # output + else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU) + im = im.permute(0, 2, 3, 1).cpu().numpy() # torch BCHW to numpy BHWC shape(1,320,192,3) + if self.saved_model: # SavedModel + y = (self.model(im, training=False) if self.keras else self.model(im)).numpy() + elif self.pb: # GraphDef + y = self.frozen_func(x=self.tf.constant(im)).numpy() + else: # Lite or Edge TPU + input, output = self.input_details[0], self.output_details[0] + int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model + if int8: + scale, zero_point = input['quantization'] + im = (im / scale + zero_point).astype(np.uint8) # de-scale + self.interpreter.set_tensor(input['index'], im) + self.interpreter.invoke() + y = self.interpreter.get_tensor(output['index']) + if int8: + scale, zero_point = output['quantization'] + y = (y.astype(np.float32) - zero_point) * scale # re-scale + y[..., :4] *= [w, h, w, h] # xywh normalized to pixels + + if isinstance(y, np.ndarray): + y = torch.tensor(y, device=self.device) + return (y, []) if val else y + + def warmup(self, imgsz=(1, 3, 640, 640)): + # Warmup model by running inference once + warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb + if any(warmup_types) and self.device.type != 'cpu': + im = torch.zeros(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input + for _ in range(2 if self.jit else 1): # + self.forward(im) # warmup + + @staticmethod + def model_type(p='path/to/model.pt'): + # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx + from export import export_formats + suffixes = list(export_formats().Suffix) + ['.xml'] # export suffixes + check_suffix(p, suffixes) # checks + p = Path(p).name # eliminate trailing separators + pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, xml2 = (s in p for s in suffixes) + xml |= xml2 # *_openvino_model or *.xml + tflite &= not edgetpu # *.tflite + return pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs + + @staticmethod + def _load_metadata(f='path/to/meta.yaml'): + # Load metadata from meta.yaml if it exists + with open(f, errors='ignore') as f: + d = yaml.safe_load(f) + return d['stride'], d['names'] # assign stride, names + + +class AutoShape(nn.Module): + # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS + conf = 0.25 # NMS confidence threshold + iou = 0.45 # NMS IoU threshold + agnostic = False # NMS class-agnostic + multi_label = False # NMS multiple labels per box + classes = None # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs + max_det = 1000 # maximum number of detections per image + amp = False # Automatic Mixed Precision (AMP) inference + + def __init__(self, model, verbose=True): + super().__init__() + if verbose: + LOGGER.info('Adding AutoShape... 
') + copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=()) # copy attributes + self.dmb = isinstance(model, DetectMultiBackend) # DetectMultiBackend() instance + self.pt = not self.dmb or model.pt # PyTorch model + self.model = model.eval() + + def _apply(self, fn): + # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers + self = super()._apply(fn) + if self.pt: + m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect() + m.stride = fn(m.stride) + m.grid = list(map(fn, m.grid)) + if isinstance(m.anchor_grid, list): + m.anchor_grid = list(map(fn, m.anchor_grid)) + return self + + @torch.no_grad() + def forward(self, imgs, size=640, augment=False, profile=False): + # Inference from various sources. For height=640, width=1280, RGB images example inputs are: + # file: imgs = 'data/images/zidane.jpg' # str or PosixPath + # URI: = 'https://ultralytics.com/images/zidane.jpg' + # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3) + # PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3) + # numpy: = np.zeros((640,1280,3)) # HWC + # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values) + # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images + + t = [time_sync()] + p = next(self.model.parameters()) if self.pt else torch.zeros(1, device=self.model.device) # for device, type + autocast = self.amp and (p.device.type != 'cpu') # Automatic Mixed Precision (AMP) inference + if isinstance(imgs, torch.Tensor): # torch + with amp.autocast(autocast): + return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference + + # Pre-process + n, imgs = (len(imgs), list(imgs)) if isinstance(imgs, (list, tuple)) else (1, [imgs]) # number, list of images + shape0, shape1, files = [], [], [] # image and inference shapes, filenames + for i, im in enumerate(imgs): + f = f'image{i}' # filename + if isinstance(im, (str, Path)): # filename or uri + im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im + im = np.asarray(exif_transpose(im)) + elif isinstance(im, Image.Image): # PIL Image + im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f + files.append(Path(f).with_suffix('.jpg').name) + if im.shape[0] < 5: # image in CHW + im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) + im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3) # enforce 3ch input + s = im.shape[:2] # HWC + shape0.append(s) # image shape + g = (size / max(s)) # gain + shape1.append([y * g for y in s]) + imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update + shape1 = [make_divisible(x, self.stride) if self.pt else size for x in np.array(shape1).max(0)] # inf shape + x = [letterbox(im, shape1, auto=False)[0] for im in imgs] # pad + x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW + x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32 + t.append(time_sync()) + + with amp.autocast(autocast): + # Inference + y = self.model(x, augment, profile) # forward + t.append(time_sync()) + + # Post-process + y = non_max_suppression(y if self.dmb else y[0], + self.conf, + self.iou, + self.classes, + self.agnostic, + self.multi_label, + max_det=self.max_det) # NMS + for i in range(n): + scale_coords(shape1, y[i][:, :4], shape0[i]) + + t.append(time_sync()) + return Detections(imgs, y, files, t, 
self.names, x.shape) + + +class Detections: + # YOLOv5 detections class for inference results + def __init__(self, imgs, pred, files, times=(0, 0, 0, 0), names=None, shape=None): + super().__init__() + d = pred[0].device # device + gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in imgs] # normalizations + self.imgs = imgs # list of images as numpy arrays + self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls) + self.names = names # class names + self.files = files # image filenames + self.times = times # profiling times + self.xyxy = pred # xyxy pixels + self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels + self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized + self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized + self.n = len(self.pred) # number of images (batch size) + self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) # timestamps (ms) + self.s = shape # inference BCHW shape + + def display(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')): + crops = [] + for i, (im, pred) in enumerate(zip(self.imgs, self.pred)): + s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string + if pred.shape[0]: + for c in pred[:, -1].unique(): + n = (pred[:, -1] == c).sum() # detections per class + s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string + if show or save or render or crop: + annotator = Annotator(im, example=str(self.names)) + for *box, conf, cls in reversed(pred): # xyxy, confidence, class + label = f'{self.names[int(cls)]} {conf:.2f}' + if crop: + file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None + crops.append({ + 'box': box, + 'conf': conf, + 'cls': cls, + 'label': label, + 'im': save_one_box(box, im, file=file, save=save)}) + else: # all others + annotator.box_label(box, label if labels else '', color=colors(cls)) + im = annotator.im + else: + s += '(no detections)' + + im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np + if pprint: + print(s.rstrip(', ')) + if show: + im.show(self.files[i]) # show + if save: + f = self.files[i] + im.save(save_dir / f) # save + if i == self.n - 1: + LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}") + if render: + self.imgs[i] = np.asarray(im) + if crop: + if save: + LOGGER.info(f'Saved results to {save_dir}\n') + return crops + + def print(self): + self.display(pprint=True) # print results + print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t) + + def show(self, labels=True): + self.display(show=True, labels=labels) # show results + + def save(self, labels=True, save_dir='runs/detect/exp'): + save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir + self.display(save=True, labels=labels, save_dir=save_dir) # save results + + def crop(self, save=True, save_dir='runs/detect/exp'): + save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) if save else None + return self.display(crop=True, save=save, save_dir=save_dir) # crop results + + def render(self, labels=True): + self.display(render=True, labels=labels) # render results + return self.imgs + + def pandas(self): + # return detections as pandas DataFrames, i.e. 
print(results.pandas().xyxy[0]) + new = copy(self) # return copy + ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns + cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns + for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]): + a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update + setattr(new, k, [pd.DataFrame(x, columns=c) for x in a]) + return new + + def tolist(self): + # return a list of Detections objects, i.e. 'for result in results.tolist():' + r = range(self.n) # iterable + x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r] + # for d in x: + # for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']: + # setattr(d, k, getattr(d, k)[0]) # pop out of list + return x + + def __len__(self): + return self.n # override len(results) + + def __str__(self): + self.print() # override print(results) + return '' + + +class Classify(nn.Module): + # Classification head, i.e. x(b,c1,20,20) to x(b,c2) + def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups + super().__init__() + self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) + self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1) + self.flat = nn.Flatten() + + def forward(self, x): + z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list + return self.flat(self.conv(z)) # flatten to x(b,c2) diff --git a/asone/detectors/yolov5/yolov5/models/experimental.py b/asone/detectors/yolov5/yolov5/models/experimental.py new file mode 100644 index 0000000000000000000000000000000000000000..333a7f7f33520e21dde2ef05df9ae124b5ab8846 --- /dev/null +++ b/asone/detectors/yolov5/yolov5/models/experimental.py @@ -0,0 +1,56 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Experimental modules +""" +import math + +import numpy as np +import torch +import torch.nn as nn +from asone.detectors.yolov5.yolov5.utils.yolov5_utils import yolov5_in_syspath + +class Ensemble(nn.ModuleList): + # Ensemble of models + def __init__(self): + super().__init__() + + def forward(self, x, augment=False, profile=False, visualize=False): + y = [module(x, augment, profile, visualize)[0] for module in self] + # y = torch.stack(y).max(0)[0] # max ensemble + # y = torch.stack(y).mean(0) # mean ensemble + y = torch.cat(y, 1) # nms ensemble + return y, None # inference, train output + + +def attempt_load(weights, device=None, inplace=True, fuse=True): + # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a + with yolov5_in_syspath(): + from asone.detectors.yolov5.yolov5.models.yolo import Detect, Model + + model = Ensemble() + for w in weights if isinstance(weights, list) else [weights]: + with yolov5_in_syspath(): + ckpt = torch.load(w, map_location='cpu') # load + ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model + model.append(ckpt.fuse().eval() if fuse else ckpt.eval()) # fused or un-fused model in eval mode + + # Compatibility updates + for m in model.modules(): + t = type(m) + if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model): + m.inplace = inplace # torch 1.7.0 compatibility + if t is Detect and not isinstance(m.anchor_grid, list): + delattr(m, 'anchor_grid') + setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl) + elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'): + 
m.recompute_scale_factor = None # torch 1.11.0 compatibility + + if len(model) == 1: + return model[-1] # return model + print(f'Ensemble created with {weights}\n') + for k in 'names', 'nc', 'yaml': + setattr(model, k, getattr(model[0], k)) + model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride + assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}' + return model # return ensemble + diff --git a/asone/detectors/yolov5/yolov5/models/general.py b/asone/detectors/yolov5/yolov5/models/general.py new file mode 100644 index 0000000000000000000000000000000000000000..de42316266d84e297efe2fbde3b239678a141674 --- /dev/null +++ b/asone/detectors/yolov5/yolov5/models/general.py @@ -0,0 +1,1036 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +General utils +""" + +import contextlib +import glob +import inspect +import logging +import math +import os +import platform +import random +import re +import shutil +import signal +import threading +import time +import urllib +from datetime import datetime +from itertools import repeat +from multiprocessing.pool import ThreadPool +from pathlib import Path +from subprocess import check_output +from typing import Optional +from zipfile import ZipFile + +import cv2 +import numpy as np +import pandas as pd +import pkg_resources as pkg +import torch +import torchvision +import yaml + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[1] # YOLOv5 root directory +RANK = int(os.getenv('RANK', -1)) + +# Settings +DATASETS_DIR = ROOT.parent / 'datasets' # YOLOv5 datasets directory +NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads +AUTOINSTALL = str(os.getenv('YOLOv5_AUTOINSTALL', True)).lower() == 'true' # global auto-install mode +VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode +FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf + +torch.set_printoptions(linewidth=320, precision=5, profile='long') +np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5 +pd.options.display.max_columns = 10 +cv2.setNumThreads(0) # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader) +os.environ['NUMEXPR_MAX_THREADS'] = str(NUM_THREADS) # NumExpr max threads +os.environ['OMP_NUM_THREADS'] = '1' if platform.system() == 'darwin' else str(NUM_THREADS) # OpenMP (PyTorch and SciPy) + + +def is_kaggle(): + # Is environment a Kaggle Notebook? 
+ try: + assert os.environ.get('PWD') == '/kaggle/working' + assert os.environ.get('KAGGLE_URL_BASE') == 'https://www.kaggle.com' + return True + except AssertionError: + return False + + +def is_writeable(dir, test=False): + # Return True if directory has write permissions, test opening a file with write permissions if test=True + if not test: + return os.access(dir, os.R_OK) # possible issues on Windows + file = Path(dir) / 'tmp.txt' + try: + with open(file, 'w'): # open file with write permissions + pass + file.unlink() # remove file + return True + except OSError: + return False + + +def set_logging(name=None, verbose=VERBOSE): + # Sets level and returns logger + if is_kaggle(): + for h in logging.root.handlers: + logging.root.removeHandler(h) # remove all handlers associated with the root logger object + rank = int(os.getenv('RANK', -1)) # rank in world for Multi-GPU trainings + level = logging.INFO if verbose and rank in {-1, 0} else logging.ERROR + log = logging.getLogger(name) + log.setLevel(level) + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter("%(message)s")) + handler.setLevel(level) + log.addHandler(handler) + + +set_logging() # run before defining LOGGER +LOGGER = logging.getLogger("yolov5") # define globally (used in train.py, val.py, detect.py, etc.) + + +def user_config_dir(dir='Ultralytics', env_var='YOLOV5_CONFIG_DIR'): + # Return path of user configuration directory. Prefer environment variable if exists. Make dir if required. + env = os.getenv(env_var) + if env: + path = Path(env) # use environment variable + else: + cfg = {'Windows': 'AppData/Roaming', 'Linux': '.config', 'Darwin': 'Library/Application Support'} # 3 OS dirs + path = Path.home() / cfg.get(platform.system(), '') # OS-specific config dir + path = (path if is_writeable(path) else Path('/tmp')) / dir # GCP and AWS lambda fix, only /tmp is writeable + path.mkdir(exist_ok=True) # make if required + return path + + +CONFIG_DIR = user_config_dir() # Ultralytics settings dir + + +class Profile(contextlib.ContextDecorator): + # Usage: @Profile() decorator or 'with Profile():' context manager + def __enter__(self): + self.start = time.time() + + def __exit__(self, type, value, traceback): + print(f'Profile results: {time.time() - self.start:.5f}s') + + +class Timeout(contextlib.ContextDecorator): + # Usage: @Timeout(seconds) decorator or 'with Timeout(seconds):' context manager + def __init__(self, seconds, *, timeout_msg='', suppress_timeout_errors=True): + self.seconds = int(seconds) + self.timeout_message = timeout_msg + self.suppress = bool(suppress_timeout_errors) + + def _timeout_handler(self, signum, frame): + raise TimeoutError(self.timeout_message) + + def __enter__(self): + if platform.system() != 'Windows': # not supported on Windows + signal.signal(signal.SIGALRM, self._timeout_handler) # Set handler for SIGALRM + signal.alarm(self.seconds) # start countdown for SIGALRM to be raised + + def __exit__(self, exc_type, exc_val, exc_tb): + if platform.system() != 'Windows': + signal.alarm(0) # Cancel SIGALRM if it's scheduled + if self.suppress and exc_type is TimeoutError: # Suppress TimeoutError + return True + + +class WorkingDirectory(contextlib.ContextDecorator): + # Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager + def __init__(self, new_dir): + self.dir = new_dir # new dir + self.cwd = Path.cwd().resolve() # current dir + + def __enter__(self): + os.chdir(self.dir) + + def __exit__(self, exc_type, exc_val, exc_tb): + 
os.chdir(self.cwd) + + +def try_except(func): + # try-except function. Usage: @try_except decorator + def handler(*args, **kwargs): + try: + func(*args, **kwargs) + except Exception as e: + print(e) + + return handler + + +def threaded(func): + # Multi-threads a target function and returns thread. Usage: @threaded decorator + def wrapper(*args, **kwargs): + thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True) + thread.start() + return thread + + return wrapper + + +def methods(instance): + # Get class/instance methods + return [f for f in dir(instance) if callable(getattr(instance, f)) and not f.startswith("__")] + + +def print_args(args: Optional[dict] = None, show_file=True, show_fcn=False): + # Print function arguments (optional args dict) + x = inspect.currentframe().f_back # previous frame + file, _, fcn, _, _ = inspect.getframeinfo(x) + if args is None: # get args automatically + args, _, _, frm = inspect.getargvalues(x) + args = {k: v for k, v in frm.items() if k in args} + s = (f'{Path(file).stem}: ' if show_file else '') + (f'{fcn}: ' if show_fcn else '') + LOGGER.info(colorstr(s) + ', '.join(f'{k}={v}' for k, v in args.items())) + + +def init_seeds(seed=0, deterministic=False): + # Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html + # cudnn seed 0 settings are slower and more reproducible, else faster and less reproducible + import torch.backends.cudnn as cudnn + + if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213 + torch.use_deterministic_algorithms(True) + os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' + os.environ['PYTHONHASHSEED'] = str(seed) + + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + cudnn.benchmark, cudnn.deterministic = (False, True) if seed == 0 else (True, False) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) # for Multi-GPU, exception safe + + +def intersect_dicts(da, db, exclude=()): + # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values + return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} + + +def get_latest_run(search_dir='.'): + # Return path to most recent 'last.pt' in /runs (i.e. to --resume from) + last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True) + return max(last_list, key=os.path.getctime) if last_list else '' + + +def is_docker() -> bool: + """Check if the process runs inside a docker container.""" + if Path("/.dockerenv").exists(): + return True + try: # check if docker is in control groups + with open("/proc/self/cgroup") as file: + return any("docker" in line for line in file) + except OSError: + return False + + +def is_colab(): + # Is environment a Google Colab instance? + try: + import google.colab + return True + except ImportError: + return False + + +def is_pip(): + # Is file in a pip package? + return 'site-packages' in Path(__file__).resolve().parts + + +def is_ascii(s=''): + # Is string composed of all ASCII (no UTF) characters? (note str().isascii() introduced in python 3.7) + s = str(s) # convert list, tuple, None, etc. to str + return len(s.encode().decode('ascii', 'ignore')) == len(s) + + +def is_chinese(s='人工智能'): + # Is string composed of any Chinese characters? 
+ return bool(re.search('[\u4e00-\u9fff]', str(s))) + + +def emojis(str=''): + # Return platform-dependent emoji-safe version of string + return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str + + +def file_age(path=__file__): + # Return days since last file update + dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime)) # delta + return dt.days # + dt.seconds / 86400 # fractional days + + +def file_date(path=__file__): + # Return human-readable file modification date, i.e. '2021-3-26' + t = datetime.fromtimestamp(Path(path).stat().st_mtime) + return f'{t.year}-{t.month}-{t.day}' + + +def file_size(path): + # Return file/dir size (MB) + mb = 1 << 20 # bytes to MiB (1024 ** 2) + path = Path(path) + if path.is_file(): + return path.stat().st_size / mb + elif path.is_dir(): + return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / mb + else: + return 0.0 + + +def check_online(): + # Check internet connectivity + import socket + try: + socket.create_connection(("1.1.1.1", 443), 5) # check host accessibility + return True + except OSError: + return False + + +def git_describe(path=ROOT): # path must be a directory + # Return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe + try: + assert (Path(path) / '.git').is_dir() + return check_output(f'git -C {path} describe --tags --long --always', shell=True).decode()[:-1] + except Exception: + return '' + + +@try_except +@WorkingDirectory(ROOT) +def check_git_status(repo='ultralytics/yolov5'): + # YOLOv5 status check, recommend 'git pull' if code is out of date + url = f'https://github.com/{repo}' + msg = f', for updates see {url}' + s = colorstr('github: ') # string + assert Path('.git').exists(), s + 'skipping check (not a git repository)' + msg + assert check_online(), s + 'skipping check (offline)' + msg + + splits = re.split(pattern=r'\s', string=check_output('git remote -v', shell=True).decode()) + matches = [repo in s for s in splits] + if any(matches): + remote = splits[matches.index(True) - 1] + else: + remote = 'ultralytics' + check_output(f'git remote add {remote} {url}', shell=True) + check_output(f'git fetch {remote}', shell=True, timeout=5) # git fetch + branch = check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip() # checked out + n = int(check_output(f'git rev-list {branch}..{remote}/master --count', shell=True)) # commits behind + if n > 0: + pull = 'git pull' if remote == 'origin' else f'git pull {remote} master' + s += f"⚠️ YOLOv5 is out of date by {n} commit{'s' * (n > 1)}. Use `{pull}` or `git clone {url}` to update." + else: + s += f'up to date with {url} ✅' + LOGGER.info(emojis(s)) # emoji-safe + + +def check_python(minimum='3.7.0'): + # Check current python version vs. required python version + check_version(platform.python_version(), minimum, name='Python ', hard=True) + + +def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=False, hard=False, verbose=False): + # Check version vs. 
required version + current, minimum = (pkg.parse_version(x) for x in (current, minimum)) + result = (current == minimum) if pinned else (current >= minimum) # bool + s = f'{name}{minimum} required by YOLOv5, but {name}{current} is currently installed' # string + if hard: + assert result, s # assert min requirements met + if verbose and not result: + LOGGER.warning(s) + return result + + +@try_except +def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True, cmds=()): + # Check installed dependencies meet requirements (pass *.txt file or list of packages) + prefix = colorstr('red', 'bold', 'requirements:') + check_python() # check python version + if isinstance(requirements, (str, Path)): # requirements.txt file + file = Path(requirements) + assert file.exists(), f"{prefix} {file.resolve()} not found, check failed." + with file.open() as f: + requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude] + else: # list or tuple of packages + requirements = [x for x in requirements if x not in exclude] + + n = 0 # number of packages updates + for i, r in enumerate(requirements): + try: + pkg.require(r) + except Exception: # DistributionNotFound or VersionConflict if requirements not met + s = f"{prefix} {r} not found and is required by YOLOv5" + if install and AUTOINSTALL: # check environment variable + LOGGER.info(f"{s}, attempting auto-update...") + try: + assert check_online(), f"'pip install {r}' skipped (offline)" + LOGGER.info(check_output(f'pip install "{r}" {cmds[i] if cmds else ""}', shell=True).decode()) + n += 1 + except Exception as e: + LOGGER.warning(f'{prefix} {e}') + else: + LOGGER.info(f'{s}. Please install and rerun your command.') + + if n: # if packages updated + source = file.resolve() if 'file' in locals() else requirements + s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \ + f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n" + LOGGER.info(emojis(s)) + + +def check_img_size(imgsz, s=32, floor=0): + # Verify image size is a multiple of stride s in each dimension + if isinstance(imgsz, int): # integer i.e. img_size=640 + new_size = max(make_divisible(imgsz, int(s)), floor) + else: # list i.e. 
img_size=[640, 480] + imgsz = list(imgsz) # convert to list if tuple + new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz] + if new_size != imgsz: + LOGGER.warning(f'WARNING: --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}') + return new_size + + +def check_imshow(): + # Check if environment supports image displays + try: + assert not is_docker(), 'cv2.imshow() is disabled in Docker environments' + assert not is_colab(), 'cv2.imshow() is disabled in Google Colab environments' + cv2.imshow('test', np.zeros((1, 1, 3))) + cv2.waitKey(1) + cv2.destroyAllWindows() + cv2.waitKey(1) + return True + except Exception as e: + LOGGER.warning(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}') + return False + + +def check_suffix(file='yolov5s.pt', suffix=('.pt',), msg=''): + # Check file(s) for acceptable suffix + if file and suffix: + if isinstance(suffix, str): + suffix = [suffix] + for f in file if isinstance(file, (list, tuple)) else [file]: + s = Path(f).suffix.lower() # file suffix + if len(s): + assert s in suffix, f"{msg}{f} acceptable suffix is {suffix}" + + +def check_yaml(file, suffix=('.yaml', '.yml')): + # Search/download YAML file (if necessary) and return path, checking suffix + return check_file(file, suffix) + + +def check_file(file, suffix=''): + # Search/download file (if necessary) and return path + check_suffix(file, suffix) # optional + file = str(file) # convert to str() + if Path(file).is_file() or not file: # exists + return file + elif file.startswith(('http:/', 'https:/')): # download + url = file # warning: Pathlib turns :// -> :/ + file = Path(urllib.parse.unquote(file).split('?')[0]).name # '%2F' to '/', split https://url.com/file.txt?auth + if Path(file).is_file(): + LOGGER.info(f'Found {url} locally at {file}') # file already exists + else: + LOGGER.info(f'Downloading {url} to {file}...') + torch.hub.download_url_to_file(url, file) + assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check + return file + else: # search + files = [] + for d in 'data', 'models', 'utils': # search directories + files.extend(glob.glob(str(ROOT / d / '**' / file), recursive=True)) # find file + assert len(files), f'File not found: {file}' # assert file was found + assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique + return files[0] # return file + + +def check_font(font=FONT, progress=False): + # Download font to CONFIG_DIR if necessary + font = Path(font) + file = CONFIG_DIR / font.name + if not font.exists() and not file.exists(): + url = "https://ultralytics.com/assets/" + font.name + LOGGER.info(f'Downloading {url} to {file}...') + torch.hub.download_url_to_file(url, str(file), progress=progress) + + +def check_dataset(data, autodownload=True): + # Download, check and/or unzip dataset if not found locally + + # Download (optional) + extract_dir = '' + if isinstance(data, (str, Path)) and str(data).endswith('.zip'): # i.e. 
gs://bucket/dir/coco128.zip + download(data, dir=DATASETS_DIR, unzip=True, delete=False, curl=False, threads=1) + data = next((DATASETS_DIR / Path(data).stem).rglob('*.yaml')) + extract_dir, autodownload = data.parent, False + + # Read yaml (optional) + if isinstance(data, (str, Path)): + with open(data, errors='ignore') as f: + data = yaml.safe_load(f) # dictionary + + # Checks + for k in 'train', 'val', 'nc': + assert k in data, emojis(f"data.yaml '{k}:' field missing ❌") + if 'names' not in data: + LOGGER.warning(emojis("data.yaml 'names:' field missing ⚠️, assigning default names 'class0', 'class1', etc.")) + data['names'] = [f'class{i}' for i in range(data['nc'])] # default names + + # Resolve paths + path = Path(extract_dir or data.get('path') or '') # optional 'path' default to '.' + if not path.is_absolute(): + path = (ROOT / path).resolve() + for k in 'train', 'val', 'test': + if data.get(k): # prepend path + data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]] + + # Parse yaml + train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download')) + if val: + val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path + if not all(x.exists() for x in val): + LOGGER.info(emojis('\nDataset not found ⚠️, missing paths %s' % [str(x) for x in val if not x.exists()])) + if not s or not autodownload: + raise Exception(emojis('Dataset not found ❌')) + t = time.time() + root = path.parent if 'path' in data else '..' # unzip directory i.e. '../' + if s.startswith('http') and s.endswith('.zip'): # URL + f = Path(s).name # filename + LOGGER.info(f'Downloading {s} to {f}...') + torch.hub.download_url_to_file(s, f) + Path(root).mkdir(parents=True, exist_ok=True) # create root + ZipFile(f).extractall(path=root) # unzip + Path(f).unlink() # remove zip + r = None # success + elif s.startswith('bash '): # bash script + LOGGER.info(f'Running {s} ...') + r = os.system(s) + else: # python script + r = exec(s, {'yaml': data}) # return None + dt = f'({round(time.time() - t, 1)}s)' + s = f"success ✅ {dt}, saved to {colorstr('bold', root)}" if r in (0, None) else f"failure {dt} ❌" + LOGGER.info(emojis(f"Dataset download {s}")) + check_font('Arial.ttf' if is_ascii(data['names']) else 'Arial.Unicode.ttf', progress=True) # download fonts + return data # dictionary + + +def check_amp(model): + # Check PyTorch Automatic Mixed Precision (AMP) functionality. Return True on correct operation + from asone.detectors.yolov5.utils.common import AutoShape, DetectMultiBackend + + def amp_allclose(model, im): + # All close FP32 vs AMP results + m = AutoShape(model, verbose=False) # model + a = m(im).xywhn[0] # FP32 inference + m.amp = True + b = m(im).xywhn[0] # AMP inference + return a.shape == b.shape and torch.allclose(a, b, atol=0.1) # close to 10% absolute tolerance + + prefix = colorstr('AMP: ') + device = next(model.parameters()).device # get model device + if device.type == 'cpu': + return False # AMP disabled on CPU + f = ROOT / 'data' / 'images' / 'bus.jpg' # image to check + im = f if f.exists() else 'https://ultralytics.com/images/bus.jpg' if check_online() else np.ones((640, 640, 3)) + try: + assert amp_allclose(model, im) or amp_allclose(DetectMultiBackend('yolov5n.pt', device), im) + LOGGER.info(emojis(f'{prefix}checks passed ✅')) + return True + except Exception: + help_url = 'https://github.com/ultralytics/yolov5/issues/7908' + LOGGER.warning(emojis(f'{prefix}checks failed ❌, disabling Automatic Mixed Precision. 
See {help_url}')) + return False + + +def url2file(url): + # Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt + url = str(Path(url)).replace(':/', '://') # Pathlib turns :// -> :/ + return Path(urllib.parse.unquote(url)).name.split('?')[0] # '%2F' to '/', split https://url.com/file.txt?auth + + +def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry=3): + # Multi-threaded file download and unzip function, used in data.yaml for autodownload + def download_one(url, dir): + # Download 1 file + success = True + f = dir / Path(url).name # filename + if Path(url).is_file(): # exists in current path + Path(url).rename(f) # move to dir + elif not f.exists(): + LOGGER.info(f'Downloading {url} to {f}...') + for i in range(retry + 1): + if curl: + s = 'sS' if threads > 1 else '' # silent + r = os.system(f'curl -{s}L "{url}" -o "{f}" --retry 9 -C -') # curl download with retry, continue + success = r == 0 + else: + torch.hub.download_url_to_file(url, f, progress=threads == 1) # torch download + success = f.is_file() + if success: + break + elif i < retry: + LOGGER.warning(f'Download failure, retrying {i + 1}/{retry} {url}...') + else: + LOGGER.warning(f'Failed to download {url}...') + + if unzip and success and f.suffix in ('.zip', '.gz'): + LOGGER.info(f'Unzipping {f}...') + if f.suffix == '.zip': + ZipFile(f).extractall(path=dir) # unzip + elif f.suffix == '.gz': + os.system(f'tar xfz {f} --directory {f.parent}') # unzip + if delete: + f.unlink() # remove zip + + dir = Path(dir) + dir.mkdir(parents=True, exist_ok=True) # make directory + if threads > 1: + pool = ThreadPool(threads) + pool.imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multi-threaded + pool.close() + pool.join() + else: + for u in [url] if isinstance(url, (str, Path)) else url: + download_one(u, dir) + + +def make_divisible(x, divisor): + # Returns nearest x divisible by divisor + if isinstance(divisor, torch.Tensor): + divisor = int(divisor.max()) # to int + return math.ceil(x / divisor) * divisor + + +def clean_str(s): + # Cleans a string by replacing special characters with underscore _ + return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s) + + +def one_cycle(y1=0.0, y2=1.0, steps=100): + # lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf + return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1 + + +def colorstr(*input): + # Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. 
colorstr('blue', 'hello world') + *args, string = input if len(input) > 1 else ('blue', 'bold', input[0]) # color arguments, string + colors = { + 'black': '\033[30m', # basic colors + 'red': '\033[31m', + 'green': '\033[32m', + 'yellow': '\033[33m', + 'blue': '\033[34m', + 'magenta': '\033[35m', + 'cyan': '\033[36m', + 'white': '\033[37m', + 'bright_black': '\033[90m', # bright colors + 'bright_red': '\033[91m', + 'bright_green': '\033[92m', + 'bright_yellow': '\033[93m', + 'bright_blue': '\033[94m', + 'bright_magenta': '\033[95m', + 'bright_cyan': '\033[96m', + 'bright_white': '\033[97m', + 'end': '\033[0m', # misc + 'bold': '\033[1m', + 'underline': '\033[4m'} + return ''.join(colors[x] for x in args) + f'{string}' + colors['end'] + + +def labels_to_class_weights(labels, nc=80): + # Get class weights (inverse frequency) from training labels + if labels[0] is None: # no labels loaded + return torch.Tensor() + + labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO + classes = labels[:, 0].astype(int) # labels = [class xywh] + weights = np.bincount(classes, minlength=nc) # occurrences per class + + # Prepend gridpoint count (for uCE training) + # gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum() # gridpoints per image + # weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5 # prepend gridpoints to start + + weights[weights == 0] = 1 # replace empty bins with 1 + weights = 1 / weights # number of targets per class + weights /= weights.sum() # normalize + return torch.from_numpy(weights).float() + + +def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)): + # Produces image weights based on class_weights and image contents + # Usage: index = random.choices(range(n), weights=image_weights, k=1) # weighted image sample + class_counts = np.array([np.bincount(x[:, 0].astype(int), minlength=nc) for x in labels]) + return (class_weights.reshape(1, nc) * class_counts).sum(1) + + +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ + # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n') + # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n') + # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco + # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + + +def xyxy2xywh(x): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center + y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center + y[:, 2] = x[:, 2] - x[:, 0] # width + y[:, 3] = x[:, 3] - x[:, 1] # height + return y + + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + + 
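+# Illustrative helper (added example; not part of the original YOLOv5 utilities): shows the
+# round-trip between corner format (x1, y1, x2, y2) and center format (x, y, w, h) implemented
+# by xyxy2xywh()/xywh2xyxy() above. A box with corners (0, 0, 10, 20) becomes (5, 10, 10, 20)
+# in center format, and converting back recovers the original corners.
+def _box_format_roundtrip_example():
+    b = np.array([[0., 0., 10., 20.]])            # one box, xyxy
+    c = xyxy2xywh(b)                              # -> [[ 5., 10., 10., 20.]] (xc, yc, w, h)
+    assert np.allclose(c, [[5., 10., 10., 20.]])
+    assert np.allclose(xywh2xyxy(c), b)           # round-trip restores the xyxy corners
+
+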
+def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): + # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw # top left x + y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh # top left y + y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw # bottom right x + y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh # bottom right y + return y + + +def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right + if clip: + clip_coords(x, (h - eps, w - eps)) # warning: inplace clip + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w # x center + y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h # y center + y[:, 2] = (x[:, 2] - x[:, 0]) / w # width + y[:, 3] = (x[:, 3] - x[:, 1]) / h # height + return y + + +def xyn2xy(x, w=640, h=640, padw=0, padh=0): + # Convert normalized segments into pixel segments, shape (n,2) + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = w * x[:, 0] + padw # top left x + y[:, 1] = h * x[:, 1] + padh # top left y + return y + + +def segment2box(segment, width=640, height=640): + # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) + x, y = segment.T # segment xy + inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) + x, y, = x[inside], y[inside] + return np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) # xyxy + + +def segments2boxes(segments): + # Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh) + boxes = [] + for s in segments: + x, y = s.T # segment xy + boxes.append([x.min(), y.min(), x.max(), y.max()]) # cls, xyxy + return xyxy2xywh(np.array(boxes)) # cls, xywh + + +def resample_segments(segments, n=1000): + # Up-sample an (n,2) segment + for i, s in enumerate(segments): + s = np.concatenate((s, s[0:1, :]), axis=0) + x = np.linspace(0, len(s) - 1, n) + xp = np.arange(len(s)) + segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T # segment xy + return segments + + +def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): + # Rescale coords (xyxy) from img1_shape to img0_shape + if ratio_pad is None: # calculate from img0_shape + gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new + pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding + else: + gain = ratio_pad[0][0] + pad = ratio_pad[1] + + coords[:, [0, 2]] -= pad[0] # x padding + coords[:, [1, 3]] -= pad[1] # y padding + coords[:, :4] /= gain + clip_coords(coords, img0_shape) + return coords + + +def clip_coords(boxes, shape): + # Clip bounding xyxy bounding boxes to image shape (height, width) + if isinstance(boxes, torch.Tensor): # faster individually + boxes[:, 0].clamp_(0, shape[1]) # x1 + boxes[:, 1].clamp_(0, shape[0]) # y1 + boxes[:, 2].clamp_(0, shape[1]) # x2 + boxes[:, 3].clamp_(0, shape[0]) # y2 + else: # np.array (faster grouped) + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 + + +def non_max_suppression(prediction, + conf_thres=0.25, + iou_thres=0.45, + classes=None, + agnostic=False, + multi_label=False, + labels=(), + max_det=300): + """Non-Maximum 
Suppression (NMS) on inference results to reject overlapping bounding boxes + + Returns: + list of detections, on (n,6) tensor per image [xyxy, conf, cls] + """ + + bs = prediction.shape[0] # batch size + nc = prediction.shape[2] - 5 # number of classes + xc = prediction[..., 4] > conf_thres # candidates + + # Checks + assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' + assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' + + # Settings + # min_wh = 2 # (pixels) minimum box width and height + max_wh = 7680 # (pixels) maximum box width and height + max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() + time_limit = 0.3 + 0.03 * bs # seconds to quit after + redundant = True # require redundant detections + multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) + merge = False # use merge-NMS + + t = time.time() + output = [torch.zeros((0, 6), device=prediction.device)] * bs + for xi, x in enumerate(prediction): # image index, image inference + # Apply constraints + # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height + x = x[xc[xi]] # confidence + + # Cat apriori labels if autolabelling + if labels and len(labels[xi]): + lb = labels[xi] + v = torch.zeros((len(lb), nc + 5), device=x.device) + v[:, :4] = lb[:, 1:5] # box + v[:, 4] = 1.0 # conf + v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls + x = torch.cat((x, v), 0) + + # If none remain process next image + if not x.shape[0]: + continue + + # Compute conf + x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf + + # Box (center x, center y, width, height) to (x1, y1, x2, y2) + box = xywh2xyxy(x[:, :4]) + + # Detections matrix nx6 (xyxy, conf, cls) + if multi_label: + i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T + x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) + else: # best class only + conf, j = x[:, 5:].max(1, keepdim=True) + x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] + + # Filter by class + if classes is not None: + x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] + + # Apply finite constraint + # if not torch.isfinite(x).all(): + # x = x[torch.isfinite(x).all(1)] + + # Check shape + n = x.shape[0] # number of boxes + if not n: # no boxes + continue + elif n > max_nms: # excess boxes + x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence + + # Batched NMS + c = x[:, 5:6] * (0 if agnostic else max_wh) # classes + boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores + i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS + if i.shape[0] > max_det: # limit detections + i = i[:max_det] + if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) + # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) + iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix + weights = iou * scores[None] # box weights + x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes + if redundant: + i = i[iou.sum(1) > 1] # require redundancy + + output[xi] = x[i] + if (time.time() - t) > time_limit: + LOGGER.warning(f'WARNING: NMS time limit {time_limit:.3f}s exceeded') + break # time limit exceeded + + return output + + +def strip_optimizer(f='best.pt', s=''): # from utils.general import *; strip_optimizer() + # Strip optimizer from 'f' to finalize training, optionally save as 's' + x = torch.load(f, map_location=torch.device('cpu')) + if 
x.get('ema'): + x['model'] = x['ema'] # replace model with ema + for k in 'optimizer', 'best_fitness', 'wandb_id', 'ema', 'updates': # keys + x[k] = None + x['epoch'] = -1 + x['model'].half() # to FP16 + for p in x['model'].parameters(): + p.requires_grad = False + torch.save(x, s or f) + mb = os.path.getsize(s or f) / 1E6 # filesize + LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB") + + +def print_mutation(results, hyp, save_dir, bucket, prefix=colorstr('evolve: ')): + evolve_csv = save_dir / 'evolve.csv' + evolve_yaml = save_dir / 'hyp_evolve.yaml' + keys = ('metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/box_loss', + 'val/obj_loss', 'val/cls_loss') + tuple(hyp.keys()) # [results + hyps] + keys = tuple(x.strip() for x in keys) + vals = results + tuple(hyp.values()) + n = len(keys) + + # Download (optional) + if bucket: + url = f'gs://{bucket}/evolve.csv' + if gsutil_getsize(url) > (evolve_csv.stat().st_size if evolve_csv.exists() else 0): + os.system(f'gsutil cp {url} {save_dir}') # download evolve.csv if larger than local + + # Log to evolve.csv + s = '' if evolve_csv.exists() else (('%20s,' * n % keys).rstrip(',') + '\n') # add header + with open(evolve_csv, 'a') as f: + f.write(s + ('%20.5g,' * n % vals).rstrip(',') + '\n') + + # Save yaml + with open(evolve_yaml, 'w') as f: + data = pd.read_csv(evolve_csv) + data = data.rename(columns=lambda x: x.strip()) # strip keys + i = np.argmax(fitness(data.values[:, :4])) # + generations = len(data) + f.write('# YOLOv5 Hyperparameter Evolution Results\n' + f'# Best generation: {i}\n' + + f'# Last generation: {generations - 1}\n' + '# ' + ', '.join(f'{x.strip():>20s}' for x in keys[:7]) + + '\n' + '# ' + ', '.join(f'{x:>20.5g}' for x in data.values[i, :7]) + '\n\n') + yaml.safe_dump(data.loc[i][7:].to_dict(), f, sort_keys=False) + + # Print to screen + LOGGER.info(prefix + f'{generations} generations finished, current result:\n' + prefix + + ', '.join(f'{x.strip():>20s}' for x in keys) + '\n' + prefix + ', '.join(f'{x:20.5g}' + for x in vals) + '\n\n') + + if bucket: + os.system(f'gsutil cp {evolve_csv} {evolve_yaml} gs://{bucket}') # upload + + +def apply_classifier(x, model, img, im0): + # Apply a second stage classifier to YOLO outputs + # Example model = torchvision.models.__dict__['efficientnet_b0'](pretrained=True).to(device).eval() + im0 = [im0] if isinstance(im0, np.ndarray) else im0 + for i, d in enumerate(x): # per image + if d is not None and len(d): + d = d.clone() + + # Reshape and pad cutouts + b = xyxy2xywh(d[:, :4]) # boxes + b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # rectangle to square + b[:, 2:] = b[:, 2:] * 1.3 + 30 # pad + d[:, :4] = xywh2xyxy(b).long() + + # Rescale boxes from img_size to im0 size + scale_coords(img.shape[2:], d[:, :4], im0[i].shape) + + # Classes + pred_cls1 = d[:, 5].long() + ims = [] + for a in d: + cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])] + im = cv2.resize(cutout, (224, 224)) # BGR + + im = im[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 + im = np.ascontiguousarray(im, dtype=np.float32) # uint8 to float32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + ims.append(im) + + pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1) # classifier prediction + x[i] = x[i][pred_cls1 == pred_cls2] # retain matching class detections + + return x + + +def increment_path(path, exist_ok=False, sep='', mkdir=False): + # Increment file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc. 
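+    # Added example (not part of the original source): if 'runs/exp' already exists,
+    # increment_path('runs/exp') returns Path('runs/exp2'); increment_path('runs/exp', sep='_')
+    # returns Path('runs/exp_2'); and exist_ok=True returns the existing path unchanged.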
+ path = Path(path) # os-agnostic + if path.exists() and not exist_ok: + path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '') + + # Method 1 + for n in range(2, 9999): + p = f'{path}{sep}{n}{suffix}' # increment path + if not os.path.exists(p): # + break + path = Path(p) + + # Method 2 (deprecated) + # dirs = glob.glob(f"{path}{sep}*") # similar paths + # matches = [re.search(rf"{path.stem}{sep}(\d+)", d) for d in dirs] + # i = [int(m.groups()[0]) for m in matches if m] # indices + # n = max(i) + 1 if i else 2 # increment number + # path = Path(f"{path}{sep}{n}{suffix}") # increment path + + if mkdir: + path.mkdir(parents=True, exist_ok=True) # make directory + + return path + + +# OpenCV Chinese-friendly functions ------------------------------------------------------------------------------------ +imshow_ = cv2.imshow # copy to avoid recursion errors + + +def imread(path, flags=cv2.IMREAD_COLOR): + return cv2.imdecode(np.fromfile(path, np.uint8), flags) + + +def imwrite(path, im): + try: + cv2.imencode(Path(path).suffix, im)[1].tofile(path) + return True + except Exception: + return False + + +def imshow(path, im): + imshow_(path.encode('unicode_escape').decode(), im) + + +cv2.imread, cv2.imwrite, cv2.imshow = imread, imwrite, imshow # redefine + +# Variables ------------------------------------------------------------------------------------------------------------ +NCOLS = 0 if is_docker() else shutil.get_terminal_size().columns # terminal window size for tqdm diff --git a/asone/detectors/yolov5/yolov5/models/yolo.py b/asone/detectors/yolov5/yolov5/models/yolo.py new file mode 100644 index 0000000000000000000000000000000000000000..185d8e8633ae5ffa443a523b974c625b2fcdc258 --- /dev/null +++ b/asone/detectors/yolov5/yolov5/models/yolo.py @@ -0,0 +1,345 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +YOLO-specific modules + +Usage: + $ python path/to/models/yolo.py --cfg yolov5s.yaml +""" + +import argparse +import contextlib +import os +import platform +import sys +from copy import deepcopy +from pathlib import Path + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[1] # YOLOv5 root directory +# if str(ROOT) not in sys.path: +# sys.path.append(str(ROOT)) # add ROOT to PATH +if platform.system() != 'Windows': + ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative + +from asone.detectors.yolov5.yolov5.models.common import * +from asone.detectors.yolov5.yolov5.models.experimental import * +from asone.detectors.yolov5.yolov5.models.general import (LOGGER, check_version, + check_yaml, make_divisible, + print_args) +from asone.detectors.yolov5.yolov5.utils.torch_utils import ( + fuse_conv_and_bn, + initialize_weights, + model_info, + profile, + scale_img, + select_device, + time_sync) + +try: + import thop # for FLOPs computation +except ImportError: + thop = None + + +class Detect(nn.Module): + stride = None # strides computed during build + onnx_dynamic = False # ONNX export parameter + export = False # export mode + + def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer + super().__init__() + self.nc = nc # number of classes + self.no = nc + 5 # number of outputs per anchor + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + self.grid = [torch.zeros(1)] * self.nl # init grid + self.anchor_grid = [torch.zeros(1)] * self.nl # init anchor grid + self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2) + self.m = 
nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv + self.inplace = inplace # use inplace ops (e.g. slice assignment) + + def forward(self, x): + z = [] # inference output + for i in range(self.nl): + x[i] = self.m[i](x[i]) # conv + bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) + x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() + + if not self.training: # inference + if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]: + self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i) + + y = x[i].sigmoid() + if self.inplace: + y[..., 0:2] = (y[..., 0:2] * 2 + self.grid[i]) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 + xy, wh, conf = y.split((2, 2, self.nc + 1), 4) # y.tensor_split((2, 4, 5), 4) # torch 1.8.0 + xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy + wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh + y = torch.cat((xy, wh, conf), 4) + z.append(y.view(bs, -1, self.no)) + + return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x) + + def _make_grid(self, nx=20, ny=20, i=0): + d = self.anchors[i].device + t = self.anchors[i].dtype + shape = 1, self.na, ny, nx, 2 # grid shape + y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t) + if check_version(torch.__version__, '1.10.0'): # torch>=1.10.0 meshgrid workaround for torch>=0.7 compatibility + yv, xv = torch.meshgrid(y, x, indexing='ij') + else: + yv, xv = torch.meshgrid(y, x) + grid = torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. y = 2.0 * x - 0.5 + anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape) + return grid, anchor_grid + + +class DetectionModel(nn.Module): + # YOLOv5 model + def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes + super().__init__() + if isinstance(cfg, dict): + self.yaml = cfg # model dict + else: # is *.yaml + import yaml # for torch hub + self.yaml_file = Path(cfg).name + with open(cfg, encoding='ascii', errors='ignore') as f: + self.yaml = yaml.safe_load(f) # model dict + + # Define model + ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels + if nc and nc != self.yaml['nc']: + LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") + self.yaml['nc'] = nc # override yaml value + if anchors: + LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}') + self.yaml['anchors'] = round(anchors) # override yaml value + self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist + self.names = [str(i) for i in range(self.yaml['nc'])] # default names + self.inplace = self.yaml.get('inplace', True) + + # Build strides, anchors + m = self.model[-1] # Detect() + if isinstance(m, Detect): + s = 256 # 2x min stride + m.inplace = self.inplace + m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward + check_anchor_order(m) # must be in pixel-space (not grid-space) + m.anchors /= m.stride.view(-1, 1, 1) + self.stride = m.stride + self._initialize_biases() # only run once + + # Init weights, biases + initialize_weights(self) + self.info() + LOGGER.info('') + + def forward(self, x, augment=False, profile=False, visualize=False): + if augment: + return self._forward_augment(x) # augmented inference, None + return 
self._forward_once(x, profile, visualize) # single-scale inference, train + + def _forward_augment(self, x): + img_size = x.shape[-2:] # height, width + s = [1, 0.83, 0.67] # scales + f = [None, 3, None] # flips (2-ud, 3-lr) + y = [] # outputs + for si, fi in zip(s, f): + xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) + yi = self._forward_once(xi)[0] # forward + # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save + yi = self._descale_pred(yi, fi, si, img_size) + y.append(yi) + y = self._clip_augmented(y) # clip augmented tails + return torch.cat(y, 1), None # augmented inference, train + + def _forward_once(self, x, profile=False, visualize=False): + y, dt = [], [] # outputs + for m in self.model: + if m.f != -1: # if not from previous layer + x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers + if profile: + self._profile_one_layer(m, x, dt) + x = m(x) # run + y.append(x if m.i in self.save else None) # save output + if visualize: + feature_visualization(x, m.type, m.i, save_dir=visualize) + return x + + def _descale_pred(self, p, flips, scale, img_size): + # de-scale predictions following augmented inference (inverse operation) + if self.inplace: + p[..., :4] /= scale # de-scale + if flips == 2: + p[..., 1] = img_size[0] - p[..., 1] # de-flip ud + elif flips == 3: + p[..., 0] = img_size[1] - p[..., 0] # de-flip lr + else: + x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale + if flips == 2: + y = img_size[0] - y # de-flip ud + elif flips == 3: + x = img_size[1] - x # de-flip lr + p = torch.cat((x, y, wh, p[..., 4:]), -1) + return p + + def _clip_augmented(self, y): + # Clip YOLOv5 augmented inference tails + nl = self.model[-1].nl # number of detection layers (P3-P5) + g = sum(4 ** x for x in range(nl)) # grid points + e = 1 # exclude layer count + i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e)) # indices + y[0] = y[0][:, :-i] # large + i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices + y[-1] = y[-1][:, i:] # small + return y + + def _profile_one_layer(self, m, x, dt): + c = isinstance(m, Detect) # is final layer, copy input as inplace fix + o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs + t = time_sync() + for _ in range(10): + m(x.copy() if c else x) + dt.append((time_sync() - t) * 100) + if m == self.model[0]: + LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module") + LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}') + if c: + LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total") + + def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency + # https://arxiv.org/abs/1708.02002 section 3.3 + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 
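+        # Added note (not part of the original source): following the prior-initialisation idea
+        # from the Focal Loss paper referenced above, the objectness bias is shifted so that
+        # sigmoid(bias_obj) ~ 8 / (640 / s)^2, i.e. roughly 8 objects expected over the
+        # (640 / s)^2 grid cells of a 640x640 image at stride s, and each class bias starts
+        # near a prior of ~0.6 / nc (or log(cf / cf.sum()) when class frequencies cf are given).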
+ m = self.model[-1] # Detect() module + for mi, s in zip(m.m, m.stride): # from + b = mi.bias.view(m.na, -1).detach() # conv.bias(255) to (3,85) + b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) + b[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # cls + mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) + + def _print_biases(self): + m = self.model[-1] # Detect() module + for mi in m.m: # from + b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) + LOGGER.info( + ('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) + + def _print_weights(self): + for m in self.model.modules(): + if type(m) is Bottleneck: + LOGGER.info('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights + + def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers + # LOGGER.info('Fusing layers... ') + for m in self.model.modules(): + if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'): + m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv + delattr(m, 'bn') # remove batchnorm + m.forward = m.forward_fuse # update forward + # self.info() + return self + + def info(self, verbose=False, img_size=640): # print model information + model_info(self, verbose, img_size) + + def _apply(self, fn): + # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers + self = super()._apply(fn) + m = self.model[-1] # Detect() + if isinstance(m, Detect): + m.stride = fn(m.stride) + m.grid = list(map(fn, m.grid)) + if isinstance(m.anchor_grid, list): + m.anchor_grid = list(map(fn, m.anchor_grid)) + return self + +Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility + + +def parse_model(d, ch): # model_dict, input_channels(3) + LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}") + anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] + na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors + no = na * (nc + 5) # number of outputs = anchors * (classes + 5) + + layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out + for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args + m = eval(m) if isinstance(m, str) else m # eval strings + for j, a in enumerate(args): + with contextlib.suppress(NameError): + args[j] = eval(a) if isinstance(a, str) else a # eval strings + + n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain + if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, + BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x): + c1, c2 = ch[f], args[0] + if c2 != no: # if not output + c2 = make_divisible(c2 * gw, 8) + + args = [c1, c2, *args[1:]] + if m in [BottleneckCSP, C3, C3TR, C3Ghost, C3x]: + args.insert(2, n) # number of repeats + n = 1 + elif m is nn.BatchNorm2d: + args = [ch[f]] + elif m is Concat: + c2 = sum(ch[x] for x in f) + elif m is Detect: + args.append([ch[x] for x in f]) + if isinstance(args[1], int): # number of anchors + args[1] = [list(range(args[1] * 2))] * len(f) + elif m is Contract: + c2 = ch[f] * args[0] ** 2 + elif m is Expand: + c2 = ch[f] // args[0] ** 2 + else: + c2 = ch[f] + + m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module + t = str(m)[8:-2].replace('__main__.', '') # module type + np = sum(x.numel() for x in m_.parameters()) # number 
params + m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params + LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}') # print + save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist + layers.append(m_) + if i == 0: + ch = [] + ch.append(c2) + return nn.Sequential(*layers), sorted(save) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml') + parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs') + parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--profile', action='store_true', help='profile model speed') + parser.add_argument('--line-profile', action='store_true', help='profile model speed layer by layer') + parser.add_argument('--test', action='store_true', help='test all yolo*.yaml') + opt = parser.parse_args() + opt.cfg = check_yaml(opt.cfg) # check YAML + print_args(vars(opt)) + device = select_device(opt.device) + + # Create model + im = torch.rand(opt.batch_size, 3, 640, 640).to(device) + model = Model(opt.cfg).to(device) + + # Options + if opt.line_profile: # profile layer by layer + _ = model(im, profile=True) + + elif opt.profile: # profile forward-backward + results = profile(input=im, ops=[model], n=3) + + elif opt.test: # test all models + for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'): + try: + _ = Model(cfg) + except Exception as e: + print(f'Error in {cfg}: {e}') + + else: # report fused model summary + model.fuse() diff --git a/asone/detectors/yolov5/yolov5/utils/__init__.py b/asone/detectors/yolov5/yolov5/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/asone/detectors/yolov5/yolov5/utils/torch_utils.py b/asone/detectors/yolov5/yolov5/utils/torch_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6888f4844abc1b7478781ad9f0992e55ebfe6bda --- /dev/null +++ b/asone/detectors/yolov5/yolov5/utils/torch_utils.py @@ -0,0 +1,354 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +PyTorch utils +""" + +import math +import os +import platform +import subprocess +import time +import warnings +from contextlib import contextmanager +from copy import deepcopy +from pathlib import Path + +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.parallel import DistributedDataParallel as DDP + +# from utils.general import LOGGER, check_version, colorstr, file_date, git_describe + +LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +RANK = int(os.getenv('RANK', -1)) +WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) + +try: + import thop # for FLOPs computation +except ImportError: + thop = None + +# Suppress PyTorch warnings +warnings.filterwarnings('ignore', message='User provided device_type of \'cuda\', but CUDA is not available. Disabling') + + +def smart_DDP(model): + # Model DDP creation with checks + assert not check_version(torch.__version__, '1.12.0', pinned=True), \ + 'torch==1.12.0 torchvision==0.13.0 DDP training is not supported due to a known issue. ' \ + 'Please upgrade or downgrade torch to use DDP. 
See https://github.com/ultralytics/yolov5/issues/8395' + if check_version(torch.__version__, '1.11.0'): + return DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, static_graph=True) + else: + return DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK) + + +@contextmanager +def torch_distributed_zero_first(local_rank: int): + # Decorator to make all processes in distributed training wait for each local_master to do something + if local_rank not in [-1, 0]: + dist.barrier(device_ids=[local_rank]) + yield + if local_rank == 0: + dist.barrier(device_ids=[0]) + + +def device_count(): + # Returns number of CUDA devices available. Safe version of torch.cuda.device_count(). Supports Linux and Windows + assert platform.system() in ('Linux', 'Windows'), 'device_count() only supported on Linux or Windows' + try: + cmd = 'nvidia-smi -L | wc -l' if platform.system() == 'Linux' else 'nvidia-smi -L | find /c /v ""' # Windows + return int(subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1]) + except Exception: + return 0 + + +def select_device(device='', batch_size=0, newline=True): + # device = None or 'cpu' or 0 or '0' or '0,1,2,3' + s = f'YOLOv5 🚀 {git_describe() or file_date()} Python-{platform.python_version()} torch-{torch.__version__} ' + device = str(device).strip().lower().replace('cuda:', '').replace('none', '') # to string, 'cuda:0' to '0' + cpu = device == 'cpu' + mps = device == 'mps' # Apple Metal Performance Shaders (MPS) + if cpu or mps: + os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False + elif device: # non-cpu device requested + os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable - must be before assert is_available() + assert torch.cuda.is_available() and torch.cuda.device_count() >= len(device.replace(',', '')), \ + f"Invalid CUDA '--device {device}' requested, use '--device cpu' or pass valid CUDA device(s)" + + if not (cpu or mps) and torch.cuda.is_available(): # prefer GPU if available + devices = device.split(',') if device else '0' # range(torch.cuda.device_count()) # i.e. 
0,1,6,7 + n = len(devices) # device count + if n > 1 and batch_size > 0: # check batch_size is divisible by device_count + assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}' + space = ' ' * (len(s) + 1) + for i, d in enumerate(devices): + p = torch.cuda.get_device_properties(i) + s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n" # bytes to MB + arg = 'cuda:0' + elif mps and getattr(torch, 'has_mps', False) and torch.backends.mps.is_available(): # prefer MPS if available + s += 'MPS\n' + arg = 'mps' + else: # revert to CPU + s += 'CPU\n' + arg = 'cpu' + + if not newline: + s = s.rstrip() + LOGGER.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s) # emoji-safe + return torch.device(arg) + + +def time_sync(): + # PyTorch-accurate time + if torch.cuda.is_available(): + torch.cuda.synchronize() + return time.time() + + +def profile(input, ops, n=10, device=None): + results = [] + if not isinstance(device, torch.device): + device = select_device(device) + print(f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}" + f"{'input':>24s}{'output':>24s}") + + for x in input if isinstance(input, list) else [input]: + x = x.to(device) + x.requires_grad = True + for m in ops if isinstance(ops, list) else [ops]: + m = m.to(device) if hasattr(m, 'to') else m # device + m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m + tf, tb, t = 0, 0, [0, 0, 0] # dt forward, backward + try: + flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPs + except Exception: + flops = 0 + + try: + for _ in range(n): + t[0] = time_sync() + y = m(x) + t[1] = time_sync() + try: + _ = (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward() + t[2] = time_sync() + except Exception: # no backward method + # print(e) # for debug + t[2] = float('nan') + tf += (t[1] - t[0]) * 1000 / n # ms per op forward + tb += (t[2] - t[1]) * 1000 / n # ms per op backward + mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0 # (GB) + s_in, s_out = (tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' for x in (x, y)) # shapes + p = sum(x.numel() for x in m.parameters()) if isinstance(m, nn.Module) else 0 # parameters + print(f'{p:12}{flops:12.4g}{mem:>14.3f}{tf:14.4g}{tb:14.4g}{str(s_in):>24s}{str(s_out):>24s}') + results.append([p, flops, mem, tf, tb, s_in, s_out]) + except Exception as e: + print(e) + results.append(None) + torch.cuda.empty_cache() + return results + + +def is_parallel(model): + # Returns True if model is of type DP or DDP + return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) + + +def de_parallel(model): + # De-parallelize a model: returns single-GPU model if model is of type DP or DDP + return model.module if is_parallel(model) else model + + +def initialize_weights(model): + for m in model.modules(): + t = type(m) + if t is nn.Conv2d: + pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif t is nn.BatchNorm2d: + m.eps = 1e-3 + m.momentum = 0.03 + elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: + m.inplace = True + + +def find_modules(model, mclass=nn.Conv2d): + # Finds layer indices matching module class 'mclass' + return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] + + +def sparsity(model): + # Return global model sparsity + a, b = 0, 0 + for p in 
model.parameters(): + a += p.numel() + b += (p == 0).sum() + return b / a + + +def prune(model, amount=0.3): + # Prune model to requested global sparsity + import torch.nn.utils.prune as prune + print('Pruning model... ', end='') + for name, m in model.named_modules(): + if isinstance(m, nn.Conv2d): + prune.l1_unstructured(m, name='weight', amount=amount) # prune + prune.remove(m, 'weight') # make permanent + print(' %.3g global sparsity' % sparsity(model)) + + +def fuse_conv_and_bn(conv, bn): + # Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ + fusedconv = nn.Conv2d(conv.in_channels, + conv.out_channels, + kernel_size=conv.kernel_size, + stride=conv.stride, + padding=conv.padding, + groups=conv.groups, + bias=True).requires_grad_(False).to(conv.weight.device) + + # Prepare filters + w_conv = conv.weight.clone().view(conv.out_channels, -1) + w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) + fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape)) + + # Prepare spatial bias + b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias + b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) + fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) + + return fusedconv + + +def model_info(model, verbose=False, img_size=640): + # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320] + n_p = sum(x.numel() for x in model.parameters()) # number parameters + n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients + if verbose: + print(f"{'layer':>5} {'name':>40} {'gradient':>9} {'parameters':>12} {'shape':>20} {'mu':>10} {'sigma':>10}") + for i, (name, p) in enumerate(model.named_parameters()): + name = name.replace('module_list.', '') + print('%5g %40s %9s %12g %20s %10.3g %10.3g' % + (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) + + try: # FLOPs + from thop import profile + stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32 + img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device) # input + flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2 # stride GFLOPs + img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float + fs = ', %.1f GFLOPs' % (flops * img_size[0] / stride * img_size[1] / stride) # 640x640 GFLOPs + except Exception: + fs = '' + + name = Path(model.yaml_file).stem.replace('yolov5', 'YOLOv5') if hasattr(model, 'yaml_file') else 'Model' + # LOGGER.info(f"{name} summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") + + +def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) + # Scales img(bs,3,y,x) by ratio constrained to gs-multiple + if ratio == 1.0: + return img + h, w = img.shape[2:] + s = (int(h * ratio), int(w * ratio)) # new size + img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize + if not same_shape: # pad/crop img + h, w = (math.ceil(x * ratio / gs) * gs for x in (h, w)) + return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean + + +def copy_attr(a, b, include=(), exclude=()): + # Copy attributes from b to a, options to only include [...] and to exclude [...] 
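+    # Added example (not part of the original source): ModelEMA.update_attr() below uses this as
+    # copy_attr(self.ema, model, include, exclude) to mirror plain attributes (e.g. 'names',
+    # 'yaml', 'stride') from the training model onto its EMA copy, skipping private attributes
+    # and any names listed in `exclude`.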
+ for k, v in b.__dict__.items(): + if (len(include) and k not in include) or k.startswith('_') or k in exclude: + continue + else: + setattr(a, k, v) + + +def smart_optimizer(model, name='Adam', lr=0.001, momentum=0.9, weight_decay=1e-5): + # YOLOv5 3-param group optimizer: 0) weights with decay, 1) weights no decay, 2) biases no decay + g = [], [], [] # optimizer parameter groups + bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. BatchNorm2d() + for v in model.modules(): + if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias (no decay) + g[2].append(v.bias) + if isinstance(v, bn): # weight (no decay) + g[1].append(v.weight) + elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay) + g[0].append(v.weight) + + if name == 'Adam': + optimizer = torch.optim.Adam(g[2], lr=lr, betas=(momentum, 0.999)) # adjust beta1 to momentum + elif name == 'AdamW': + optimizer = torch.optim.AdamW(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0) + elif name == 'RMSProp': + optimizer = torch.optim.RMSprop(g[2], lr=lr, momentum=momentum) + elif name == 'SGD': + optimizer = torch.optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True) + else: + raise NotImplementedError(f'Optimizer {name} not implemented.') + + optimizer.add_param_group({'params': g[0], 'weight_decay': weight_decay}) # add g0 with weight_decay + optimizer.add_param_group({'params': g[1], 'weight_decay': 0.0}) # add g1 (BatchNorm2d weights) + LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups " + f"{len(g[1])} weight (no decay), {len(g[0])} weight, {len(g[2])} bias") + return optimizer + + +class EarlyStopping: + # YOLOv5 simple early stopper + def __init__(self, patience=30): + self.best_fitness = 0.0 # i.e. mAP + self.best_epoch = 0 + self.patience = patience or float('inf') # epochs to wait after fitness stops improving to stop + self.possible_stop = False # possible stop may occur next epoch + + def __call__(self, epoch, fitness): + if fitness >= self.best_fitness: # >= 0 to allow for early zero-fitness stage of training + self.best_epoch = epoch + self.best_fitness = fitness + delta = epoch - self.best_epoch # epochs without improvement + self.possible_stop = delta >= (self.patience - 1) # possible stop may occur next epoch + stop = delta >= self.patience # stop training if patience exceeded + if stop: + LOGGER.info(f'Stopping training early as no improvement observed in last {self.patience} epochs. ' + f'Best results observed at epoch {self.best_epoch}, best model saved as best.pt.\n' + f'To update EarlyStopping(patience={self.patience}) pass a new patience value, ' + f'i.e. 
`python train.py --patience 300` or use `--patience 0` to disable EarlyStopping.') + return stop + + +class ModelEMA: + """ Updated Exponential Moving Average (EMA) from https://github.com/rwightman/pytorch-image-models + Keeps a moving average of everything in the model state_dict (parameters and buffers) + For EMA details see https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage + """ + + def __init__(self, model, decay=0.9999, tau=2000, updates=0): + # Create EMA + self.ema = deepcopy(de_parallel(model)).eval() # FP32 EMA + # if next(model.parameters()).device.type != 'cpu': + # self.ema.half() # FP16 EMA + self.updates = updates # number of EMA updates + self.decay = lambda x: decay * (1 - math.exp(-x / tau)) # decay exponential ramp (to help early epochs) + for p in self.ema.parameters(): + p.requires_grad_(False) + + def update(self, model): + # Update EMA parameters + with torch.no_grad(): + self.updates += 1 + d = self.decay(self.updates) + + msd = de_parallel(model).state_dict() # model state_dict + for k, v in self.ema.state_dict().items(): + if v.dtype.is_floating_point: + v *= d + v += (1 - d) * msd[k].detach() + + def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): + # Update EMA attributes + copy_attr(self.ema, model, include, exclude) diff --git a/asone/detectors/yolov5/yolov5/utils/yolov5_utils.py b/asone/detectors/yolov5/yolov5/utils/yolov5_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6e4b8b9f393a0e63fca4d5bd1c694344748df163 --- /dev/null +++ b/asone/detectors/yolov5/yolov5/utils/yolov5_utils.py @@ -0,0 +1,222 @@ +import contextlib +import time +import numpy as np +import torch +import torchvision +import cv2 +import sys +from pathlib import Path + + + +def box_area(box): + # box = xyxy(4,n) + return (box[2] - box[0]) * (box[3] - box[1]) + + +def box_iou(box1, box2, eps=1e-7): + # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py + """ + Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
+ Arguments: + box1 (Tensor[N, 4]) + box2 (Tensor[M, 4]) + Returns: + iou (Tensor[N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + + # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) + (a1, a2), (b1, b2) = box1[:, None].chunk(2, 2), box2.chunk(2, 1) + inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2) + + # IoU = inter / (area1 + area2 - inter) + return inter / (box_area(box1.T)[:, None] + box_area(box2.T) - inter + eps) + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + +def non_max_suppression(prediction, + conf_thres=0.25, + iou_thres=0.45, + classes=None, + agnostic=False, + multi_label=False, + labels=(), + max_det=300): + """Non-Maximum Suppression (NMS) on inference results to reject overlapping bounding boxes + Returns: + list of detections, on (n,6) tensor per image [xyxy, conf, cls] + """ + # prediction = torch.Tensor(prediction) + bs = prediction.shape[0] # batch size + nc = prediction.shape[2] - 5 # number of classes + xc = prediction[..., 4] > conf_thres # candidates + # Checks + assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' + assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' + + # Settings + # min_wh = 2 # (pixels) minimum box width and height + max_wh = 7680 # (pixels) maximum box width and height + max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() + time_limit = 0.3 + 0.03 * bs # seconds to quit after + redundant = True # require redundant detections + multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) + merge = False # use merge-NMS + + t = time.time() + output = [torch.zeros((0, 6), device=prediction.device)] * bs + for xi, x in enumerate(prediction): # image index, image inference + # Apply constraints + # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height + x = x[xc[xi]] # confidence + + # Cat apriori labels if autolabelling + if labels and len(labels[xi]): + lb = labels[xi] + v = torch.zeros((len(lb), nc + 5), device=x.device) + v[:, :4] = lb[:, 1:5] # box + v[:, 4] = 1.0 # conf + v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls + x = torch.cat((x, v), 0) + + # If none remain process next image + if not x.shape[0]: + continue + + # Compute conf + x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf + + # Box (center x, center y, width, height) to (x1, y1, x2, y2) + # print(type(x)) + box = xywh2xyxy(x[:, :4]) + + # Detections matrix nx6 (xyxy, conf, cls) + if multi_label: + i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T + x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) + else: # best class only + conf, j = x[:, 5:].max(1, keepdim=True) + x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] + + # Filter by class + if classes is not None: + x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] + + # Apply finite constraint + # if not torch.isfinite(x).all(): + # x = x[torch.isfinite(x).all(1)] + + # Check shape + n = x.shape[0] # number of boxes + if not n: # no boxes + continue + elif n > max_nms: # excess boxes + x = x[x[:, 
4].argsort(descending=True)[:max_nms]] # sort by confidence + + # Batched NMS + c = x[:, 5:6] * (0 if agnostic else max_wh) # classes + boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores + i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS + if i.shape[0] > max_det: # limit detections + i = i[:max_det] + if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) + # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) + iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix + weights = iou * scores[None] # box weights + x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes + if redundant: + i = i[iou.sum(1) > 1] # require redundancy + + output[xi] = x[i] + if (time.time() - t) > time_limit: + # LOGGER.warning(f'WARNING: NMS time limit {time_limit:.3f}s exceeded') + break # time limit exceeded + + return output + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + + + +def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): + # Rescale coords (xyxy) from img1_shape to img0_shape + if ratio_pad is None: # calculate from img0_shape + gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new + pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding + else: + gain = ratio_pad[0][0] + pad = ratio_pad[1] + + coords[:, [0, 2]] -= pad[0] # x padding + coords[:, [1, 3]] -= pad[1] # y padding + coords[:, :4] /= gain + clip_coords(coords, img0_shape) + return coords + + +def clip_coords(boxes, shape): + # Clip bounding xyxy bounding boxes to image shape (height, width) + if isinstance(boxes, torch.Tensor): # faster individually + boxes[:, 0].clamp_(0, shape[1]) # x1 + boxes[:, 1].clamp_(0, shape[0]) # y1 + boxes[:, 2].clamp_(0, shape[1]) # x2 + boxes[:, 3].clamp_(0, shape[0]) # y2 + else: # np.array (faster grouped) + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 + +@contextlib.contextmanager +def yolov5_in_syspath(): + """ + Temporarily add yolov5 folder to `sys.path`. 
+ + torch.hub handles it in the same way: https://github.com/pytorch/pytorch/blob/75024e228ca441290b6a1c2e564300ad507d7af6/torch/hub.py#L387 + + Proper fix for: #22, #134, #353, #1155, #1389, #1680, #2531, #3071 + No need for such workarounds: #869, #1052, #2949 + """ + yolov5_folder_dir = str(Path(__file__).parents[1].absolute()) + try: + sys.path.insert(0, yolov5_folder_dir) + yield + finally: + sys.path.remove(yolov5_folder_dir) \ No newline at end of file diff --git a/asone/detectors/yolov5/yolov5_detector.py b/asone/detectors/yolov5/yolov5_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..b24352c898d926d3af476ec9a486dd7c94796658 --- /dev/null +++ b/asone/detectors/yolov5/yolov5_detector.py @@ -0,0 +1,121 @@ +import os +from asone.utils import get_names +import numpy as np +import warnings +import torch +import onnxruntime + +from asone.detectors.yolov5.yolov5.utils.yolov5_utils import (non_max_suppression, + scale_coords, + letterbox) +from asone.detectors.yolov5.yolov5.models.experimental import attempt_load +from asone import utils + +class YOLOv5Detector: + def __init__(self, + weights=None, + use_onnx=False, + use_cuda=True): + + self.use_onnx = use_onnx + self.device = 'cuda' if use_cuda else 'cpu' + + if not os.path.exists(weights): + utils.download_weights(weights) + + # Load Model + self.model = self.load_model(use_cuda, weights) + + def load_model(self, use_cuda, weights, fp16=False): + # Device: CUDA and if fp16=True only then half precision floating point works + self.fp16 = fp16 & ((not self.use_onnx or self.use_onnx) and self.device != 'cpu') + # Load onnx + if self.use_onnx: + if use_cuda: + providers = ['CUDAExecutionProvider','CPUExecutionProvider'] + else: + providers = ['CPUExecutionProvider'] + model = onnxruntime.InferenceSession(weights, providers=providers) + #Load Pytorch + else: + model = attempt_load(weights, device=self.device, inplace=True, fuse=True) + model.half() if self.fp16 else model.float() + return model + + def image_preprocessing(self, + image: list, + input_shape=(640, 640))-> list: + + original_image = image.copy() + image = letterbox(image, input_shape, stride=32, auto=False)[0] + image = image.transpose((2, 0, 1))[::-1] + image = np.ascontiguousarray(image, dtype=np.float32) + image /= 255 # 0 - 255 to 0.0 - 1.0 + if len(image.shape) == 3: + image = image[None] # expand for batch dim + return original_image, image + + def detect(self, image: list, + input_shape: tuple = (640, 640), + conf_thres: float = 0.25, + iou_thres: float = 0.45, + max_det: int = 1000, + filter_classes: bool = None, + agnostic_nms: bool = True, + with_p6: bool = False) -> list: + + # Image Preprocessing + original_image, processed_image = self.image_preprocessing(image, input_shape) + + # Inference + if self.use_onnx: + # Input names of ONNX model on which it is exported + input_name = self.model.get_inputs()[0].name + # Run onnx model + pred = self.model.run([self.model.get_outputs()[0].name], {input_name: processed_image})[0] + # Run Pytorch model + else: + processed_image = torch.from_numpy(processed_image).to(self.device) + # Change image floating point precision if fp16 set to true + processed_image = processed_image.half() if self.fp16 else processed_image.float() + pred = self.model(processed_image, augment=False, visualize=False)[0] + + # Post Processing + if isinstance(pred, np.ndarray): + pred = torch.tensor(pred, device=self.device) + predictions = non_max_suppression(pred, conf_thres, + iou_thres, + agnostic=agnostic_nms, + 
max_det=max_det) + + for i, prediction in enumerate(predictions): # per image + if len(prediction): + prediction[:, :4] = scale_coords( + processed_image.shape[2:], prediction[:, :4], original_image.shape).round() + predictions[i] = prediction + detections = predictions[0].cpu().numpy() + image_info = { + 'width': original_image.shape[1], + 'height': original_image.shape[0], + } + + self.boxes = detections[:, :4] + self.scores = detections[:, 4:5] + self.class_ids = detections[:, 5:6] + + if filter_classes: + class_names = get_names() + + filter_class_idx = [] + if filter_classes: + for _class in filter_classes: + if _class.lower() in class_names: + filter_class_idx.append(class_names.index(_class.lower())) + else: + warnings.warn(f"class {_class} not found in model classes list.") + + detections = detections[np.in1d(detections[:,5].astype(int), filter_class_idx)] + + return detections, image_info + + \ No newline at end of file diff --git a/asone/detectors/yolov6/__init__.py b/asone/detectors/yolov6/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e56e2e3ff24e7abe6d143059a1dd014494ffe714 --- /dev/null +++ b/asone/detectors/yolov6/__init__.py @@ -0,0 +1,2 @@ +from .yolov6_detector import YOLOv6Detector +__all__ = ['YOLOv6Detector'] \ No newline at end of file diff --git a/asone/detectors/yolov6/yolov6/__init__.py b/asone/detectors/yolov6/yolov6/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/asone/detectors/yolov6/yolov6/assigners/__init__.py b/asone/detectors/yolov6/yolov6/assigners/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f0a4467617ba40e00bee9fdb7426952bd0f55026 --- /dev/null +++ b/asone/detectors/yolov6/yolov6/assigners/__init__.py @@ -0,0 +1,2 @@ +from .atss_assigner import ATSSAssigner +from .tal_assigner import TaskAlignedAssigner \ No newline at end of file diff --git a/asone/detectors/yolov6/yolov6/assigners/anchor_generator.py b/asone/detectors/yolov6/yolov6/assigners/anchor_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..3704620197b76f9da1fe88d535bad048e2f86c8f --- /dev/null +++ b/asone/detectors/yolov6/yolov6/assigners/anchor_generator.py @@ -0,0 +1,52 @@ +import torch + + +def generate_anchors(feats, fpn_strides, grid_cell_size=5.0, grid_cell_offset=0.5, device='cpu', is_eval=False): + '''Generate anchors from features.''' + anchors = [] + anchor_points = [] + stride_tensor = [] + num_anchors_list = [] + assert feats is not None + if is_eval: + for i, stride in enumerate(fpn_strides): + _, _, h, w = feats[i].shape + shift_x = torch.arange(end=w, device=device) + grid_cell_offset + shift_y = torch.arange(end=h, device=device) + grid_cell_offset + shift_y, shift_x = torch.meshgrid(shift_y, shift_x) + anchor_point = torch.stack( + [shift_x, shift_y], axis=-1).to(torch.float) + anchor_points.append(anchor_point.reshape([-1, 2])) + stride_tensor.append( + torch.full( + (h * w, 1), stride, dtype=torch.float, device=device)) + anchor_points = torch.cat(anchor_points) + stride_tensor = torch.cat(stride_tensor) + return anchor_points, stride_tensor + else: + for i, stride in enumerate(fpn_strides): + _, _, h, w = feats[i].shape + cell_half_size = grid_cell_size * stride * 0.5 + shift_x = (torch.arange(end=w, device=device) + grid_cell_offset) * stride + shift_y = (torch.arange(end=h, device=device) + grid_cell_offset) * stride + shift_y, shift_x = torch.meshgrid(shift_y, shift_x) + anchor = 
torch.stack( + [ + shift_x - cell_half_size, shift_y - cell_half_size, + shift_x + cell_half_size, shift_y + cell_half_size + ], + axis=-1).clone().to(feats[0].dtype) + anchor_point = torch.stack( + [shift_x, shift_y], axis=-1).clone().to(feats[0].dtype) + + anchors.append(anchor.reshape([-1, 4])) + anchor_points.append(anchor_point.reshape([-1, 2])) + num_anchors_list.append(len(anchors[-1])) + stride_tensor.append( + torch.full( + [num_anchors_list[-1], 1], stride, dtype=feats[0].dtype)) + anchors = torch.cat(anchors) + anchor_points = torch.cat(anchor_points).to(device) + stride_tensor = torch.cat(stride_tensor).to(device) + return anchors, anchor_points, num_anchors_list, stride_tensor + diff --git a/asone/detectors/yolov6/yolov6/assigners/assigner_utils.py b/asone/detectors/yolov6/yolov6/assigners/assigner_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e833f891b3ddc6e100b8ba949fe21fb74f5688b7 --- /dev/null +++ b/asone/detectors/yolov6/yolov6/assigners/assigner_utils.py @@ -0,0 +1,89 @@ +import torch +import torch.nn.functional as F + +def dist_calculator(gt_bboxes, anchor_bboxes): + """compute center distance between all bbox and gt + + Args: + gt_bboxes (Tensor): shape(bs*n_max_boxes, 4) + anchor_bboxes (Tensor): shape(num_total_anchors, 4) + Return: + distances (Tensor): shape(bs*n_max_boxes, num_total_anchors) + ac_points (Tensor): shape(num_total_anchors, 2) + """ + gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0 + gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0 + gt_points = torch.stack([gt_cx, gt_cy], dim=1) + ac_cx = (anchor_bboxes[:, 0] + anchor_bboxes[:, 2]) / 2.0 + ac_cy = (anchor_bboxes[:, 1] + anchor_bboxes[:, 3]) / 2.0 + ac_points = torch.stack([ac_cx, ac_cy], dim=1) + + distances = (gt_points[:, None, :] - ac_points[None, :, :]).pow(2).sum(-1).sqrt() + + return distances, ac_points + +def select_candidates_in_gts(xy_centers, gt_bboxes, eps=1e-9): + """select the positive anchors's center in gt + + Args: + xy_centers (Tensor): shape(bs*n_max_boxes, num_total_anchors, 4) + gt_bboxes (Tensor): shape(bs, n_max_boxes, 4) + Return: + (Tensor): shape(bs, n_max_boxes, num_total_anchors) + """ + n_anchors = xy_centers.size(0) + bs, n_max_boxes, _ = gt_bboxes.size() + _gt_bboxes = gt_bboxes.reshape([-1, 4]) + xy_centers = xy_centers.unsqueeze(0).repeat(bs * n_max_boxes, 1, 1) + gt_bboxes_lt = _gt_bboxes[:, 0:2].unsqueeze(1).repeat(1, n_anchors, 1) + gt_bboxes_rb = _gt_bboxes[:, 2:4].unsqueeze(1).repeat(1, n_anchors, 1) + b_lt = xy_centers - gt_bboxes_lt + b_rb = gt_bboxes_rb - xy_centers + bbox_deltas = torch.cat([b_lt, b_rb], dim=-1) + bbox_deltas = bbox_deltas.reshape([bs, n_max_boxes, n_anchors, -1]) + return (bbox_deltas.min(axis=-1)[0] > eps).to(gt_bboxes.dtype) + +def select_highest_overlaps(mask_pos, overlaps, n_max_boxes): + """if an anchor box is assigned to multiple gts, + the one with the highest iou will be selected. 
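A quick sanity check of select_candidates_in_gts above (a sketch; the toy tensors are made up, and the sys.path entry is an assumed repo-relative path so the vendored `yolov6.*` imports resolve):

import sys
import torch
sys.path.insert(0, "asone/detectors/yolov6")   # assumption: run from the repository root
from yolov6.assigners.assigner_utils import select_candidates_in_gts

xy_centers = torch.tensor([[5., 5.], [20., 20.]])   # anchor centers, shape (num_anchors, 2)
gt_bboxes = torch.tensor([[[0., 0., 10., 10.]]])    # shape (bs=1, n_max_boxes=1, 4), xyxy
print(select_candidates_in_gts(xy_centers, gt_bboxes))
# tensor([[[1., 0.]]]) -- only the first center falls inside the ground-truth box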
+ + Args: + mask_pos (Tensor): shape(bs, n_max_boxes, num_total_anchors) + overlaps (Tensor): shape(bs, n_max_boxes, num_total_anchors) + Return: + target_gt_idx (Tensor): shape(bs, num_total_anchors) + fg_mask (Tensor): shape(bs, num_total_anchors) + mask_pos (Tensor): shape(bs, n_max_boxes, num_total_anchors) + """ + fg_mask = mask_pos.sum(axis=-2) + if fg_mask.max() > 1: + mask_multi_gts = (fg_mask.unsqueeze(1) > 1).repeat([1, n_max_boxes, 1]) + max_overlaps_idx = overlaps.argmax(axis=1) + is_max_overlaps = F.one_hot(max_overlaps_idx, n_max_boxes) + is_max_overlaps = is_max_overlaps.permute(0, 2, 1).to(overlaps.dtype) + mask_pos = torch.where(mask_multi_gts, is_max_overlaps, mask_pos) + fg_mask = mask_pos.sum(axis=-2) + target_gt_idx = mask_pos.argmax(axis=-2) + return target_gt_idx, fg_mask , mask_pos + +def iou_calculator(box1, box2, eps=1e-9): + """Calculate iou for batch + + Args: + box1 (Tensor): shape(bs, n_max_boxes, 1, 4) + box2 (Tensor): shape(bs, 1, num_total_anchors, 4) + Return: + (Tensor): shape(bs, n_max_boxes, num_total_anchors) + """ + box1 = box1.unsqueeze(2) # [N, M1, 4] -> [N, M1, 1, 4] + box2 = box2.unsqueeze(1) # [N, M2, 4] -> [N, 1, M2, 4] + px1y1, px2y2 = box1[:, :, :, 0:2], box1[:, :, :, 2:4] + gx1y1, gx2y2 = box2[:, :, :, 0:2], box2[:, :, :, 2:4] + x1y1 = torch.maximum(px1y1, gx1y1) + x2y2 = torch.minimum(px2y2, gx2y2) + overlap = (x2y2 - x1y1).clip(0).prod(-1) + area1 = (px2y2 - px1y1).clip(0).prod(-1) + area2 = (gx2y2 - gx1y1).clip(0).prod(-1) + union = area1 + area2 - overlap + eps + + return overlap / union \ No newline at end of file diff --git a/asone/detectors/yolov6/yolov6/assigners/atss_assigner.py b/asone/detectors/yolov6/yolov6/assigners/atss_assigner.py new file mode 100644 index 0000000000000000000000000000000000000000..c93a5b54c442d4dbdb182c9a1978203c8392f773 --- /dev/null +++ b/asone/detectors/yolov6/yolov6/assigners/atss_assigner.py @@ -0,0 +1,163 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from yolov6.assigners.iou2d_calculator import iou2d_calculator +from yolov6.assigners.assigner_utils import dist_calculator, select_candidates_in_gts, select_highest_overlaps, iou_calculator + +class ATSSAssigner(nn.Module): + '''Adaptive Training Sample Selection Assigner''' + def __init__(self, + topk=9, + num_classes=80): + super(ATSSAssigner, self).__init__() + self.topk = topk + self.num_classes = num_classes + self.bg_idx = num_classes + + @torch.no_grad() + def forward(self, + anc_bboxes, + n_level_bboxes, + gt_labels, + gt_bboxes, + mask_gt, + pd_bboxes): + r"""This code is based on + https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py + + Args: + anc_bboxes (Tensor): shape(num_total_anchors, 4) + n_level_bboxes (List):len(3) + gt_labels (Tensor): shape(bs, n_max_boxes, 1) + gt_bboxes (Tensor): shape(bs, n_max_boxes, 4) + mask_gt (Tensor): shape(bs, n_max_boxes, 1) + pd_bboxes (Tensor): shape(bs, n_max_boxes, 4) + Returns: + target_labels (Tensor): shape(bs, num_total_anchors) + target_bboxes (Tensor): shape(bs, num_total_anchors, 4) + target_scores (Tensor): shape(bs, num_total_anchors, num_classes) + fg_mask (Tensor): shape(bs, num_total_anchors) + """ + self.n_anchors = anc_bboxes.size(0) + self.bs = gt_bboxes.size(0) + self.n_max_boxes = gt_bboxes.size(1) + + if self.n_max_boxes == 0: + device = gt_bboxes.device + return torch.full( [self.bs, self.n_anchors], self.bg_idx).to(device), \ + torch.zeros([self.bs, self.n_anchors, 4]).to(device), \ + torch.zeros([self.bs, self.n_anchors, 
self.num_classes]).to(device), \ + torch.zeros([self.bs, self.n_anchors]).to(device) + + + overlaps = iou2d_calculator(gt_bboxes.reshape([-1, 4]), anc_bboxes) + overlaps = overlaps.reshape([self.bs, -1, self.n_anchors]) + + distances, ac_points = dist_calculator(gt_bboxes.reshape([-1, 4]), anc_bboxes) + distances = distances.reshape([self.bs, -1, self.n_anchors]) + + is_in_candidate, candidate_idxs = self.select_topk_candidates( + distances, n_level_bboxes, mask_gt) + + overlaps_thr_per_gt, iou_candidates = self.thres_calculator( + is_in_candidate, candidate_idxs, overlaps) + + # select candidates iou >= threshold as positive + is_pos = torch.where( + iou_candidates > overlaps_thr_per_gt.repeat([1, 1, self.n_anchors]), + is_in_candidate, torch.zeros_like(is_in_candidate)) + + is_in_gts = select_candidates_in_gts(ac_points, gt_bboxes) + mask_pos = is_pos * is_in_gts * mask_gt + + target_gt_idx, fg_mask, mask_pos = select_highest_overlaps( + mask_pos, overlaps, self.n_max_boxes) + + # assigned target + target_labels, target_bboxes, target_scores = self.get_targets( + gt_labels, gt_bboxes, target_gt_idx, fg_mask) + + # soft label with iou + if pd_bboxes is not None: + ious = iou_calculator(gt_bboxes, pd_bboxes) * mask_pos + ious = ious.max(axis=-2)[0].unsqueeze(-1) + target_scores *= ious + + return target_labels.long(), target_bboxes, target_scores, fg_mask.bool() + + def select_topk_candidates(self, + distances, + n_level_bboxes, + mask_gt): + + mask_gt = mask_gt.repeat(1, 1, self.topk).bool() + level_distances = torch.split(distances, n_level_bboxes, dim=-1) + is_in_candidate_list = [] + candidate_idxs = [] + start_idx = 0 + for per_level_distances, per_level_boxes in zip(level_distances, n_level_bboxes): + + end_idx = start_idx + per_level_boxes + selected_k = min(self.topk, per_level_boxes) + _, per_level_topk_idxs = per_level_distances.topk(selected_k, dim=-1, largest=False) + candidate_idxs.append(per_level_topk_idxs + start_idx) + per_level_topk_idxs = torch.where(mask_gt, + per_level_topk_idxs, torch.zeros_like(per_level_topk_idxs)) + is_in_candidate = F.one_hot(per_level_topk_idxs, per_level_boxes).sum(dim=-2) + is_in_candidate = torch.where(is_in_candidate > 1, + torch.zeros_like(is_in_candidate), is_in_candidate) + is_in_candidate_list.append(is_in_candidate.to(distances.dtype)) + start_idx = end_idx + + is_in_candidate_list = torch.cat(is_in_candidate_list, dim=-1) + candidate_idxs = torch.cat(candidate_idxs, dim=-1) + + return is_in_candidate_list, candidate_idxs + + def thres_calculator(self, + is_in_candidate, + candidate_idxs, + overlaps): + + n_bs_max_boxes = self.bs * self.n_max_boxes + _candidate_overlaps = torch.where(is_in_candidate > 0, + overlaps, torch.zeros_like(overlaps)) + candidate_idxs = candidate_idxs.reshape([n_bs_max_boxes, -1]) + assist_idxs = self.n_anchors * torch.arange(n_bs_max_boxes, device=candidate_idxs.device) + assist_idxs = assist_idxs[:,None] + faltten_idxs = candidate_idxs + assist_idxs + candidate_overlaps = _candidate_overlaps.reshape(-1)[faltten_idxs] + candidate_overlaps = candidate_overlaps.reshape([self.bs, self.n_max_boxes, -1]) + + overlaps_mean_per_gt = candidate_overlaps.mean(axis=-1, keepdim=True) + overlaps_std_per_gt = candidate_overlaps.std(axis=-1, keepdim=True) + overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt + + return overlaps_thr_per_gt, _candidate_overlaps + + def get_targets(self, + gt_labels, + gt_bboxes, + target_gt_idx, + fg_mask): + + # assigned target labels + batch_idx = torch.arange(self.bs, 
dtype=gt_labels.dtype, device=gt_labels.device) + batch_idx = batch_idx[...,None] + target_gt_idx = (target_gt_idx + batch_idx * self.n_max_boxes).long() + target_labels = gt_labels.flatten()[target_gt_idx.flatten()] + target_labels = target_labels.reshape([self.bs, self.n_anchors]) + target_labels = torch.where(fg_mask > 0, + target_labels, torch.full_like(target_labels, self.bg_idx)) + + # assigned target boxes + target_bboxes = gt_bboxes.reshape([-1, 4])[target_gt_idx.flatten()] + target_bboxes = target_bboxes.reshape([self.bs, self.n_anchors, 4]) + + # assigned target scores + target_scores = F.one_hot(target_labels.long(), self.num_classes + 1).float() + target_scores = target_scores[:, :, :self.num_classes] + + return target_labels, target_bboxes, target_scores + + diff --git a/asone/detectors/yolov6/yolov6/assigners/iou2d_calculator.py b/asone/detectors/yolov6/yolov6/assigners/iou2d_calculator.py new file mode 100644 index 0000000000000000000000000000000000000000..63768015b87d5d48a309103831703871b3647658 --- /dev/null +++ b/asone/detectors/yolov6/yolov6/assigners/iou2d_calculator.py @@ -0,0 +1,249 @@ +#This code is based on +#https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/iou_calculators/iou2d_calculator.py + +import torch + + +def cast_tensor_type(x, scale=1., dtype=None): + if dtype == 'fp16': + # scale is for preventing overflows + x = (x / scale).half() + return x + + +def fp16_clamp(x, min=None, max=None): + if not x.is_cuda and x.dtype == torch.float16: + # clamp for cpu float16, tensor fp16 has no clamp implementation + return x.float().clamp(min, max).half() + + return x.clamp(min, max) + + +def iou2d_calculator(bboxes1, bboxes2, mode='iou', is_aligned=False, scale=1., dtype=None): + """2D Overlaps (e.g. IoUs, GIoUs) Calculator.""" + + """Calculate IoU between 2D bboxes. + + Args: + bboxes1 (Tensor): bboxes have shape (m, 4) in + (x1, y1, x2, y2) format, or shape (m, 5) in (x1, y1, x2, y2, score) format. + bboxes2 (Tensor): bboxes have shape (m, 4) in + (x1, y1, x2, y2) format, shape (m, 5) in (x1, y1, x2, y2, score) format, or be + empty. If ``is_aligned`` is ``True``, then m and n must be + equal. + mode (str): "iou" (intersection over union), "iof" (intersection + over foreground), or "giou" (generalized intersection over + union). + is_aligned (bool, optional): If True, then m and n must be equal. + Default False. + + Returns: + Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,) + """ + assert bboxes1.size(-1) in [0, 4, 5] + assert bboxes2.size(-1) in [0, 4, 5] + if bboxes2.size(-1) == 5: + bboxes2 = bboxes2[..., :4] + if bboxes1.size(-1) == 5: + bboxes1 = bboxes1[..., :4] + + if dtype == 'fp16': + # change tensor type to save cpu and cuda memory and keep speed + bboxes1 = cast_tensor_type(bboxes1, scale, dtype) + bboxes2 = cast_tensor_type(bboxes2, scale, dtype) + overlaps = bbox_overlaps(bboxes1, bboxes2, mode, is_aligned) + if not overlaps.is_cuda and overlaps.dtype == torch.float16: + # resume cpu float32 + overlaps = overlaps.float() + return overlaps + + return bbox_overlaps(bboxes1, bboxes2, mode, is_aligned) + + +def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6): + """Calculate overlap between two sets of bboxes.
+ + FP16 Contributed by https://github.com/open-mmlab/mmdetection/pull/4889 + Note: + Assume bboxes1 is M x 4, bboxes2 is N x 4, when mode is 'iou', + there are some newly generated variables when calculating IoU + using the bbox_overlaps function: + + 1) is_aligned is False + area1: M x 1 + area2: N x 1 + lt: M x N x 2 + rb: M x N x 2 + wh: M x N x 2 + overlap: M x N x 1 + union: M x N x 1 + ious: M x N x 1 + + Total memory: + S = (9 x N x M + N + M) * 4 Byte, + + When using FP16, we can reduce: + R = (9 x N x M + N + M) * 4 / 2 Byte + R > (N + M) * 4 * 2 always holds when N and M >= 1. + Obviously, N + M <= N * M < 3 * N * M when N >= 2 and M >= 2, + and N + 1 < 3 * N when N or M is 1. + + Given M = 40 (ground truth), N = 400000 (three anchor boxes + per grid, FPN, R-CNNs), + R = 275 MB (one time) + + A special case (dense detection), M = 512 (ground truth), + R = 3516 MB = 3.43 GB + + When the batch size is B, the reduction is: + B x R + + Therefore, CUDA memory runs out frequently. + + Experiments on GeForce RTX 2080Ti (11019 MiB): + + | dtype | M | N | Use | Real | Ideal | + |:----:|:----:|:----:|:----:|:----:|:----:| + | FP32 | 512 | 400000 | 8020 MiB | -- | -- | + | FP16 | 512 | 400000 | 4504 MiB | 3516 MiB | 3516 MiB | + | FP32 | 40 | 400000 | 1540 MiB | -- | -- | + | FP16 | 40 | 400000 | 1264 MiB | 276 MiB | 275 MiB | + + 2) is_aligned is True + area1: N x 1 + area2: N x 1 + lt: N x 2 + rb: N x 2 + wh: N x 2 + overlap: N x 1 + union: N x 1 + ious: N x 1 + + Total memory: + S = 11 x N * 4 Byte + + When using FP16, we can reduce: + R = 11 x N * 4 / 2 Byte + + The same holds for 'giou' (larger than 'iou'). + + Time-wise, FP16 is generally faster than FP32. + + When gpu_assign_thr is not -1, it takes more time on cpu + but does not reduce memory. + Therefore, we can reduce half the memory and keep the speed. + + If ``is_aligned`` is ``False``, then calculate the overlaps between each + bbox of bboxes1 and bboxes2, otherwise the overlaps between each aligned + pair of bboxes1 and bboxes2. + + Args: + bboxes1 (Tensor): shape (B, m, 4) in (x1, y1, x2, y2) format or empty. + bboxes2 (Tensor): shape (B, n, 4) in (x1, y1, x2, y2) format or empty. + B indicates the batch dim, in shape (B1, B2, ..., Bn). + If ``is_aligned`` is ``True``, then m and n must be equal. + mode (str): "iou" (intersection over union), "iof" (intersection over + foreground) or "giou" (generalized intersection over union). + Default "iou". + is_aligned (bool, optional): If True, then m and n must be equal. + Default False. + eps (float, optional): A value added to the denominator for numerical + stability. Default 1e-6. 
+ + Returns: + Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,) + + Example: + >>> bboxes1 = torch.FloatTensor([ + >>> [0, 0, 10, 10], + >>> [10, 10, 20, 20], + >>> [32, 32, 38, 42], + >>> ]) + >>> bboxes2 = torch.FloatTensor([ + >>> [0, 0, 10, 20], + >>> [0, 10, 10, 19], + >>> [10, 10, 20, 20], + >>> ]) + >>> overlaps = bbox_overlaps(bboxes1, bboxes2) + >>> assert overlaps.shape == (3, 3) + >>> overlaps = bbox_overlaps(bboxes1, bboxes2, is_aligned=True) + >>> assert overlaps.shape == (3, ) + + Example: + >>> empty = torch.empty(0, 4) + >>> nonempty = torch.FloatTensor([[0, 0, 10, 9]]) + >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1) + >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0) + >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0) + """ + + assert mode in ['iou', 'iof', 'giou'], f'Unsupported mode {mode}' + # Either the boxes are empty or the length of boxes' last dimension is 4 + assert (bboxes1.size(-1) == 4 or bboxes1.size(0) == 0) + assert (bboxes2.size(-1) == 4 or bboxes2.size(0) == 0) + + # Batch dim must be the same + # Batch dim: (B1, B2, ... Bn) + assert bboxes1.shape[:-2] == bboxes2.shape[:-2] + batch_shape = bboxes1.shape[:-2] + + rows = bboxes1.size(-2) + cols = bboxes2.size(-2) + if is_aligned: + assert rows == cols + + if rows * cols == 0: + if is_aligned: + return bboxes1.new(batch_shape + (rows, )) + else: + return bboxes1.new(batch_shape + (rows, cols)) + + area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * ( + bboxes1[..., 3] - bboxes1[..., 1]) + area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * ( + bboxes2[..., 3] - bboxes2[..., 1]) + + if is_aligned: + lt = torch.max(bboxes1[..., :2], bboxes2[..., :2]) # [B, rows, 2] + rb = torch.min(bboxes1[..., 2:], bboxes2[..., 2:]) # [B, rows, 2] + + wh = fp16_clamp(rb - lt, min=0) + overlap = wh[..., 0] * wh[..., 1] + + if mode in ['iou', 'giou']: + union = area1 + area2 - overlap + else: + union = area1 + if mode == 'giou': + enclosed_lt = torch.min(bboxes1[..., :2], bboxes2[..., :2]) + enclosed_rb = torch.max(bboxes1[..., 2:], bboxes2[..., 2:]) + else: + lt = torch.max(bboxes1[..., :, None, :2], + bboxes2[..., None, :, :2]) # [B, rows, cols, 2] + rb = torch.min(bboxes1[..., :, None, 2:], + bboxes2[..., None, :, 2:]) # [B, rows, cols, 2] + + wh = fp16_clamp(rb - lt, min=0) + overlap = wh[..., 0] * wh[..., 1] + + if mode in ['iou', 'giou']: + union = area1[..., None] + area2[..., None, :] - overlap + else: + union = area1[..., None] + if mode == 'giou': + enclosed_lt = torch.min(bboxes1[..., :, None, :2], + bboxes2[..., None, :, :2]) + enclosed_rb = torch.max(bboxes1[..., :, None, 2:], + bboxes2[..., None, :, 2:]) + + eps = union.new_tensor([eps]) + union = torch.max(union, eps) + ious = overlap / union + if mode in ['iou', 'iof']: + return ious + # calculate gious + enclose_wh = fp16_clamp(enclosed_rb - enclosed_lt, min=0) + enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1] + enclose_area = torch.max(enclose_area, eps) + gious = ious - (enclose_area - union) / enclose_area + return gious diff --git a/asone/detectors/yolov6/yolov6/assigners/tal_assigner.py b/asone/detectors/yolov6/yolov6/assigners/tal_assigner.py new file mode 100644 index 0000000000000000000000000000000000000000..bcb35cdb648ae53d92ea6b53ec22091e13ba5cbf --- /dev/null +++ b/asone/detectors/yolov6/yolov6/assigners/tal_assigner.py @@ -0,0 +1,151 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from yolov6.assigners.assigner_utils import select_candidates_in_gts, 
select_highest_overlaps, iou_calculator + +class TaskAlignedAssigner(nn.Module): + def __init__(self, + topk=13, + num_classes=80, + alpha=1.0, + beta=6.0, + eps=1e-9): + super(TaskAlignedAssigner, self).__init__() + self.topk = topk + self.num_classes = num_classes + self.bg_idx = num_classes + self.alpha = alpha + self.beta = beta + self.eps = eps + + @torch.no_grad() + def forward(self, + pd_scores, + pd_bboxes, + anc_points, + gt_labels, + gt_bboxes, + mask_gt): + """This code referenced to + https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py + + Args: + pd_scores (Tensor): shape(bs, num_total_anchors, num_classes) + pd_bboxes (Tensor): shape(bs, num_total_anchors, 4) + anc_points (Tensor): shape(num_total_anchors, 2) + gt_labels (Tensor): shape(bs, n_max_boxes, 1) + gt_bboxes (Tensor): shape(bs, n_max_boxes, 4) + mask_gt (Tensor): shape(bs, n_max_boxes, 1) + Returns: + target_labels (Tensor): shape(bs, num_total_anchors) + target_bboxes (Tensor): shape(bs, num_total_anchors, 4) + target_scores (Tensor): shape(bs, num_total_anchors, num_classes) + fg_mask (Tensor): shape(bs, num_total_anchors) + """ + self.bs = pd_scores.size(0) + self.n_max_boxes = gt_bboxes.size(1) + + if self.n_max_boxes == 0: + device = gt_bboxes.device + return torch.full_like(pd_scores[..., 0], self.bg_idx).to(device), \ + torch.zeros_like(pd_bboxes).to(device), \ + torch.zeros_like(pd_scores).to(device), \ + torch.zeros_like(pd_scores[..., 0]).to(device) + + + mask_pos, align_metric, overlaps = self.get_pos_mask( + pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt) + + target_gt_idx, fg_mask, mask_pos = select_highest_overlaps( + mask_pos, overlaps, self.n_max_boxes) + + # assigned target + target_labels, target_bboxes, target_scores = self.get_targets( + gt_labels, gt_bboxes, target_gt_idx, fg_mask) + + # normalize + align_metric *= mask_pos + pos_align_metrics = align_metric.max(axis=-1, keepdim=True)[0] + pos_overlaps = (overlaps * mask_pos).max(axis=-1, keepdim=True)[0] + norm_align_metric = (align_metric * pos_overlaps / (pos_align_metrics + self.eps)).max(-2)[0].unsqueeze(-1) + target_scores = target_scores * norm_align_metric + + return target_labels, target_bboxes, target_scores, fg_mask.bool() + + def get_pos_mask(self, + pd_scores, + pd_bboxes, + gt_labels, + gt_bboxes, + anc_points, + mask_gt): + + # get anchor_align metric + align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes) + # get in_gts mask + mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes) + # get topk_metric mask + mask_topk = self.select_topk_candidates( + align_metric * mask_in_gts, topk_mask=mask_gt.repeat([1, 1, self.topk]).bool()) + # merge all mask to a final mask + mask_pos = mask_topk * mask_in_gts * mask_gt + + return mask_pos, align_metric, overlaps + + def get_box_metrics(self, + pd_scores, + pd_bboxes, + gt_labels, + gt_bboxes): + + pd_scores = pd_scores.permute(0, 2, 1) + gt_labels = gt_labels.to(torch.long) + ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long) + ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) + ind[1] = gt_labels.squeeze(-1) + bbox_scores = pd_scores[ind[0], ind[1]] + + overlaps = iou_calculator(gt_bboxes, pd_bboxes) + align_metric = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta) + + return align_metric, overlaps + + def select_topk_candidates(self, + metrics, + largest=True, + topk_mask=None): + + num_anchors = metrics.shape[-1] + topk_metrics, topk_idxs = 
torch.topk( + metrics, self.topk, axis=-1, largest=largest) + if topk_mask is None: + topk_mask = (topk_metrics.max(axis=-1, keepdim=True) > self.eps).tile( + [1, 1, self.topk]) + topk_idxs = torch.where(topk_mask, topk_idxs, torch.zeros_like(topk_idxs)) + is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(axis=-2) + is_in_topk = torch.where(is_in_topk > 1, + torch.zeros_like(is_in_topk), is_in_topk) + return is_in_topk.to(metrics.dtype) + + def get_targets(self, + gt_labels, + gt_bboxes, + target_gt_idx, + fg_mask): + + # assigned target labels + batch_ind = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[...,None] + target_gt_idx = target_gt_idx + batch_ind * self.n_max_boxes + target_labels = gt_labels.long().flatten()[target_gt_idx] + + # assigned target boxes + target_bboxes = gt_bboxes.reshape([-1, 4])[target_gt_idx] + + # assigned target scores + target_labels[target_labels<0] = 0 + target_scores = F.one_hot(target_labels, self.num_classes) + fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.num_classes) + target_scores = torch.where(fg_scores_mask > 0, target_scores, + torch.full_like(target_scores, 0)) + + return target_labels, target_bboxes, target_scores \ No newline at end of file diff --git a/asone/detectors/yolov6/yolov6/layers/__init__.py b/asone/detectors/yolov6/yolov6/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/asone/detectors/yolov6/yolov6/layers/common.py b/asone/detectors/yolov6/yolov6/layers/common.py new file mode 100644 index 0000000000000000000000000000000000000000..335f23a11ad9ddc7d45b37398cd1c663b49c7117 --- /dev/null +++ b/asone/detectors/yolov6/yolov6/layers/common.py @@ -0,0 +1,466 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- + +import warnings +from pathlib import Path + +import numpy as np +import torch +import torch.nn as nn +from torch.nn.parameter import Parameter +import torch.nn.init as init + + +class SiLU(nn.Module): + '''Activation of SiLU''' + @staticmethod + def forward(x): + return x * torch.sigmoid(x) + + +class Conv(nn.Module): + '''Normal Conv with SiLU activation''' + def __init__(self, in_channels, out_channels, kernel_size, stride, groups=1, bias=False): + super().__init__() + padding = kernel_size // 2 + self.conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=groups, + bias=bias, + ) + self.bn = nn.BatchNorm2d(out_channels) + self.act = nn.SiLU() + + def forward(self, x): + return self.act(self.bn(self.conv(x))) + + def forward_fuse(self, x): + return self.act(self.conv(x)) + + +class SimConv(nn.Module): + '''Normal Conv with ReLU activation''' + def __init__(self, in_channels, out_channels, kernel_size, stride, groups=1, bias=False): + super().__init__() + padding = kernel_size // 2 + self.conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=groups, + bias=bias, + ) + self.bn = nn.BatchNorm2d(out_channels) + self.act = nn.ReLU() + + def forward(self, x): + return self.act(self.bn(self.conv(x))) + + def forward_fuse(self, x): + return self.act(self.conv(x)) + +class ConvWrapper(nn.Module): + '''Wrapper for normal Conv with SiLU activation''' + def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, groups=1, bias=True): + super().__init__() + self.block = Conv(in_channels, out_channels, kernel_size, stride, groups, bias) + + def forward(self, x): + return 
self.block(x) + + +class SimConvWrapper(nn.Module): + '''Wrapper for normal Conv with ReLU activation''' + def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, groups=1, bias=True): + super().__init__() + self.block = SimConv(in_channels, out_channels, kernel_size, stride, groups, bias) + + def forward(self, x): + return self.block(x) + + +class SimSPPF(nn.Module): + '''Simplified SPPF with ReLU activation''' + def __init__(self, in_channels, out_channels, kernel_size=5): + super().__init__() + c_ = in_channels // 2 # hidden channels + self.cv1 = SimConv(in_channels, c_, 1, 1) + self.cv2 = SimConv(c_ * 4, out_channels, 1, 1) + self.m = nn.MaxPool2d(kernel_size=kernel_size, stride=1, padding=kernel_size // 2) + + def forward(self, x): + x = self.cv1(x) + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + y1 = self.m(x) + y2 = self.m(y1) + return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1)) + + +class SPPF(nn.Module): + '''Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher''' + def __init__(self, in_channels, out_channels, kernel_size=5): # equivalent to SPP(k=(5, 9, 13)) + super().__init__() + c_ = in_channels // 2 # hidden channels + self.cv1 = Conv(in_channels, c_, 1, 1) + self.cv2 = Conv(c_ * 4, out_channels, 1, 1) + self.m = nn.MaxPool2d(kernel_size=kernel_size, stride=1, padding=kernel_size // 2) + + def forward(self, x): + x = self.cv1(x) + with warnings.catch_warnings(): + warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning + y1 = self.m(x) + y2 = self.m(y1) + return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) + + +class Transpose(nn.Module): + '''Normal Transpose, default for upsampling''' + def __init__(self, in_channels, out_channels, kernel_size=2, stride=2): + super().__init__() + self.upsample_transpose = torch.nn.ConvTranspose2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + bias=True + ) + + def forward(self, x): + return self.upsample_transpose(x) + + +class Concat(nn.Module): + def __init__(self, dimension=1): + super().__init__() + self.d = dimension + + def forward(self, x): + return torch.cat(x, self.d) + + +def conv_bn(in_channels, out_channels, kernel_size, stride, padding, groups=1): + '''Basic cell for rep-style block, including conv and bn''' + result = nn.Sequential() + result.add_module('conv', nn.Conv2d(in_channels=in_channels, out_channels=out_channels, + kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=False)) + result.add_module('bn', nn.BatchNorm2d(num_features=out_channels)) + return result + + +class RepVGGBlock(nn.Module): + '''RepVGGBlock is a basic rep-style block, including training and deploy status + This code is based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py + ''' + def __init__(self, in_channels, out_channels, kernel_size=3, + stride=1, padding=1, dilation=1, groups=1, padding_mode='zeros', deploy=False, use_se=False): + super(RepVGGBlock, self).__init__() + """ Initialization of the class. + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the convolving kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of + the input. Default: 1 + dilation (int or tuple, optional): Spacing between kernel elements. 
Default: 1 + groups (int, optional): Number of blocked connections from input + channels to output channels. Default: 1 + padding_mode (string, optional): Default: 'zeros' + deploy: Whether to be deploy status or training status. Default: False + use_se: Whether to use se. Default: False + """ + self.deploy = deploy + self.groups = groups + self.in_channels = in_channels + self.out_channels = out_channels + + assert kernel_size == 3 + assert padding == 1 + + padding_11 = padding - kernel_size // 2 + + self.nonlinearity = nn.ReLU() + + if use_se: + raise NotImplementedError("se block not supported yet") + else: + self.se = nn.Identity() + + if deploy: + self.rbr_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, + padding=padding, dilation=dilation, groups=groups, bias=True, padding_mode=padding_mode) + + else: + self.rbr_identity = nn.BatchNorm2d(num_features=in_channels) if out_channels == in_channels and stride == 1 else None + self.rbr_dense = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups) + self.rbr_1x1 = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride, padding=padding_11, groups=groups) + + def forward(self, inputs): + '''Forward process''' + if hasattr(self, 'rbr_reparam'): + return self.nonlinearity(self.se(self.rbr_reparam(inputs))) + + if self.rbr_identity is None: + id_out = 0 + else: + id_out = self.rbr_identity(inputs) + + return self.nonlinearity(self.se(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out)) + + def get_equivalent_kernel_bias(self): + kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) + kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) + kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) + return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid + + def _pad_1x1_to_3x3_tensor(self, kernel1x1): + if kernel1x1 is None: + return 0 + else: + return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1]) + + def _fuse_bn_tensor(self, branch): + if branch is None: + return 0, 0 + if isinstance(branch, nn.Sequential): + kernel = branch.conv.weight + running_mean = branch.bn.running_mean + running_var = branch.bn.running_var + gamma = branch.bn.weight + beta = branch.bn.bias + eps = branch.bn.eps + else: + assert isinstance(branch, nn.BatchNorm2d) + if not hasattr(self, 'id_tensor'): + input_dim = self.in_channels // self.groups + kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), dtype=np.float32) + for i in range(self.in_channels): + kernel_value[i, i % input_dim, 1, 1] = 1 + self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device) + kernel = self.id_tensor + running_mean = branch.running_mean + running_var = branch.running_var + gamma = branch.weight + beta = branch.bias + eps = branch.eps + std = (running_var + eps).sqrt() + t = (gamma / std).reshape(-1, 1, 1, 1) + return kernel * t, beta - running_mean * gamma / std + + def switch_to_deploy(self): + if hasattr(self, 'rbr_reparam'): + return + kernel, bias = self.get_equivalent_kernel_bias() + self.rbr_reparam = nn.Conv2d(in_channels=self.rbr_dense.conv.in_channels, out_channels=self.rbr_dense.conv.out_channels, + kernel_size=self.rbr_dense.conv.kernel_size, stride=self.rbr_dense.conv.stride, + padding=self.rbr_dense.conv.padding, dilation=self.rbr_dense.conv.dilation, groups=self.rbr_dense.conv.groups, bias=True) + self.rbr_reparam.weight.data = 
kernel + self.rbr_reparam.bias.data = bias + for para in self.parameters(): + para.detach_() + self.__delattr__('rbr_dense') + self.__delattr__('rbr_1x1') + if hasattr(self, 'rbr_identity'): + self.__delattr__('rbr_identity') + if hasattr(self, 'id_tensor'): + self.__delattr__('id_tensor') + self.deploy = True + + +class RealVGGBlock(nn.Module): + + def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, + dilation=1, groups=1, padding_mode='zeros', use_se=False, + ): + super(RealVGGBlock, self).__init__() + self.relu = nn.ReLU() + self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False) + self.bn = nn.BatchNorm2d(out_channels) + + if use_se: + raise NotImplementedError("se block not supported yet") + else: + self.se = nn.Identity() + + def forward(self, inputs): + out = self.relu(self.se(self.bn(self.conv(inputs)))) + return out + + +class ScaleLayer(torch.nn.Module): + + def __init__(self, num_features, use_bias=True, scale_init=1.0): + super(ScaleLayer, self).__init__() + self.weight = Parameter(torch.Tensor(num_features)) + init.constant_(self.weight, scale_init) + self.num_features = num_features + if use_bias: + self.bias = Parameter(torch.Tensor(num_features)) + init.zeros_(self.bias) + else: + self.bias = None + + def forward(self, inputs): + if self.bias is None: + return inputs * self.weight.view(1, self.num_features, 1, 1) + else: + return inputs * self.weight.view(1, self.num_features, 1, 1) + self.bias.view(1, self.num_features, 1, 1) + + +# A CSLA block is a LinearAddBlock with is_csla=True +class LinearAddBlock(nn.Module): + + def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, + dilation=1, groups=1, padding_mode='zeros', use_se=False, is_csla=False, conv_scale_init=1.0): + super(LinearAddBlock, self).__init__() + self.in_channels = in_channels + self.relu = nn.ReLU() + self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False) + self.scale_conv = ScaleLayer(num_features=out_channels, use_bias=False, scale_init=conv_scale_init) + self.conv_1x1 = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride, padding=0, bias=False) + self.scale_1x1 = ScaleLayer(num_features=out_channels, use_bias=False, scale_init=conv_scale_init) + if in_channels == out_channels and stride == 1: + self.scale_identity = ScaleLayer(num_features=out_channels, use_bias=False, scale_init=1.0) + self.bn = nn.BatchNorm2d(out_channels) + if is_csla: # Make them constant + self.scale_1x1.requires_grad_(False) + self.scale_conv.requires_grad_(False) + if use_se: + raise NotImplementedError("se block not supported yet") + else: + self.se = nn.Identity() + + def forward(self, inputs): + out = self.scale_conv(self.conv(inputs)) + self.scale_1x1(self.conv_1x1(inputs)) + if hasattr(self, 'scale_identity'): + out += self.scale_identity(inputs) + out = self.relu(self.se(self.bn(out))) + return out + + +class DetectBackend(nn.Module): + def __init__(self, weights='yolov6s.pt', device=None, dnn=True): + + super().__init__() + assert isinstance(weights, str) and Path(weights).suffix == '.pt', f'{Path(weights).suffix} format is not supported.' 
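RepVGGBlock above folds its 3x3, 1x1 and identity branches into a single convolution at deploy time; a small equivalence sketch (same assumed sys.path as the note above):

import sys
import torch
sys.path.insert(0, "asone/detectors/yolov6")   # assumption: run from the repository root
from yolov6.layers.common import RepVGGBlock

block = RepVGGBlock(in_channels=8, out_channels=8).eval()   # eval() so BatchNorm uses running stats
x = torch.randn(1, 8, 32, 32)
with torch.no_grad():
    y_multi_branch = block(x)
    block.switch_to_deploy()                                # fuses the branches into rbr_reparam
    y_fused = block(x)
print(torch.allclose(y_multi_branch, y_fused, atol=1e-5))   # expected: True, up to numerical tolerance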
+ from yolov6.utils.checkpoint import load_checkpoint + model = load_checkpoint(weights, map_location=device) + stride = int(model.stride.max()) + self.__dict__.update(locals()) # assign all variables to self + + def forward(self, im, val=False): + y, _ = self.model(im) + if isinstance(y, np.ndarray): + y = torch.tensor(y, device=self.device) + return y + + +class RepBlock(nn.Module): + ''' + RepBlock is a stage block with rep-style basic block + ''' + def __init__(self, in_channels, out_channels, n=1, block=RepVGGBlock, basic_block=RepVGGBlock): + super().__init__() + + self.conv1 = block(in_channels, out_channels) + self.block = nn.Sequential(*(block(out_channels, out_channels) for _ in range(n - 1))) if n > 1 else None + if block == BottleRep: + self.conv1 = BottleRep(in_channels, out_channels, basic_block=basic_block, weight=True) + n = n // 2 + self.block = nn.Sequential(*(BottleRep(out_channels, out_channels, basic_block=basic_block, weight=True) for _ in range(n - 1))) if n > 1 else None + + def forward(self, x): + x = self.conv1(x) + if self.block is not None: + x = self.block(x) + return x + + +class BottleRep(nn.Module): + + def __init__(self, in_channels, out_channels, basic_block=RepVGGBlock, weight=False): + super().__init__() + self.conv1 = basic_block(in_channels, out_channels) + self.conv2 = basic_block(out_channels, out_channels) + if in_channels != out_channels: + self.shortcut = False + else: + self.shortcut = True + if weight: + self.alpha = Parameter(torch.ones(1)) + else: + self.alpha = 1.0 + + def forward(self, x): + outputs = self.conv1(x) + outputs = self.conv2(outputs) + return outputs + self.alpha * x if self.shortcut else outputs + + + +def autopad(k, p=None): # kernel, padding + # Pad to 'same' + if p is None: + p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad + return p + + +class Conv_C3(nn.Module): + '''Standard convolution in BepC3-Block''' + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + super().__init__() + self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) + self.bn = nn.BatchNorm2d(c2) + self.act = nn.ReLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) + def forward(self, x): + return self.act(self.bn(self.conv(x))) + def forward_fuse(self, x): + return self.act(self.conv(x)) + + +class BepC3(nn.Module): + '''Beer-mug RepC3 Block''' + def __init__(self, in_channels, out_channels, n=1, e=0.5, concat=True, block=RepVGGBlock): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__() + c_ = int(out_channels * e) # hidden channels + self.cv1 = Conv_C3(in_channels, c_, 1, 1) + self.cv2 = Conv_C3(in_channels, c_, 1, 1) + self.cv3 = Conv_C3(2 * c_, out_channels, 1, 1) + if block == ConvWrapper: + self.cv1 = Conv_C3(in_channels, c_, 1, 1, act=nn.SiLU()) + self.cv2 = Conv_C3(in_channels, c_, 1, 1, act=nn.SiLU()) + self.cv3 = Conv_C3(2 * c_, out_channels, 1, 1, act=nn.SiLU()) + + self.m = RepBlock(in_channels=c_, out_channels=c_, n=n, block=BottleRep, basic_block=block) + self.concat = concat + if not concat: + self.cv3 = Conv_C3(c_, out_channels, 1, 1) + + def forward(self, x): + if self.concat is True: + return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)) + else: + return self.cv3(self.m(self.cv1(x))) + + +def get_block(mode): + if mode == 'repvgg': + return RepVGGBlock + elif mode == 'hyper_search': + return LinearAddBlock + elif mode == 'repopt': + return RealVGGBlock + elif mode == 
'conv_relu': + return SimConvWrapper + elif mode == 'conv_silu': + return ConvWrapper + else: + raise NotImplementedError("Undefied Repblock choice for mode {}".format(mode)) diff --git a/asone/detectors/yolov6/yolov6/layers/dbb_transforms.py b/asone/detectors/yolov6/yolov6/layers/dbb_transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..cd93d0e23ad459d3cfa8d1a608383bbcb3a0cbfb --- /dev/null +++ b/asone/detectors/yolov6/yolov6/layers/dbb_transforms.py @@ -0,0 +1,50 @@ +import torch +import numpy as np +import torch.nn.functional as F + + +def transI_fusebn(kernel, bn): + gamma = bn.weight + std = (bn.running_var + bn.eps).sqrt() + return kernel * ((gamma / std).reshape(-1, 1, 1, 1)), bn.bias - bn.running_mean * gamma / std + + +def transII_addbranch(kernels, biases): + return sum(kernels), sum(biases) + + +def transIII_1x1_kxk(k1, b1, k2, b2, groups): + if groups == 1: + k = F.conv2d(k2, k1.permute(1, 0, 2, 3)) # + b_hat = (k2 * b1.reshape(1, -1, 1, 1)).sum((1, 2, 3)) + else: + k_slices = [] + b_slices = [] + k1_T = k1.permute(1, 0, 2, 3) + k1_group_width = k1.size(0) // groups + k2_group_width = k2.size(0) // groups + for g in range(groups): + k1_T_slice = k1_T[:, g*k1_group_width:(g+1)*k1_group_width, :, :] + k2_slice = k2[g*k2_group_width:(g+1)*k2_group_width, :, :, :] + k_slices.append(F.conv2d(k2_slice, k1_T_slice)) + b_slices.append((k2_slice * b1[g * k1_group_width:(g+1) * k1_group_width].reshape(1, -1, 1, 1)).sum((1, 2, 3))) + k, b_hat = transIV_depthconcat(k_slices, b_slices) + return k, b_hat + b2 + + +def transIV_depthconcat(kernels, biases): + return torch.cat(kernels, dim=0), torch.cat(biases) + + +def transV_avg(channels, kernel_size, groups): + input_dim = channels // groups + k = torch.zeros((channels, input_dim, kernel_size, kernel_size)) + k[np.arange(channels), np.tile(np.arange(input_dim), groups), :, :] = 1.0 / kernel_size ** 2 + return k + + +# This has not been tested with non-square kernels (kernel.size(2) != kernel.size(3)) nor even-size kernels +def transVI_multiscale(kernel, target_kernel_size): + H_pixels_to_pad = (target_kernel_size - kernel.size(2)) // 2 + W_pixels_to_pad = (target_kernel_size - kernel.size(3)) // 2 + return F.pad(kernel, [H_pixels_to_pad, H_pixels_to_pad, W_pixels_to_pad, W_pixels_to_pad]) diff --git a/asone/detectors/yolov6/yolov6/models/__init__.py b/asone/detectors/yolov6/yolov6/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/asone/detectors/yolov6/yolov6/models/efficientrep.py b/asone/detectors/yolov6/yolov6/models/efficientrep.py new file mode 100644 index 0000000000000000000000000000000000000000..1c6556e9b60f3bfa3d8ed5f4c8ce1dfc89cc7f15 --- /dev/null +++ b/asone/detectors/yolov6/yolov6/models/efficientrep.py @@ -0,0 +1,220 @@ +from torch import nn +from yolov6.layers.common import BottleRep, RepVGGBlock, RepBlock, BepC3, SimSPPF, SPPF, ConvWrapper + + +class EfficientRep(nn.Module): + '''EfficientRep Backbone + EfficientRep is handcrafted by hardware-aware neural network design. + With rep-style struct, EfficientRep is friendly to high-computation hardware(e.g. GPU). 
+ ''' + + def __init__( + self, + in_channels=3, + channels_list=None, + num_repeats=None, + block=RepVGGBlock + ): + super().__init__() + + assert channels_list is not None + assert num_repeats is not None + + self.stem = block( + in_channels=in_channels, + out_channels=channels_list[0], + kernel_size=3, + stride=2 + ) + + self.ERBlock_2 = nn.Sequential( + block( + in_channels=channels_list[0], + out_channels=channels_list[1], + kernel_size=3, + stride=2 + ), + RepBlock( + in_channels=channels_list[1], + out_channels=channels_list[1], + n=num_repeats[1], + block=block, + ) + ) + + self.ERBlock_3 = nn.Sequential( + block( + in_channels=channels_list[1], + out_channels=channels_list[2], + kernel_size=3, + stride=2 + ), + RepBlock( + in_channels=channels_list[2], + out_channels=channels_list[2], + n=num_repeats[2], + block=block, + ) + ) + + self.ERBlock_4 = nn.Sequential( + block( + in_channels=channels_list[2], + out_channels=channels_list[3], + kernel_size=3, + stride=2 + ), + RepBlock( + in_channels=channels_list[3], + out_channels=channels_list[3], + n=num_repeats[3], + block=block, + ) + ) + + self.ERBlock_5 = nn.Sequential( + block( + in_channels=channels_list[3], + out_channels=channels_list[4], + kernel_size=3, + stride=2, + ), + RepBlock( + in_channels=channels_list[4], + out_channels=channels_list[4], + n=num_repeats[4], + block=block, + ), + SimSPPF( + in_channels=channels_list[4], + out_channels=channels_list[4], + kernel_size=5 + ) + ) + + def forward(self, x): + + outputs = [] + x = self.stem(x) + x = self.ERBlock_2(x) + x = self.ERBlock_3(x) + outputs.append(x) + x = self.ERBlock_4(x) + outputs.append(x) + x = self.ERBlock_5(x) + outputs.append(x) + + return tuple(outputs) + + +class CSPBepBackbone(nn.Module): + """ + CSPBepBackbone module. 
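EfficientRep above returns the stride-8/16/32 feature maps consumed by the neck; a shape check with made-up channel settings (a sketch; channels_list/num_repeats are illustrative, not the released model configs, and the sys.path entry is the same assumption as above):

import sys
import torch
sys.path.insert(0, "asone/detectors/yolov6")   # assumption: run from the repository root
from yolov6.models.efficientrep import EfficientRep

backbone = EfficientRep(channels_list=[16, 32, 64, 128, 256], num_repeats=[1, 1, 1, 1, 1])
feats = backbone(torch.randn(1, 3, 640, 640))
print([tuple(f.shape) for f in feats])
# [(1, 64, 80, 80), (1, 128, 40, 40), (1, 256, 20, 20)] -- strides 8, 16, 32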
+ """ + + def __init__( + self, + in_channels=3, + channels_list=None, + num_repeats=None, + block=RepVGGBlock, + csp_e=float(1)/2, + ): + super().__init__() + + assert channels_list is not None + assert num_repeats is not None + + self.stem = block( + in_channels=in_channels, + out_channels=channels_list[0], + kernel_size=3, + stride=2 + ) + + self.ERBlock_2 = nn.Sequential( + block( + in_channels=channels_list[0], + out_channels=channels_list[1], + kernel_size=3, + stride=2 + ), + BepC3( + in_channels=channels_list[1], + out_channels=channels_list[1], + n=num_repeats[1], + e=csp_e, + block=block, + ) + ) + + self.ERBlock_3 = nn.Sequential( + block( + in_channels=channels_list[1], + out_channels=channels_list[2], + kernel_size=3, + stride=2 + ), + BepC3( + in_channels=channels_list[2], + out_channels=channels_list[2], + n=num_repeats[2], + e=csp_e, + block=block, + ) + ) + + self.ERBlock_4 = nn.Sequential( + block( + in_channels=channels_list[2], + out_channels=channels_list[3], + kernel_size=3, + stride=2 + ), + BepC3( + in_channels=channels_list[3], + out_channels=channels_list[3], + n=num_repeats[3], + e=csp_e, + block=block, + ) + ) + + channel_merge_layer = SimSPPF + if block == ConvWrapper: + channel_merge_layer = SPPF + + self.ERBlock_5 = nn.Sequential( + block( + in_channels=channels_list[3], + out_channels=channels_list[4], + kernel_size=3, + stride=2, + ), + BepC3( + in_channels=channels_list[4], + out_channels=channels_list[4], + n=num_repeats[4], + e=csp_e, + block=block, + ), + channel_merge_layer( + in_channels=channels_list[4], + out_channels=channels_list[4], + kernel_size=5 + ) + ) + + def forward(self, x): + + outputs = [] + x = self.stem(x) + x = self.ERBlock_2(x) + x = self.ERBlock_3(x) + outputs.append(x) + x = self.ERBlock_4(x) + outputs.append(x) + x = self.ERBlock_5(x) + outputs.append(x) + + return tuple(outputs) diff --git a/asone/detectors/yolov6/yolov6/models/effidehead.py b/asone/detectors/yolov6/yolov6/models/effidehead.py new file mode 100644 index 0000000000000000000000000000000000000000..7d00e218219e2fe620138def01249c883e75abad --- /dev/null +++ b/asone/detectors/yolov6/yolov6/models/effidehead.py @@ -0,0 +1,239 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import math +from yolov6.layers.common import * +from yolov6.assigners.anchor_generator import generate_anchors +from yolov6.utils.general import dist2bbox + + +class Detect(nn.Module): + '''Efficient Decoupled Head + With hardware-aware degisn, the decoupled head is optimized with + hybridchannels methods. 
+ ''' + def __init__(self, num_classes=80, anchors=1, num_layers=3, inplace=True, head_layers=None, use_dfl=True, reg_max=16): # detection layer + super().__init__() + assert head_layers is not None + self.nc = num_classes # number of classes + self.no = num_classes + 5 # number of outputs per anchor + self.nl = num_layers # number of detection layers + if isinstance(anchors, (list, tuple)): + self.na = len(anchors[0]) // 2 + else: + self.na = anchors + self.anchors = anchors + self.grid = [torch.zeros(1)] * num_layers + self.prior_prob = 1e-2 + self.inplace = inplace + stride = [8, 16, 32] # strides computed during build + self.stride = torch.tensor(stride) + self.use_dfl = use_dfl + self.reg_max = reg_max + self.proj_conv = nn.Conv2d(self.reg_max + 1, 1, 1, bias=False) + self.grid_cell_offset = 0.5 + self.grid_cell_size = 5.0 + + # Init decouple head + self.stems = nn.ModuleList() + self.cls_convs = nn.ModuleList() + self.reg_convs = nn.ModuleList() + self.cls_preds = nn.ModuleList() + self.reg_preds = nn.ModuleList() + + # Efficient decoupled head layers + for i in range(num_layers): + idx = i*5 + self.stems.append(head_layers[idx]) + self.cls_convs.append(head_layers[idx+1]) + self.reg_convs.append(head_layers[idx+2]) + self.cls_preds.append(head_layers[idx+3]) + self.reg_preds.append(head_layers[idx+4]) + + def initialize_biases(self): + + for conv in self.cls_preds: + b = conv.bias.view(-1, ) + b.data.fill_(-math.log((1 - self.prior_prob) / self.prior_prob)) + conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) + w = conv.weight + w.data.fill_(0.) + conv.weight = torch.nn.Parameter(w, requires_grad=True) + + for conv in self.reg_preds: + b = conv.bias.view(-1, ) + b.data.fill_(1.0) + conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) + w = conv.weight + w.data.fill_(0.) 
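The classification bias above is filled with -log((1 - prior_prob) / prior_prob), so the head's initial sigmoid scores match the assumed class prior of 0.01; a one-line check:

import math
prior_prob = 1e-2
bias_init = -math.log((1 - prior_prob) / prior_prob)
print(round(bias_init, 3), round(1 / (1 + math.exp(-bias_init)), 3))   # -4.595 0.01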
+ conv.weight = torch.nn.Parameter(w, requires_grad=True) + + self.proj = nn.Parameter(torch.linspace(0, self.reg_max, self.reg_max + 1), requires_grad=False) + self.proj_conv.weight = nn.Parameter(self.proj.view([1, self.reg_max + 1, 1, 1]).clone().detach(), + requires_grad=False) + + def forward(self, x): + if self.training: + cls_score_list = [] + reg_distri_list = [] + + for i in range(self.nl): + x[i] = self.stems[i](x[i]) + cls_x = x[i] + reg_x = x[i] + cls_feat = self.cls_convs[i](cls_x) + cls_output = self.cls_preds[i](cls_feat) + reg_feat = self.reg_convs[i](reg_x) + reg_output = self.reg_preds[i](reg_feat) + + cls_output = torch.sigmoid(cls_output) + cls_score_list.append(cls_output.flatten(2).permute((0, 2, 1))) + reg_distri_list.append(reg_output.flatten(2).permute((0, 2, 1))) + + cls_score_list = torch.cat(cls_score_list, axis=1) + reg_distri_list = torch.cat(reg_distri_list, axis=1) + + return x, cls_score_list, reg_distri_list + else: + cls_score_list = [] + reg_dist_list = [] + anchor_points, stride_tensor = generate_anchors( + x, self.stride, self.grid_cell_size, self.grid_cell_offset, device=x[0].device, is_eval=True) + + for i in range(self.nl): + b, _, h, w = x[i].shape + l = h * w + x[i] = self.stems[i](x[i]) + cls_x = x[i] + reg_x = x[i] + cls_feat = self.cls_convs[i](cls_x) + cls_output = self.cls_preds[i](cls_feat) + reg_feat = self.reg_convs[i](reg_x) + reg_output = self.reg_preds[i](reg_feat) + + if self.use_dfl: + reg_output = reg_output.reshape([-1, 4, self.reg_max + 1, l]).permute(0, 2, 1, 3) + reg_output = self.proj_conv(F.softmax(reg_output, dim=1)) + + cls_output = torch.sigmoid(cls_output) + cls_score_list.append(cls_output.reshape([b, self.nc, l])) + reg_dist_list.append(reg_output.reshape([b, 4, l])) + + cls_score_list = torch.cat(cls_score_list, axis=-1).permute(0, 2, 1) + reg_dist_list = torch.cat(reg_dist_list, axis=-1).permute(0, 2, 1) + + + pred_bboxes = dist2bbox(reg_dist_list, anchor_points, box_format='xywh') + pred_bboxes *= stride_tensor + return torch.cat( + [ + pred_bboxes, + torch.ones((b, pred_bboxes.shape[1], 1), device=pred_bboxes.device, dtype=pred_bboxes.dtype), + cls_score_list + ], + axis=-1) + + +def build_effidehead_layer(channels_list, num_anchors, num_classes, reg_max=16): + head_layers = nn.Sequential( + # stem0 + Conv( + in_channels=channels_list[6], + out_channels=channels_list[6], + kernel_size=1, + stride=1 + ), + # cls_conv0 + Conv( + in_channels=channels_list[6], + out_channels=channels_list[6], + kernel_size=3, + stride=1 + ), + # reg_conv0 + Conv( + in_channels=channels_list[6], + out_channels=channels_list[6], + kernel_size=3, + stride=1 + ), + # cls_pred0 + nn.Conv2d( + in_channels=channels_list[6], + out_channels=num_classes * num_anchors, + kernel_size=1 + ), + # reg_pred0 + nn.Conv2d( + in_channels=channels_list[6], + out_channels=4 * (reg_max + num_anchors), + kernel_size=1 + ), + # stem1 + Conv( + in_channels=channels_list[8], + out_channels=channels_list[8], + kernel_size=1, + stride=1 + ), + # cls_conv1 + Conv( + in_channels=channels_list[8], + out_channels=channels_list[8], + kernel_size=3, + stride=1 + ), + # reg_conv1 + Conv( + in_channels=channels_list[8], + out_channels=channels_list[8], + kernel_size=3, + stride=1 + ), + # cls_pred1 + nn.Conv2d( + in_channels=channels_list[8], + out_channels=num_classes * num_anchors, + kernel_size=1 + ), + # reg_pred1 + nn.Conv2d( + in_channels=channels_list[8], + out_channels=4 * (reg_max + num_anchors), + kernel_size=1 + ), + # stem2 + Conv( + 
in_channels=channels_list[10], + out_channels=channels_list[10], + kernel_size=1, + stride=1 + ), + # cls_conv2 + Conv( + in_channels=channels_list[10], + out_channels=channels_list[10], + kernel_size=3, + stride=1 + ), + # reg_conv2 + Conv( + in_channels=channels_list[10], + out_channels=channels_list[10], + kernel_size=3, + stride=1 + ), + # cls_pred2 + nn.Conv2d( + in_channels=channels_list[10], + out_channels=num_classes * num_anchors, + kernel_size=1 + ), + # reg_pred2 + nn.Conv2d( + in_channels=channels_list[10], + out_channels=4 * (reg_max + num_anchors), + kernel_size=1 + ) + ) + return head_layers diff --git a/asone/detectors/yolov6/yolov6/models/end2end.py b/asone/detectors/yolov6/yolov6/models/end2end.py new file mode 100644 index 0000000000000000000000000000000000000000..fce3e76ca52951e83c386539206674075f365efb --- /dev/null +++ b/asone/detectors/yolov6/yolov6/models/end2end.py @@ -0,0 +1,260 @@ +import torch +import torch.nn as nn +import random + + +class ORT_NMS(torch.autograd.Function): + '''ONNX-Runtime NMS operation''' + @staticmethod + def forward(ctx, + boxes, + scores, + max_output_boxes_per_class=torch.tensor([100]), + iou_threshold=torch.tensor([0.45]), + score_threshold=torch.tensor([0.25])): + device = boxes.device + batch = scores.shape[0] + num_det = random.randint(0, 100) + batches = torch.randint(0, batch, (num_det,)).sort()[0].to(device) + idxs = torch.arange(100, 100 + num_det).to(device) + zeros = torch.zeros((num_det,), dtype=torch.int64).to(device) + selected_indices = torch.cat([batches[None], zeros[None], idxs[None]], 0).T.contiguous() + selected_indices = selected_indices.to(torch.int64) + return selected_indices + + @staticmethod + def symbolic(g, boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold): + return g.op("NonMaxSuppression", boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold) + + +class TRT8_NMS(torch.autograd.Function): + '''TensorRT NMS operation''' + @staticmethod + def forward( + ctx, + boxes, + scores, + background_class=-1, + box_coding=1, + iou_threshold=0.45, + max_output_boxes=100, + plugin_version="1", + score_activation=0, + score_threshold=0.25, + ): + batch_size, num_boxes, num_classes = scores.shape + num_det = torch.randint(0, max_output_boxes, (batch_size, 1), dtype=torch.int32) + det_boxes = torch.randn(batch_size, max_output_boxes, 4) + det_scores = torch.randn(batch_size, max_output_boxes) + det_classes = torch.randint(0, num_classes, (batch_size, max_output_boxes), dtype=torch.int32) + return num_det, det_boxes, det_scores, det_classes + + @staticmethod + def symbolic(g, + boxes, + scores, + background_class=-1, + box_coding=1, + iou_threshold=0.45, + max_output_boxes=100, + plugin_version="1", + score_activation=0, + score_threshold=0.25): + out = g.op("TRT::EfficientNMS_TRT", + boxes, + scores, + background_class_i=background_class, + box_coding_i=box_coding, + iou_threshold_f=iou_threshold, + max_output_boxes_i=max_output_boxes, + plugin_version_s=plugin_version, + score_activation_i=score_activation, + score_threshold_f=score_threshold, + outputs=4) + nums, boxes, scores, classes = out + return nums, boxes, scores, classes + +class TRT7_NMS(torch.autograd.Function): + '''TensorRT NMS operation''' + @staticmethod + def forward( + ctx, + boxes, + scores, + plugin_version="1", + shareLocation=1, + backgroundLabelId=-1, + numClasses=80, + topK=1000, + keepTopK=100, + scoreThreshold=0.25, + iouThreshold=0.45, + isNormalized=0, + clipBoxes=0, + scoreBits=16, + caffeSemantics=1, + 
): + batch_size, num_boxes, numClasses = scores.shape + num_det = torch.randint(0, keepTopK, (batch_size, 1), dtype=torch.int32) + det_boxes = torch.randn(batch_size, keepTopK, 4) + det_scores = torch.randn(batch_size, keepTopK) + det_classes = torch.randint(0, numClasses, (batch_size, keepTopK)).float() + return num_det, det_boxes, det_scores, det_classes + @staticmethod + def symbolic(g, + boxes, + scores, + plugin_version='1', + shareLocation=1, + backgroundLabelId=-1, + numClasses=80, + topK=1000, + keepTopK=100, + scoreThreshold=0.25, + iouThreshold=0.45, + isNormalized=0, + clipBoxes=0, + scoreBits=16, + caffeSemantics=1, + ): + out = g.op("TRT::BatchedNMSDynamic_TRT", # BatchedNMS_TRT BatchedNMSDynamic_TRT + boxes, + scores, + shareLocation_i=shareLocation, + plugin_version_s=plugin_version, + backgroundLabelId_i=backgroundLabelId, + numClasses_i=numClasses, + topK_i=topK, + keepTopK_i=keepTopK, + scoreThreshold_f=scoreThreshold, + iouThreshold_f=iouThreshold, + isNormalized_i=isNormalized, + clipBoxes_i=clipBoxes, + scoreBits_i=scoreBits, + caffeSemantics_i=caffeSemantics, + outputs=4) + nums, boxes, scores, classes = out + return nums, boxes, scores, classes + + +class ONNX_ORT(nn.Module): + '''onnx module with ONNX-Runtime NMS operation.''' + def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=640, device=None): + super().__init__() + self.device = device if device else torch.device("cpu") + self.max_obj = torch.tensor([max_obj]).to(device) + self.iou_threshold = torch.tensor([iou_thres]).to(device) + self.score_threshold = torch.tensor([score_thres]).to(device) + self.max_wh = max_wh + self.convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], + dtype=torch.float32, + device=self.device) + + def forward(self, x): + box = x[:, :, :4] + conf = x[:, :, 4:5] + score = x[:, :, 5:] + score *= conf + box @= self.convert_matrix + objScore, objCls = score.max(2, keepdim=True) + dis = objCls.float() * self.max_wh + nmsbox = box + dis + objScore1 = objScore.transpose(1, 2).contiguous() + selected_indices = ORT_NMS.apply(nmsbox, objScore1, self.max_obj, self.iou_threshold, self.score_threshold) + X, Y = selected_indices[:, 0], selected_indices[:, 2] + resBoxes = box[X, Y, :] + resClasses = objCls[X, Y, :].float() + resScores = objScore[X, Y, :] + X = X.unsqueeze(1).float() + return torch.cat([X, resBoxes, resClasses, resScores], 1) + +class ONNX_TRT7(nn.Module): + '''onnx module with TensorRT NMS operation.''' + def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None ,device=None): + super().__init__() + assert max_wh is None + self.device = device if device else torch.device('cpu') + self.shareLocation = 1 + self.backgroundLabelId = -1 + self.numClasses = 80 + self.topK = 1000 + self.keepTopK = max_obj + self.scoreThreshold = score_thres + self.iouThreshold = iou_thres + self.isNormalized = 0 + self.clipBoxes = 0 + self.scoreBits = 16 + self.caffeSemantics = 1 + self.plugin_version = '1' + self.convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], + dtype=torch.float32, + device=self.device) + def forward(self, x): + box = x[:, :, :4] + conf = x[:, :, 4:5] + score = x[:, :, 5:] + score *= conf + box @= self.convert_matrix + box = box.unsqueeze(2) + self.numClasses = int(score.shape[2]) + num_det, det_boxes, det_scores, det_classes = TRT7_NMS.apply(box, score, self.plugin_version, + self.shareLocation, + self.backgroundLabelId, + self.numClasses, + self.topK, + 
self.keepTopK, + self.scoreThreshold, + self.iouThreshold, + self.isNormalized, + self.clipBoxes, + self.scoreBits, + self.caffeSemantics, + ) + return num_det, det_boxes, det_scores, det_classes.int() + + +class ONNX_TRT8(nn.Module): + '''onnx module with TensorRT NMS operation.''' + def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None ,device=None): + super().__init__() + assert max_wh is None + self.device = device if device else torch.device('cpu') + self.background_class = -1, + self.box_coding = 1, + self.iou_threshold = iou_thres + self.max_obj = max_obj + self.plugin_version = '1' + self.score_activation = 0 + self.score_threshold = score_thres + + def forward(self, x): + box = x[:, :, :4] + conf = x[:, :, 4:5] + score = x[:, :, 5:] + score *= conf + num_det, det_boxes, det_scores, det_classes = TRT8_NMS.apply(box, score, self.background_class, self.box_coding, + self.iou_threshold, self.max_obj, + self.plugin_version, self.score_activation, + self.score_threshold) + return num_det, det_boxes, det_scores, det_classes + + +class End2End(nn.Module): + '''export onnx or tensorrt model with NMS operation.''' + def __init__(self, model, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None, trt_version=8, with_preprocess=False): + super().__init__() + device = device if device else torch.device('cpu') + self.with_preprocess = with_preprocess + self.model = model.to(device) + TRT = ONNX_TRT8 if trt_version >= 8 else ONNX_TRT7 + self.patch_model = TRT if max_wh is None else ONNX_ORT + self.end2end = self.patch_model(max_obj, iou_thres, score_thres, max_wh, device) + self.end2end.eval() + + def forward(self, x): + if self.with_preprocess: + x = x[:,[2,1,0],...] + x = x * (1/255) + x = self.model(x) + x = self.end2end(x) + return x diff --git a/asone/detectors/yolov6/yolov6/models/loss.py b/asone/detectors/yolov6/yolov6/models/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..6501571f722ef7c2eba5b5d62559ce64a0f8f9c4 --- /dev/null +++ b/asone/detectors/yolov6/yolov6/models/loss.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- + +import torch +import torch.nn as nn +import numpy as np +import torch.nn.functional as F +from yolov6.assigners.anchor_generator import generate_anchors +from yolov6.utils.general import dist2bbox, bbox2dist, xywh2xyxy +from yolov6.utils.figure_iou import IOUloss +from yolov6.assigners.atss_assigner import ATSSAssigner +from yolov6.assigners.tal_assigner import TaskAlignedAssigner + + +class ComputeLoss: + '''Loss computation func.''' + def __init__(self, + fpn_strides=[8, 16, 32], + grid_cell_size=5.0, + grid_cell_offset=0.5, + num_classes=80, + ori_img_size=640, + warmup_epoch=4, + use_dfl=True, + reg_max=16, + iou_type='giou', + loss_weight={ + 'class': 1.0, + 'iou': 2.5, + 'dfl': 0.5} + ): + + self.fpn_strides = fpn_strides + self.grid_cell_size = grid_cell_size + self.grid_cell_offset = grid_cell_offset + self.num_classes = num_classes + self.ori_img_size = ori_img_size + + self.warmup_epoch = warmup_epoch + self.warmup_assigner = ATSSAssigner(9, num_classes=self.num_classes) + self.formal_assigner = TaskAlignedAssigner(topk=13, num_classes=self.num_classes, alpha=1.0, beta=6.0) + + self.use_dfl = use_dfl + self.reg_max = reg_max + self.proj = nn.Parameter(torch.linspace(0, self.reg_max, self.reg_max + 1), requires_grad=False) + self.iou_type = iou_type + self.varifocal_loss = VarifocalLoss().cuda() + self.bbox_loss = BboxLoss(self.num_classes, self.reg_max, self.use_dfl, 
self.iou_type).cuda() + self.loss_weight = loss_weight + + def __call__( + self, + outputs, + targets, + epoch_num + ): + + feats, pred_scores, pred_distri = outputs + anchors, anchor_points, n_anchors_list, stride_tensor = \ + generate_anchors(feats, self.fpn_strides, self.grid_cell_size, self.grid_cell_offset, device=feats[0].device) + + assert pred_scores.type() == pred_distri.type() + gt_bboxes_scale = torch.full((1,4), self.ori_img_size).type_as(pred_scores) + batch_size = pred_scores.shape[0] + + # targets + targets =self.preprocess(targets, batch_size, gt_bboxes_scale) + gt_labels = targets[:, :, :1] + gt_bboxes = targets[:, :, 1:] #xyxy + mask_gt = (gt_bboxes.sum(-1, keepdim=True) > 0).float() + + # pboxes + anchor_points_s = anchor_points / stride_tensor + pred_bboxes = self.bbox_decode(anchor_points_s, pred_distri) #xyxy + + if epoch_num < self.warmup_epoch: + target_labels, target_bboxes, target_scores, fg_mask = \ + self.warmup_assigner( + anchors, + n_anchors_list, + gt_labels, + gt_bboxes, + mask_gt, + pred_bboxes.detach() * stride_tensor) + else: + target_labels, target_bboxes, target_scores, fg_mask = \ + self.formal_assigner( + pred_scores.detach(), + pred_bboxes.detach() * stride_tensor, + anchor_points, + gt_labels, + gt_bboxes, + mask_gt) + + # rescale bbox + target_bboxes /= stride_tensor + + # cls loss + target_labels = torch.where(fg_mask > 0, target_labels, torch.full_like(target_labels, self.num_classes)) + one_hot_label = F.one_hot(target_labels.long(), self.num_classes + 1)[..., :-1] + loss_cls = self.varifocal_loss(pred_scores, target_scores, one_hot_label) + + target_scores_sum = target_scores.sum() + loss_cls /= target_scores_sum + + # bbox loss + loss_iou, loss_dfl = self.bbox_loss(pred_distri, pred_bboxes, anchor_points_s, target_bboxes, + target_scores, target_scores_sum, fg_mask) + + loss = self.loss_weight['class'] * loss_cls + \ + self.loss_weight['iou'] * loss_iou + \ + self.loss_weight['dfl'] * loss_dfl + + return loss, \ + torch.cat(((self.loss_weight['iou'] * loss_iou).unsqueeze(0), + (self.loss_weight['dfl'] * loss_dfl).unsqueeze(0), + (self.loss_weight['class'] * loss_cls).unsqueeze(0))).detach() + + def preprocess(self, targets, batch_size, scale_tensor): + targets_list = np.zeros((batch_size, 1, 5)).tolist() + for i, item in enumerate(targets.cpu().numpy().tolist()): + targets_list[int(item[0])].append(item[1:]) + max_len = max((len(l) for l in targets_list)) + targets = torch.from_numpy(np.array(list(map(lambda l:l + [[-1,0,0,0,0]]*(max_len - len(l)), targets_list)))[:,1:,:]).to(targets.device) + batch_target = targets[:, :, 1:5].mul_(scale_tensor) + targets[..., 1:] = xywh2xyxy(batch_target) + return targets + + def bbox_decode(self, anchor_points, pred_dist): + if self.use_dfl: + batch_size, n_anchors, _ = pred_dist.shape + pred_dist = F.softmax(pred_dist.view(batch_size, n_anchors, 4, self.reg_max + 1), dim=-1).matmul(self.proj.to(pred_dist.device)) + return dist2bbox(pred_dist, anchor_points) + + +class VarifocalLoss(nn.Module): + def __init__(self): + super(VarifocalLoss, self).__init__() + + def forward(self, pred_score,gt_score, label, alpha=0.75, gamma=2.0): + + weight = alpha * pred_score.pow(gamma) * (1 - label) + gt_score * label + with torch.cuda.amp.autocast(enabled=False): + loss = (F.binary_cross_entropy(pred_score.float(), gt_score.float(), reduction='none') * weight).sum() + + return loss + + +class BboxLoss(nn.Module): + def __init__(self, num_classes, reg_max, use_dfl=False, iou_type='giou'): + super(BboxLoss, self).__init__() 
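+        # BboxLoss combines an IoU regression term with an optional Distribution Focal
+        # Loss (DFL) term; with use_dfl enabled, each box side is regressed as a
+        # distribution over reg_max + 1 discrete bins (see _df_loss below).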
+ self.num_classes = num_classes + self.iou_loss = IOUloss(box_format='xyxy', iou_type=iou_type, eps=1e-10) + self.reg_max = reg_max + self.use_dfl = use_dfl + + def forward(self, pred_dist, pred_bboxes, anchor_points, + target_bboxes, target_scores, target_scores_sum, fg_mask): + + # select positive samples mask + num_pos = fg_mask.sum() + if num_pos > 0: + # iou loss + bbox_mask = fg_mask.unsqueeze(-1).repeat([1, 1, 4]) + pred_bboxes_pos = torch.masked_select(pred_bboxes, + bbox_mask).reshape([-1, 4]) + target_bboxes_pos = torch.masked_select( + target_bboxes, bbox_mask).reshape([-1, 4]) + bbox_weight = torch.masked_select( + target_scores.sum(-1), fg_mask).unsqueeze(-1) + loss_iou = self.iou_loss(pred_bboxes_pos, + target_bboxes_pos) * bbox_weight + loss_iou = loss_iou.sum() / target_scores_sum + + # dfl loss + if self.use_dfl: + dist_mask = fg_mask.unsqueeze(-1).repeat( + [1, 1, (self.reg_max + 1) * 4]) + pred_dist_pos = torch.masked_select( + pred_dist, dist_mask).reshape([-1, 4, self.reg_max + 1]) + target_ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max) + target_ltrb_pos = torch.masked_select( + target_ltrb, bbox_mask).reshape([-1, 4]) + loss_dfl = self._df_loss(pred_dist_pos, + target_ltrb_pos) * bbox_weight + loss_dfl = loss_dfl.sum() / target_scores_sum + else: + loss_dfl = torch.tensor(0.).to(pred_dist.device) + + else: + loss_iou = torch.tensor(0.).to(pred_dist.device) + loss_dfl = torch.tensor(0.).to(pred_dist.device) + + return loss_iou, loss_dfl + + def _df_loss(self, pred_dist, target): + target_left = target.to(torch.long) + target_right = target_left + 1 + weight_left = target_right.to(torch.float) - target + weight_right = 1 - weight_left + loss_left = F.cross_entropy( + pred_dist.view(-1, self.reg_max + 1), target_left.view(-1), reduction='none').view( + target_left.shape) * weight_left + loss_right = F.cross_entropy( + pred_dist.view(-1, self.reg_max + 1), target_right.view(-1), reduction='none').view( + target_left.shape) * weight_right + return (loss_left + loss_right).mean(-1, keepdim=True) + diff --git a/asone/detectors/yolov6/yolov6/models/loss_distill.py b/asone/detectors/yolov6/yolov6/models/loss_distill.py new file mode 100644 index 0000000000000000000000000000000000000000..7e2d0d0e95e1005653f3c904c90dc2a73343d1fe --- /dev/null +++ b/asone/detectors/yolov6/yolov6/models/loss_distill.py @@ -0,0 +1,297 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- + +import torch +import torch.nn as nn +import numpy as np +import torch.nn.functional as F +from yolov6.assigners.anchor_generator import generate_anchors +from yolov6.utils.general import dist2bbox, bbox2dist, xywh2xyxy +from yolov6.utils.figure_iou import IOUloss +from yolov6.assigners.atss_assigner import ATSSAssigner +from yolov6.assigners.tal_assigner import TaskAlignedAssigner + + +class ComputeLoss: + '''Loss computation func.''' + def __init__(self, + fpn_strides=[8, 16, 32], + grid_cell_size=5.0, + grid_cell_offset=0.5, + num_classes=80, + ori_img_size=640, + warmup_epoch=4, + use_dfl=True, + reg_max=16, + iou_type='giou', + loss_weight={ + 'class': 1.0, + 'iou': 2.5, + 'dfl': 0.5, + 'cwd': 10.0}, + distill_feat = False, + distill_weight={ + 'class': 1.0, + 'dfl': 1.0, + } + ): + + self.fpn_strides = fpn_strides + self.grid_cell_size = grid_cell_size + self.grid_cell_offset = grid_cell_offset + self.num_classes = num_classes + self.ori_img_size = ori_img_size + + self.warmup_epoch = warmup_epoch + self.warmup_assigner = ATSSAssigner(9, num_classes=self.num_classes) + self.formal_assigner = 
TaskAlignedAssigner(topk=13, num_classes=self.num_classes, alpha=1.0, beta=6.0) + + self.use_dfl = use_dfl + self.reg_max = reg_max + self.proj = nn.Parameter(torch.linspace(0, self.reg_max, self.reg_max + 1), requires_grad=False) + self.iou_type = iou_type + self.varifocal_loss = VarifocalLoss().cuda() + self.bbox_loss = BboxLoss(self.num_classes, self.reg_max, self.use_dfl, self.iou_type).cuda() + self.loss_weight = loss_weight + self.distill_feat = distill_feat + self.distill_weight = distill_weight + + def __call__( + self, + outputs, + t_outputs, + s_featmaps, + t_featmaps, + targets, + epoch_num, + max_epoch, + temperature + ): + + feats, pred_scores, pred_distri = outputs + t_feats, t_pred_scores, t_pred_distri = t_outputs + anchors, anchor_points, n_anchors_list, stride_tensor = \ + generate_anchors(feats, self.fpn_strides, self.grid_cell_size, self.grid_cell_offset, device=feats[0].device) + t_anchors, t_anchor_points, t_n_anchors_list, t_stride_tensor = \ + generate_anchors(t_feats, self.fpn_strides, self.grid_cell_size, self.grid_cell_offset, device=feats[0].device) + + assert pred_scores.type() == pred_distri.type() + gt_bboxes_scale = torch.full((1,4), self.ori_img_size).type_as(pred_scores) + batch_size = pred_scores.shape[0] + + # targets + targets =self.preprocess(targets, batch_size, gt_bboxes_scale) + gt_labels = targets[:, :, :1] + gt_bboxes = targets[:, :, 1:] #xyxy + mask_gt = (gt_bboxes.sum(-1, keepdim=True) > 0).float() + + # pboxes + anchor_points_s = anchor_points / stride_tensor + pred_bboxes = self.bbox_decode(anchor_points_s, pred_distri) #xyxy + t_anchor_points_s = t_anchor_points / t_stride_tensor + t_pred_bboxes = self.bbox_decode(t_anchor_points_s, t_pred_distri) #xyxy + + if epoch_num < self.warmup_epoch: + target_labels, target_bboxes, target_scores, fg_mask = \ + self.warmup_assigner( + anchors, + n_anchors_list, + gt_labels, + gt_bboxes, + mask_gt, + pred_bboxes.detach() * stride_tensor) + else: + target_labels, target_bboxes, target_scores, fg_mask = \ + self.formal_assigner( + pred_scores.detach(), + pred_bboxes.detach() * stride_tensor, + anchor_points, + gt_labels, + gt_bboxes, + mask_gt) + + # rescale bbox + target_bboxes /= stride_tensor + + # cls loss + target_labels = torch.where(fg_mask > 0, target_labels, torch.full_like(target_labels, self.num_classes)) + one_hot_label = F.one_hot(target_labels, self.num_classes + 1)[..., :-1] + loss_cls = self.varifocal_loss(pred_scores, target_scores, one_hot_label) + + target_scores_sum = target_scores.sum() + loss_cls /= target_scores_sum + + # bbox loss + loss_iou, loss_dfl, d_loss_dfl = self.bbox_loss(pred_distri, pred_bboxes, t_pred_distri, t_pred_bboxes, temperature, anchor_points_s, + target_bboxes, target_scores, target_scores_sum, fg_mask) + + logits_student = pred_scores + logits_teacher = t_pred_scores + distill_num_classes = self.num_classes + d_loss_cls = self.distill_loss_cls(logits_student, logits_teacher, distill_num_classes, temperature) + if self.distill_feat: + d_loss_cw = self.distill_loss_cw(s_featmaps, t_featmaps) + else: + d_loss_cw = torch.tensor(0.).to(feats[0].device) + import math + distill_weightdecay = ((1 - math.cos(epoch_num * math.pi / max_epoch)) / 2) * (0.01- 1) + 1 + d_loss_dfl *= distill_weightdecay + d_loss_cls *= distill_weightdecay + d_loss_cw *= distill_weightdecay + loss_cls_all = loss_cls + d_loss_cls * self.distill_weight['class'] + loss_dfl_all = loss_dfl + d_loss_dfl * self.distill_weight['dfl'] + loss = self.loss_weight['class'] * loss_cls_all + \ + 
self.loss_weight['iou'] * loss_iou + \ + self.loss_weight['dfl'] * loss_dfl_all + \ + self.loss_weight['cwd'] * d_loss_cw + + return loss, \ + torch.cat(((self.loss_weight['iou'] * loss_iou).unsqueeze(0), + (self.loss_weight['dfl'] * loss_dfl_all).unsqueeze(0), + (self.loss_weight['class'] * loss_cls_all).unsqueeze(0), + (self.loss_weight['cwd'] * d_loss_cw).unsqueeze(0))).detach() + + def distill_loss_cls(self, logits_student, logits_teacher, num_classes, temperature=20): + logits_student = logits_student.view(-1, num_classes) + logits_teacher = logits_teacher.view(-1, num_classes) + pred_student = F.softmax(logits_student / temperature, dim=1) + pred_teacher = F.softmax(logits_teacher / temperature, dim=1) + log_pred_student = torch.log(pred_student) + + d_loss_cls = F.kl_div(log_pred_student, pred_teacher, reduction="sum") + d_loss_cls *= temperature**2 + return d_loss_cls + + def distill_loss_cw(self, s_feats, t_feats, temperature=1): + N,C,H,W = s_feats[0].shape + # print(N,C,H,W) + loss_cw = F.kl_div(F.log_softmax(s_feats[0].view(N,C,H*W)/temperature, dim=2), + F.log_softmax(t_feats[0].view(N,C,H*W).detach()/temperature, dim=2), + reduction='sum', + log_target=True) * (temperature * temperature)/ (N*C) + + N,C,H,W = s_feats[1].shape + # print(N,C,H,W) + loss_cw += F.kl_div(F.log_softmax(s_feats[1].view(N,C,H*W)/temperature, dim=2), + F.log_softmax(t_feats[1].view(N,C,H*W).detach()/temperature, dim=2), + reduction='sum', + log_target=True) * (temperature * temperature)/ (N*C) + + N,C,H,W = s_feats[2].shape + # print(N,C,H,W) + loss_cw += F.kl_div(F.log_softmax(s_feats[2].view(N,C,H*W)/temperature, dim=2), + F.log_softmax(t_feats[2].view(N,C,H*W).detach()/temperature, dim=2), + reduction='sum', + log_target=True) * (temperature * temperature)/ (N*C) + # print(loss_cw) + return loss_cw + + def preprocess(self, targets, batch_size, scale_tensor): + targets_list = np.zeros((batch_size, 1, 5)).tolist() + for i, item in enumerate(targets.cpu().numpy().tolist()): + targets_list[int(item[0])].append(item[1:]) + max_len = max((len(l) for l in targets_list)) + targets = torch.from_numpy(np.array(list(map(lambda l:l + [[-1,0,0,0,0]]*(max_len - len(l)), targets_list)))[:,1:,:]).to(targets.device) + batch_target = targets[:, :, 1:5].mul_(scale_tensor) + targets[..., 1:] = xywh2xyxy(batch_target) + return targets + + def bbox_decode(self, anchor_points, pred_dist): + if self.use_dfl: + batch_size, n_anchors, _ = pred_dist.shape + pred_dist = F.softmax(pred_dist.view(batch_size, n_anchors, 4, self.reg_max + 1), dim=-1).matmul(self.proj.to(pred_dist.device)) + return dist2bbox(pred_dist, anchor_points) + + +class VarifocalLoss(nn.Module): + def __init__(self): + super(VarifocalLoss, self).__init__() + + def forward(self, pred_score,gt_score, label, alpha=0.75, gamma=2.0): + + weight = alpha * pred_score.pow(gamma) * (1 - label) + gt_score * label + with torch.cuda.amp.autocast(enabled=False): + loss = (F.binary_cross_entropy(pred_score.float(), gt_score.float(), reduction='none') * weight).sum() + + return loss + + +class BboxLoss(nn.Module): + def __init__(self, num_classes, reg_max, use_dfl=False, iou_type='giou'): + super(BboxLoss, self).__init__() + self.num_classes = num_classes + self.iou_loss = IOUloss(box_format='xyxy', iou_type=iou_type, eps=1e-10) + self.reg_max = reg_max + self.use_dfl = use_dfl + + def forward(self, pred_dist, pred_bboxes, t_pred_dist, t_pred_bboxes, temperature, anchor_points, + target_bboxes, target_scores, target_scores_sum, fg_mask): + # select positive samples mask + 
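+        # fg_mask marks the anchors that the assigner matched to a ground-truth box;
+        # only these positive samples contribute to the IoU and DFL terms below.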
num_pos = fg_mask.sum() + if num_pos > 0: + # iou loss + bbox_mask = fg_mask.unsqueeze(-1).repeat([1, 1, 4]) + pred_bboxes_pos = torch.masked_select(pred_bboxes, + bbox_mask).reshape([-1, 4]) + t_pred_bboxes_pos = torch.masked_select(t_pred_bboxes, + bbox_mask).reshape([-1, 4]) + target_bboxes_pos = torch.masked_select( + target_bboxes, bbox_mask).reshape([-1, 4]) + bbox_weight = torch.masked_select( + target_scores.sum(-1), fg_mask).unsqueeze(-1) + loss_iou = self.iou_loss(pred_bboxes_pos, + target_bboxes_pos) * bbox_weight + loss_iou = loss_iou.sum() / target_scores_sum + + # dfl loss + if self.use_dfl: + dist_mask = fg_mask.unsqueeze(-1).repeat( + [1, 1, (self.reg_max + 1) * 4]) + pred_dist_pos = torch.masked_select( + pred_dist, dist_mask).reshape([-1, 4, self.reg_max + 1]) + t_pred_dist_pos = torch.masked_select( + t_pred_dist, dist_mask).reshape([-1, 4, self.reg_max + 1]) + target_ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max) + target_ltrb_pos = torch.masked_select( + target_ltrb, bbox_mask).reshape([-1, 4]) + loss_dfl = self._df_loss(pred_dist_pos, + target_ltrb_pos) * bbox_weight + d_loss_dfl = self.distill_loss_dfl(pred_dist_pos, t_pred_dist_pos, temperature) * bbox_weight + loss_dfl = loss_dfl.sum() / target_scores_sum + d_loss_dfl = d_loss_dfl.sum() / target_scores_sum + else: + loss_dfl = torch.tensor(0.).to(pred_dist.device) + d_loss_dfl = torch.tensor(0.).to(pred_dist.device) + + else: + + loss_iou = torch.tensor(0.).to(pred_dist.device) + loss_dfl = torch.tensor(0.).to(pred_dist.device) + d_loss_dfl = torch.tensor(0.).to(pred_dist.device) + + return loss_iou, loss_dfl, d_loss_dfl + + def _df_loss(self, pred_dist, target): + target_left = target.to(torch.long) + target_right = target_left + 1 + weight_left = target_right.to(torch.float) - target + weight_right = 1 - weight_left + loss_left = F.cross_entropy( + pred_dist.view(-1, self.reg_max + 1), target_left.view(-1), reduction='none').view( + target_left.shape) * weight_left + loss_right = F.cross_entropy( + pred_dist.view(-1, self.reg_max + 1), target_right.view(-1), reduction='none').view( + target_left.shape) * weight_right + return (loss_left + loss_right).mean(-1, keepdim=True) + + def distill_loss_dfl(self, logits_student, logits_teacher, temperature=20): + + logits_student = logits_student.view(-1,17) + logits_teacher = logits_teacher.view(-1,17) + pred_student = F.softmax(logits_student / temperature, dim=1) + pred_teacher = F.softmax(logits_teacher / temperature, dim=1) + log_pred_student = torch.log(pred_student) + + d_loss_dfl = F.kl_div(log_pred_student, pred_teacher, reduction="none").sum(1).mean() + d_loss_dfl *= temperature**2 + return d_loss_dfl diff --git a/asone/detectors/yolov6/yolov6/models/reppan.py b/asone/detectors/yolov6/yolov6/models/reppan.py new file mode 100644 index 0000000000000000000000000000000000000000..17c75c722b3b6131bab685b8f54a7aeb330b82a4 --- /dev/null +++ b/asone/detectors/yolov6/yolov6/models/reppan.py @@ -0,0 +1,242 @@ +import torch +from torch import nn +from yolov6.layers.common import RepBlock, RepVGGBlock, BottleRep, BepC3, SimConv, Transpose + +_QUANT=False +class RepPANNeck(nn.Module): + """RepPANNeck Module + EfficientRep is the default backbone of this model. + RepPANNeck has the balance of feature fusion ability and hardware efficiency. 
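+    The neck consumes the three backbone feature maps (x2, x1, x0) and returns the
+    fused pyramid features [pan_out2, pan_out1, pan_out0] for the detection head.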
+ """ + + def __init__( + self, + channels_list=None, + num_repeats=None, + block=RepVGGBlock + ): + super().__init__() + + assert channels_list is not None + assert num_repeats is not None + + self.Rep_p4 = RepBlock( + in_channels=channels_list[3] + channels_list[5], + out_channels=channels_list[5], + n=num_repeats[5], + block=block + ) + + self.Rep_p3 = RepBlock( + in_channels=channels_list[2] + channels_list[6], + out_channels=channels_list[6], + n=num_repeats[6], + block=block + ) + + self.Rep_n3 = RepBlock( + in_channels=channels_list[6] + channels_list[7], + out_channels=channels_list[8], + n=num_repeats[7], + block=block + ) + + self.Rep_n4 = RepBlock( + in_channels=channels_list[5] + channels_list[9], + out_channels=channels_list[10], + n=num_repeats[8], + block=block + ) + + self.reduce_layer0 = SimConv( + in_channels=channels_list[4], + out_channels=channels_list[5], + kernel_size=1, + stride=1 + ) + + self.upsample0 = Transpose( + in_channels=channels_list[5], + out_channels=channels_list[5], + ) + + self.reduce_layer1 = SimConv( + in_channels=channels_list[5], + out_channels=channels_list[6], + kernel_size=1, + stride=1 + ) + + self.upsample1 = Transpose( + in_channels=channels_list[6], + out_channels=channels_list[6] + ) + + self.downsample2 = SimConv( + in_channels=channels_list[6], + out_channels=channels_list[7], + kernel_size=3, + stride=2 + ) + + self.downsample1 = SimConv( + in_channels=channels_list[8], + out_channels=channels_list[9], + kernel_size=3, + stride=2 + ) + + def upsample_enable_quant(self): + print("Insert fakequant after upsample") + # Insert fakequant after upsample op to build TensorRT engine + from pytorch_quantization import nn as quant_nn + from pytorch_quantization.tensor_quant import QuantDescriptor + conv2d_input_default_desc = QuantDescriptor(num_bits=8, calib_method='histogram') + self.upsample_feat0_quant = quant_nn.TensorQuantizer(conv2d_input_default_desc) + self.upsample_feat1_quant = quant_nn.TensorQuantizer(conv2d_input_default_desc) + global _QUANT + _QUANT = True + + def forward(self, input): + + (x2, x1, x0) = input + + fpn_out0 = self.reduce_layer0(x0) + upsample_feat0 = self.upsample0(fpn_out0) + if _QUANT: + upsample_feat0 = self.upsample_feat0_quant(upsample_feat0) + f_concat_layer0 = torch.cat([upsample_feat0, x1], 1) + f_out0 = self.Rep_p4(f_concat_layer0) + + fpn_out1 = self.reduce_layer1(f_out0) + upsample_feat1 = self.upsample1(fpn_out1) + if _QUANT: + upsample_feat1 = self.upsample_feat1_quant(upsample_feat1) + f_concat_layer1 = torch.cat([upsample_feat1, x2], 1) + pan_out2 = self.Rep_p3(f_concat_layer1) + + down_feat1 = self.downsample2(pan_out2) + p_concat_layer1 = torch.cat([down_feat1, fpn_out1], 1) + pan_out1 = self.Rep_n3(p_concat_layer1) + + down_feat0 = self.downsample1(pan_out1) + p_concat_layer2 = torch.cat([down_feat0, fpn_out0], 1) + pan_out0 = self.Rep_n4(p_concat_layer2) + + outputs = [pan_out2, pan_out1, pan_out0] + + return outputs + + +class CSPRepPANNeck(nn.Module): + """ + CSPRepPANNeck module. 
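+    Same topology as RepPANNeck, but the fusion blocks are BepC3 (CSP-style) blocks
+    whose hidden-channel expansion ratio is controlled by csp_e.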
+ """ + + def __init__( + self, + channels_list=None, + num_repeats=None, + block=BottleRep, + csp_e=float(1)/2 + ): + super().__init__() + + assert channels_list is not None + assert num_repeats is not None + + self.Rep_p4 = BepC3( + in_channels=channels_list[3] + channels_list[5], # 512 + 256 + out_channels=channels_list[5], # 256 + n=num_repeats[5], + e=csp_e, + block=block + ) + + self.Rep_p3 = BepC3( + in_channels=channels_list[2] + channels_list[6], # 256 + 128 + out_channels=channels_list[6], # 128 + n=num_repeats[6], + e=csp_e, + block=block + ) + + self.Rep_n3 = BepC3( + in_channels=channels_list[6] + channels_list[7], # 128 + 128 + out_channels=channels_list[8], # 256 + n=num_repeats[7], + e=csp_e, + block=block + ) + + self.Rep_n4 = BepC3( + in_channels=channels_list[5] + channels_list[9], # 256 + 256 + out_channels=channels_list[10], # 512 + n=num_repeats[8], + e=csp_e, + block=block + ) + + self.reduce_layer0 = SimConv( + in_channels=channels_list[4], # 1024 + out_channels=channels_list[5], # 256 + kernel_size=1, + stride=1 + ) + + self.upsample0 = Transpose( + in_channels=channels_list[5], # 256 + out_channels=channels_list[5], # 256 + ) + + self.reduce_layer1 = SimConv( + in_channels=channels_list[5], # 256 + out_channels=channels_list[6], # 128 + kernel_size=1, + stride=1 + ) + + self.upsample1 = Transpose( + in_channels=channels_list[6], # 128 + out_channels=channels_list[6] # 128 + ) + + self.downsample2 = SimConv( + in_channels=channels_list[6], # 128 + out_channels=channels_list[7], # 128 + kernel_size=3, + stride=2 + ) + + self.downsample1 = SimConv( + in_channels=channels_list[8], # 256 + out_channels=channels_list[9], # 256 + kernel_size=3, + stride=2 + ) + + def forward(self, input): + + (x2, x1, x0) = input + + fpn_out0 = self.reduce_layer0(x0) + upsample_feat0 = self.upsample0(fpn_out0) + f_concat_layer0 = torch.cat([upsample_feat0, x1], 1) + f_out0 = self.Rep_p4(f_concat_layer0) + + fpn_out1 = self.reduce_layer1(f_out0) + upsample_feat1 = self.upsample1(fpn_out1) + f_concat_layer1 = torch.cat([upsample_feat1, x2], 1) + pan_out2 = self.Rep_p3(f_concat_layer1) + + down_feat1 = self.downsample2(pan_out2) + p_concat_layer1 = torch.cat([down_feat1, fpn_out1], 1) + pan_out1 = self.Rep_n3(p_concat_layer1) + + down_feat0 = self.downsample1(pan_out1) + p_concat_layer2 = torch.cat([down_feat0, fpn_out0], 1) + pan_out0 = self.Rep_n4(p_concat_layer2) + + outputs = [pan_out2, pan_out1, pan_out0] + + return outputs diff --git a/asone/detectors/yolov6/yolov6/models/yolo.py b/asone/detectors/yolov6/yolov6/models/yolo.py new file mode 100644 index 0000000000000000000000000000000000000000..85f75f93112f42085fbe21de245b759a3435581a --- /dev/null +++ b/asone/detectors/yolov6/yolov6/models/yolo.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +import math +import torch +import torch.nn as nn +import torch.nn.functional as F +from yolov6.layers.common import * +from yolov6.utils.torch_utils import initialize_weights +from yolov6.models.efficientrep import * +from yolov6.models.reppan import * +from yolov6.models.effidehead import Detect, build_effidehead_layer + + +class Model(nn.Module): + '''YOLOv6 model with backbone, neck and head. + The default parts are EfficientRep Backbone, Rep-PAN and + Efficient Decoupled Head. 
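+    Minimal usage sketch (the config object, class count and input batch below are
+    illustrative, not fixed by this module):
+        model = build_model(cfg, num_classes=80, device=torch.device('cuda'))
+        detections, featmaps = model(images)  # training mode; ONNX export mode returns detections only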
+ ''' + def __init__(self, config, channels=3, num_classes=None, anchors=None): # model, input channels, number of classes + super().__init__() + # Build network + num_layers = config.model.head.num_layers + #self.mode = config.training_mode + self.backbone, self.neck, self.detect = build_network(config, channels, num_classes, anchors, num_layers) + + # Init Detect head + begin_indices = config.model.head.begin_indices + out_indices_head = config.model.head.out_indices + self.stride = self.detect.stride + self.detect.i = begin_indices + self.detect.f = out_indices_head + self.detect.initialize_biases() + + # Init weights + initialize_weights(self) + + def forward(self, x): + export_mode = torch.onnx.is_in_onnx_export() + x = self.backbone(x) + x = self.neck(x) + if export_mode == False: + featmaps = [] + featmaps.extend(x) + x = self.detect(x) + return x if export_mode is True else [x, featmaps] + + def _apply(self, fn): + self = super()._apply(fn) + self.detect.stride = fn(self.detect.stride) + self.detect.grid = list(map(fn, self.detect.grid)) + return self + + +def make_divisible(x, divisor): + # Upward revision the value x to make it evenly divisible by the divisor. + return math.ceil(x / divisor) * divisor + + +def build_network(config, channels, num_classes, anchors, num_layers): + depth_mul = config.model.depth_multiple + width_mul = config.model.width_multiple + num_repeat_backbone = config.model.backbone.num_repeats + channels_list_backbone = config.model.backbone.out_channels + num_repeat_neck = config.model.neck.num_repeats + channels_list_neck = config.model.neck.out_channels + num_anchors = config.model.head.anchors + use_dfl = config.model.head.use_dfl + reg_max = config.model.head.reg_max + num_repeat = [(max(round(i * depth_mul), 1) if i > 1 else i) for i in (num_repeat_backbone + num_repeat_neck)] + channels_list = [make_divisible(i * width_mul, 8) for i in (channels_list_backbone + channels_list_neck)] + + block = get_block(config.training_mode) + BACKBONE = eval(config.model.backbone.type) + NECK = eval(config.model.neck.type) + + if 'CSP' in config.model.backbone.type: + backbone = BACKBONE( + in_channels=channels, + channels_list=channels_list, + num_repeats=num_repeat, + block=block, + csp_e=config.model.backbone.csp_e + ) + + neck = NECK( + channels_list=channels_list, + num_repeats=num_repeat, + block=block, + csp_e=config.model.neck.csp_e + ) + else: + backbone = BACKBONE( + in_channels=channels, + channels_list=channels_list, + num_repeats=num_repeat, + block=block + ) + + neck = NECK( + channels_list=channels_list, + num_repeats=num_repeat, + block=block + ) + + head_layers = build_effidehead_layer(channels_list, num_anchors, num_classes, reg_max) + + head = Detect(num_classes, anchors, num_layers, head_layers=head_layers, use_dfl=use_dfl) + + return backbone, neck, head + + +def build_model(cfg, num_classes, device): + model = Model(cfg, channels=3, num_classes=num_classes, anchors=cfg.model.head.anchors).to(device) + return model diff --git a/asone/detectors/yolov6/yolov6/utils/__init__.py b/asone/detectors/yolov6/yolov6/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/asone/detectors/yolov6/yolov6/utils/events.py b/asone/detectors/yolov6/yolov6/utils/events.py new file mode 100644 index 0000000000000000000000000000000000000000..39fcb18a6c375565a83967cfe76e8e0731c37a62 --- /dev/null +++ b/asone/detectors/yolov6/yolov6/utils/events.py @@ -0,0 +1,55 @@ +#!/usr/bin/env 
python +# -*- coding: utf-8 -*- +import os +import yaml +import logging +import shutil + + +def set_logging(name=None): + rank = int(os.getenv('RANK', -1)) + logging.basicConfig(format="%(message)s", level=logging.INFO if (rank in (-1, 0)) else logging.WARNING) + return logging.getLogger(name) + + +LOGGER = set_logging(__name__) +NCOLS = shutil.get_terminal_size().columns + + +def load_yaml(file_path): + """Load data from yaml file.""" + if isinstance(file_path, str): + with open(file_path, errors='ignore') as f: + data_dict = yaml.safe_load(f) + return data_dict + + +def save_yaml(data_dict, save_path): + """Save data to yaml file""" + with open(save_path, 'w') as f: + yaml.safe_dump(data_dict, f, sort_keys=False) + + +def write_tblog(tblogger, epoch, results, losses): + """Display mAP and loss information to log.""" + tblogger.add_scalar("val/mAP@0.5", results[0], epoch + 1) + tblogger.add_scalar("val/mAP@0.50:0.95", results[1], epoch + 1) + + tblogger.add_scalar("train/iou_loss", losses[0], epoch + 1) + tblogger.add_scalar("train/l1_loss", losses[1], epoch + 1) + tblogger.add_scalar("train/obj_loss", losses[2], epoch + 1) + tblogger.add_scalar("train/cls_loss", losses[3], epoch + 1) + + tblogger.add_scalar("x/lr0", results[2], epoch + 1) + tblogger.add_scalar("x/lr1", results[3], epoch + 1) + tblogger.add_scalar("x/lr2", results[4], epoch + 1) + +def write_tbimg(tblogger, imgs, step, type='train'): + """Display train_batch and validation predictions to tensorboard.""" + if type == 'train': + tblogger.add_image(f'train_batch', imgs, step + 1, dataformats='HWC') + elif type == 'val': + for idx, img in enumerate(imgs): + tblogger.add_image(f'val_img_{idx + 1}', img, step + 1, dataformats='HWC') + else: + LOGGER.warning('WARNING: Unknown image type to visualize.\n') diff --git a/asone/detectors/yolov6/yolov6/utils/general.py b/asone/detectors/yolov6/yolov6/utils/general.py new file mode 100644 index 0000000000000000000000000000000000000000..9824126a041009c58e185b1c4f56b6a3520758c9 --- /dev/null +++ b/asone/detectors/yolov6/yolov6/utils/general.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +import os +import glob +import torch +from pathlib import Path + + +def increment_name(path): + '''increase save directory's id''' + path = Path(path) + sep = '' + if path.exists(): + path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '') + for n in range(1, 9999): + p = f'{path}{sep}{n}{suffix}' + if not os.path.exists(p): + break + path = Path(p) + return path + + +def find_latest_checkpoint(search_dir='.'): + '''Find the most recent saved checkpoint in search_dir.''' + checkpoint_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True) + return max(checkpoint_list, key=os.path.getctime) if checkpoint_list else '' + + +def dist2bbox(distance, anchor_points, box_format='xyxy'): + '''Transform distance(ltrb) to box(xywh or xyxy).''' + lt, rb = torch.split(distance, 2, -1) + x1y1 = anchor_points - lt + x2y2 = anchor_points + rb + if box_format == 'xyxy': + bbox = torch.cat([x1y1, x2y2], -1) + elif box_format == 'xywh': + c_xy = (x1y1 + x2y2) / 2 + wh = x2y2 - x1y1 + bbox = torch.cat([c_xy, wh], -1) + return bbox + + +def bbox2dist(anchor_points, bbox, reg_max): + '''Transform bbox(xyxy) to dist(ltrb).''' + x1y1, x2y2 = torch.split(bbox, 2, -1) + lt = anchor_points - x1y1 + rb = x2y2 - anchor_points + dist = torch.cat([lt, rb], -1).clip(0, reg_max - 0.01) + return dist + + +def xywh2xyxy(bboxes): + '''Transform bbox(xywh) to box(xyxy).''' + 
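+    # Note: this modifies the input boxes in place and returns them.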
bboxes[..., 0] = bboxes[..., 0] - bboxes[..., 2] * 0.5 + bboxes[..., 1] = bboxes[..., 1] - bboxes[..., 3] * 0.5 + bboxes[..., 2] = bboxes[..., 0] + bboxes[..., 2] + bboxes[..., 3] = bboxes[..., 1] + bboxes[..., 3] + return bboxes + +def box_iou(box1, box2): + # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py + """ + Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: + box1 (Tensor[N, 4]) + box2 (Tensor[M, 4]) + Returns: + iou (Tensor[N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + + def box_area(box): + # box = 4xn + return (box[2] - box[0]) * (box[3] - box[1]) + + area1 = box_area(box1.T) + area2 = box_area(box2.T) + + # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) + inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) + return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter) \ No newline at end of file diff --git a/asone/detectors/yolov6/yolov6/utils/torch_utils.py b/asone/detectors/yolov6/yolov6/utils/torch_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a4ca7da642f0c8f2d32c24f91789de478f134ad6 --- /dev/null +++ b/asone/detectors/yolov6/yolov6/utils/torch_utils.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- + +import time +from contextlib import contextmanager +from copy import deepcopy +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.nn.functional as F +from yolov6.utils.events import LOGGER + +try: + import thop # for FLOPs computation +except ImportError: + thop = None + + +@contextmanager +def torch_distributed_zero_first(local_rank: int): + """ + Decorator to make all processes in distributed training wait for each local_master to do something. + """ + if local_rank not in [-1, 0]: + dist.barrier(device_ids=[local_rank]) + yield + if local_rank == 0: + dist.barrier(device_ids=[0]) + + +def time_sync(): + # Waits for all kernels in all streams on a CUDA device to complete if cuda is available. 
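+    # (synchronizing first makes the wall-clock timestamp reflect finished GPU work)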
+ if torch.cuda.is_available(): + torch.cuda.synchronize() + return time.time() + + +def initialize_weights(model): + for m in model.modules(): + t = type(m) + if t is nn.Conv2d: + pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif t is nn.BatchNorm2d: + m.eps = 1e-3 + m.momentum = 0.03 + elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: + m.inplace = True + + +def fuse_conv_and_bn(conv, bn): + # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ + fusedconv = ( + nn.Conv2d( + conv.in_channels, + conv.out_channels, + kernel_size=conv.kernel_size, + stride=conv.stride, + padding=conv.padding, + groups=conv.groups, + bias=True, + ) + .requires_grad_(False) + .to(conv.weight.device) + ) + + # prepare filters + w_conv = conv.weight.clone().view(conv.out_channels, -1) + w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) + fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape)) + + # prepare spatial bias + b_conv = ( + torch.zeros(conv.weight.size(0), device=conv.weight.device) + if conv.bias is None + else conv.bias + ) + b_bn = bn.bias - bn.weight.mul(bn.running_mean).div( + torch.sqrt(bn.running_var + bn.eps) + ) + fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) + + return fusedconv + + +def fuse_model(model): + from yolov6.layers.common import Conv + + for m in model.modules(): + if type(m) is Conv and hasattr(m, "bn"): + m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv + delattr(m, "bn") # remove batchnorm + m.forward = m.forward_fuse # update forward + return model + + +def get_model_info(model, img_size=640): + """Get model Params and GFlops. + Code base on https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/utils/model_utils.py + """ + from thop import profile + stride = 32 + img = torch.zeros((1, 3, stride, stride), device=next(model.parameters()).device) + + flops, params = profile(deepcopy(model), inputs=(img,), verbose=False) + params /= 1e6 + flops /= 1e9 + img_size = img_size if isinstance(img_size, list) else [img_size, img_size] + flops *= img_size[0] * img_size[1] / stride / stride * 2 # Gflops + info = "Params: {:.2f}M, Gflops: {:.2f}".format(params, flops) + return info diff --git a/asone/detectors/yolov6/yolov6/utils/yolov6_utils.py b/asone/detectors/yolov6/yolov6/utils/yolov6_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..74f28da0131053443053272ae62710046dfb96e1 --- /dev/null +++ b/asone/detectors/yolov6/yolov6/utils/yolov6_utils.py @@ -0,0 +1,258 @@ +import time +import sys +import os +import numpy as np +import cv2 +import torch.nn as nn +import torch +import torchvision + +from asone.detectors.yolov6.yolov6.layers.common import Conv + +def nms(boxes, scores, iou_threshold): + # Sort by score + sorted_indices = np.argsort(scores)[::-1] + + keep_boxes = [] + while sorted_indices.size > 0: + # Pick the last box + box_id = sorted_indices[0] + keep_boxes.append(box_id) + + # Compute IoU of the picked box with the rest + ious = compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :]) + + # Remove boxes with IoU over the threshold + keep_indices = np.where(ious < iou_threshold)[0] + + # print(keep_indices.shape, sorted_indices.shape) + sorted_indices = sorted_indices[keep_indices + 1] + + return keep_boxes + + +def process_output(output, img_height, img_width, + input_width, input_height, + conf_thres, iou_thres): + predictions = np.squeeze(output) + + # Filter out object 
confidence scores below threshold + obj_conf = predictions[:, 4] + predictions = predictions[obj_conf > conf_thres] + obj_conf = obj_conf[obj_conf > conf_thres] + + # Multiply class confidence with bounding box confidence + predictions[:, 5:] *= obj_conf[:, np.newaxis] + + # Get the scores + scores = np.max(predictions[:, 5:], axis=1) + + # Filter out the objects with a low score + predictions = predictions[obj_conf > conf_thres] + scores = scores[scores > conf_thres] + + # Get the class with the highest confidence + class_ids = np.argmax(predictions[:, 5:], axis=1) + + # Get bounding boxes for each object + boxes = process_and_scale_boxes(predictions, img_height, img_width, + input_width, input_height) + + # Apply non-maxima suppression to suppress weak, overlapping bounding boxes + indices = nms(boxes, scores, iou_thres) + + return boxes[indices], scores[indices], class_ids[indices] + +def compute_iou(box, boxes): + # Compute xmin, ymin, xmax, ymax for both boxes + xmin = np.maximum(box[0], boxes[:, 0]) + ymin = np.maximum(box[1], boxes[:, 1]) + xmax = np.minimum(box[2], boxes[:, 2]) + ymax = np.minimum(box[3], boxes[:, 3]) + + # Compute intersection area + intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin) + + # Compute union area + box_area = (box[2] - box[0]) * (box[3] - box[1]) + boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + union_area = box_area + boxes_area - intersection_area + + # Compute IoU + iou = intersection_area / union_area + + return iou + + +def xywh2xyxy(x): + # Convert bounding box (x, y, w, h) to bounding box (x1, y1, x2, y2) + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = x[..., 0] - x[..., 2] / 2 + y[..., 1] = x[..., 1] - x[..., 3] / 2 + y[..., 2] = x[..., 0] + x[..., 2] / 2 + y[..., 3] = x[..., 1] + x[..., 3] / 2 + return y + +def prepare_input(image, input_width, input_height): + + input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + # Resize input image + input_img = cv2.resize(input_img, (input_width, input_height)) + + # Scale input pixel values to 0 to 1 + input_img = input_img / 255.0 + input_img = input_img.transpose(2, 0, 1) + input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32) + + return input_tensor + +def process_and_scale_boxes(predictions, img_height, img_width, + input_width, input_height): + + predictions = np.delete(predictions, 0, axis=1) + # Extract boxes from predictions + boxes = predictions[:, :4] + # Scale boxes to original image dimensions + boxes /= np.array([input_width, input_height, input_width, input_height]) + boxes *= np.array([img_width, img_height, img_width, img_height]) + # Convert boxes to xyxy format + # boxes = xywh2xyxy(boxes) + + boxes = boxes[:,:4] + class_ids = predictions[:,4:5] + scores = predictions[:,5:] + return boxes, scores, class_ids + +def load_pytorch(weights, map_location=None, inplace=True, fuse=False): + """Load model from checkpoint file.""" + ckpt = torch.load(weights, map_location=map_location) # load + model = ckpt['ema' if ckpt.get('ema') else 'model'].float() + if fuse: + model = fuse_model(model).eval() + else: + model = model.eval() + return model + +def fuse_model(model): + for m in model.modules(): + if type(m) is Conv and hasattr(m, "bn"): + m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv + delattr(m, "bn") # remove batchnorm + m.forward = m.forward_fuse # update forward + return model + +def fuse_conv_and_bn(conv, bn): + # Fuse convolution and batchnorm layers 
https://tehnokv.com/posts/fusing-batchnorm-and-conv/
+    fusedconv = (
+        nn.Conv2d(
+            conv.in_channels,
+            conv.out_channels,
+            kernel_size=conv.kernel_size,
+            stride=conv.stride,
+            padding=conv.padding,
+            groups=conv.groups,
+            bias=True,
+        )
+        .requires_grad_(False)
+        .to(conv.weight.device)
+    )
+
+    # prepare filters
+    w_conv = conv.weight.clone().view(conv.out_channels, -1)
+    w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
+    fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))
+
+    # prepare spatial bias
+    b_conv = (
+        torch.zeros(conv.weight.size(0), device=conv.weight.device)
+        if conv.bias is None
+        else conv.bias
+    )
+    b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(
+        torch.sqrt(bn.running_var + bn.eps)
+    )
+    fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
+
+    return fusedconv
+
+
+def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, max_det=300):
+    """Runs Non-Maximum Suppression (NMS) on inference results.
+    This code is borrowed from: https://github.com/ultralytics/yolov5/blob/47233e1698b89fc437a4fb9463c815e9171be955/utils/general.py#L775
+    Args:
+        prediction: (tensor), with shape [N, 5 + num_classes], N is the number of bboxes.
+        conf_thres: (float) confidence threshold.
+        iou_thres: (float) iou threshold.
+        classes: (None or list[int]), if a list is provided, NMS only keeps the classes you provide.
+        agnostic: (bool), when set to True, class-agnostic NMS is performed; otherwise NMS is done per class.
+        multi_label: (bool), when set to True, one box can have multiple labels; otherwise each box gets only one label.
+        max_det: (int), max number of output bboxes.
+
+    Returns:
+         list of detections, each item is one tensor with shape (num_boxes, 6), 6 is for [xyxy, conf, cls].
+    """
+
+    num_classes = prediction.shape[2] - 5  # number of classes
+    pred_candidates = prediction[..., 4] > conf_thres  # candidates
+
+    # Check the parameters.
+    assert 0 <= conf_thres <= 1, f'conf_thresh must be in 0.0 to 1.0, however {conf_thres} is provided.'
+    assert 0 <= iou_thres <= 1, f'iou_thres must be in 0.0 to 1.0, however {iou_thres} is provided.'
+
+    # Function settings.
+    max_wh = 4096  # maximum box width and height
+    max_nms = 30000  # maximum number of boxes put into torchvision.ops.nms()
+    time_limit = 10.0  # quit the function when NMS cost time exceeds this limit.
+    multi_label &= num_classes > 1  # multiple labels per box
+
+    tik = time.time()
+    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
+    for img_idx, x in enumerate(prediction):  # image index, image inference
+        x = x[pred_candidates[img_idx]]  # confidence
+
+        # If no box remains, skip the next process.
+        if not x.shape[0]:
+            continue
+
+        # multiply class confidence by objectness
+        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
+
+        # (center x, center y, width, height) to (x1, y1, x2, y2)
+        box = xywh2xyxy(x[:, :4])
+
+        # Detections matrix's shape is (n,6), each row represents (xyxy, conf, cls)
+        if multi_label:
+            box_idx, class_idx = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
+            x = torch.cat((box[box_idx], x[box_idx, class_idx + 5, None], class_idx[:, None].float()), 1)
+        else:  # Only keep the class with highest scores.
+            conf, class_idx = x[:, 5:].max(1, keepdim=True)
+            x = torch.cat((box, conf, class_idx.float()), 1)[conf.view(-1) > conf_thres]
+
+        # Filter by class, only keep boxes whose category is in classes.
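+        # e.g. classes=[0] keeps only class id 0 (person, assuming the standard 80-class COCO ordering).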
+        if classes is not None:
+            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
+
+        # Check shape
+        num_box = x.shape[0]  # number of boxes
+        if not num_box:  # no boxes kept.
+            continue
+        elif num_box > max_nms:  # exceeds the maximum number of boxes.
+            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence
+
+        # Batched NMS
+        class_offset = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
+        boxes, scores = x[:, :4] + class_offset, x[:, 4]  # boxes (offset by class), scores
+        keep_box_idx = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
+        if keep_box_idx.shape[0] > max_det:  # limit detections
+            keep_box_idx = keep_box_idx[:max_det]
+
+        output[img_idx] = x[keep_box_idx]
+        if (time.time() - tik) > time_limit:
+            print(f'WARNING: NMS time exceeded the limit of {time_limit}s.')
+            break  # time limit exceeded
+
+    return output
+
+
+
diff --git a/asone/detectors/yolov6/yolov6_detector.py b/asone/detectors/yolov6/yolov6_detector.py
new file mode 100644
index 0000000000000000000000000000000000000000..d45aa13821562a614293888aa882fedfb30ae21f
--- /dev/null
+++ b/asone/detectors/yolov6/yolov6_detector.py
@@ -0,0 +1,139 @@
+import os
+import sys
+from asone.utils import get_names
+import numpy as np
+import warnings
+import torch
+import onnxruntime
+
+from asone import utils
+from asone.detectors.yolov6.yolov6.utils.yolov6_utils import (prepare_input, load_pytorch,
+                                                              non_max_suppression, process_and_scale_boxes)
+sys.path.append(os.path.dirname(__file__))
+
+class YOLOv6Detector:
+    def __init__(self,
+                 weights=None,
+                 use_onnx=False,
+                 use_cuda=True):
+
+        self.use_onnx = use_onnx
+        self.device = 'cuda' if use_cuda else 'cpu'
+
+        if not os.path.exists(weights):
+            utils.download_weights(weights)
+        # If weights is a list of paths, select the path at the first index
+        weights = str(weights[0] if isinstance(weights, list) else weights)
+
+        # Load Model
+        self.model = self.load_model(use_cuda, weights)
+
+        if use_onnx:
+            # Get some ONNX model details
+            self.input_shape, self.input_height, self.input_width = self.ONNXModel_detail(self.model)
+            self.input_names, self.output_names = self.ONNXModel_names(self.model)
+
+
+    def load_model(self, use_cuda, weights, fp16=False):
+        # Half precision is used only when fp16=True and the device is not CPU
+        self.fp16 = fp16 & ((not self.use_onnx or self.use_onnx) and self.device != 'cpu')
+        # Load ONNX
+        if self.use_onnx:
+            if use_cuda:
+                providers = ['CUDAExecutionProvider','CPUExecutionProvider']
+            else:
+                providers = ['CPUExecutionProvider']
+            model = onnxruntime.InferenceSession(weights, providers=providers)
+        # Load PyTorch
+        else:
+            model = load_pytorch(weights, map_location=self.device)
+            model.half() if self.fp16 else model.float()
+        return model
+
+    def ONNXModel_detail(self, model):
+        # Get Model Input
+        model_inputs = model.get_inputs()
+        # Input shape
+        input_shape = model_inputs[0].shape
+        input_height = input_shape[2]
+        input_width = input_shape[3]
+
+        return input_shape, input_height, input_width
+
+    def ONNXModel_names(self, model):
+        # Get Model Input
+        model_inputs = model.get_inputs()
+        input_names = [model_inputs[i].name for i in range(len(model_inputs))]
+        # Get Model Output
+        model_outputs = model.get_outputs()
+        output_names = [model_outputs[i].name for i in range(len(model_outputs))]
+
+        return input_names, output_names
+
+    def detect(self, image: list,
+               input_shape: tuple = (640, 640),
+               conf_thres: float = 0.25,
+               iou_thres: float = 0.45,
+               max_det: int = 1000,
+               filter_classes: list = None,
+               agnostic_nms: 
bool = True, + with_p6: bool = False) -> list: + + # Prepare Input + img_height, img_width = image.shape[:2] + processed_image = prepare_input(image, input_shape[0], input_shape[1]) + + # Perform Inference on the Image + if self.use_onnx: + # Run ONNX model + prediction = self.model.run(self.output_names, + {self.input_names[0]: processed_image})[0] + # Run Pytorch model + else: + processed_image = torch.from_numpy(processed_image).to(self.device) + # Change image floating point precision if fp16 set to true + processed_image = processed_image.half() if self.fp16 else processed_image.float() + prediction = self.model(processed_image)[0] + + # Post Procesing, non-max-suppression and rescaling + if self.use_onnx: + # Process ONNX Output + + boxes, scores, class_ids = process_and_scale_boxes(prediction, img_height, img_width, + input_shape[1], input_shape[0]) + detection = [] + for box in range(len(boxes)): + pred = np.append(boxes[box], scores[box]) + pred = np.append(pred, class_ids[box]) + detection.append(pred) + detection = np.array(detection) + else: + detection = non_max_suppression(prediction, + conf_thres, + iou_thres, + agnostic=agnostic_nms, + max_det=max_det)[0] + + detection = detection.detach().cpu().numpy() + detection[:, :4] /= np.array([input_shape[1], input_shape[0], input_shape[1], input_shape[0]]) + detection[:, :4] *= np.array([img_width, img_height, img_width, img_height]) + + if filter_classes: + class_names = get_names() + + filter_class_idx = [] + if filter_classes: + for _class in filter_classes: + if _class.lower() in class_names: + filter_class_idx.append(class_names.index(_class.lower())) + else: + warnings.warn(f"class {_class} not found in model classes list.") + + detection = detection[np.in1d(detection[:,5].astype(int), filter_class_idx)] + + image_info = { + 'width': image.shape[1], + 'height': image.shape[0], + } + + return detection, image_info \ No newline at end of file diff --git a/asone/detectors/yolov7/__init__.py b/asone/detectors/yolov7/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3a607d192f227a35393c79c68d98b6f974bcc34a --- /dev/null +++ b/asone/detectors/yolov7/__init__.py @@ -0,0 +1,2 @@ +from .yolov7_detector import YOLOv7Detector +__all__ = ['YOLOv7Detector'] \ No newline at end of file diff --git a/asone/detectors/yolov7/yolov7/__init__.py b/asone/detectors/yolov7/yolov7/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/asone/detectors/yolov7/yolov7/models/__init__.py b/asone/detectors/yolov7/yolov7/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4955e7efc290cc4d8188ce1c84093ad5c5feabe5 --- /dev/null +++ b/asone/detectors/yolov7/yolov7/models/__init__.py @@ -0,0 +1,3 @@ +import os +import sys +sys.path.append(os.path.dirname(__file__)) diff --git a/asone/detectors/yolov7/yolov7/models/common.py b/asone/detectors/yolov7/yolov7/models/common.py new file mode 100644 index 0000000000000000000000000000000000000000..e48cb67a3889f500406733aac63f25c3ff12bff7 --- /dev/null +++ b/asone/detectors/yolov7/yolov7/models/common.py @@ -0,0 +1,2015 @@ +import math +from copy import copy +from pathlib import Path + +import numpy as np +import pandas as pd +import requests +import torch +import torch.nn as nn +import torch.nn.functional as F +from PIL import Image +from torch.cuda import amp + +from asone.detectors.yolov7.yolov7.utils.torch_utils import time_synchronized + + +##### basic #### + +def 
autopad(k, p=None): # kernel, padding + # Pad to 'same' + if p is None: + p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad + return p + + +class MP(nn.Module): + def __init__(self, k=2): + super(MP, self).__init__() + self.m = nn.MaxPool2d(kernel_size=k, stride=k) + + def forward(self, x): + return self.m(x) + + +class SP(nn.Module): + def __init__(self, k=3, s=1): + super(SP, self).__init__() + self.m = nn.MaxPool2d(kernel_size=k, stride=s, padding=k // 2) + + def forward(self, x): + return self.m(x) + + +class ReOrg(nn.Module): + def __init__(self): + super(ReOrg, self).__init__() + + def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) + return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1) + + +class Concat(nn.Module): + def __init__(self, dimension=1): + super(Concat, self).__init__() + self.d = dimension + + def forward(self, x): + return torch.cat(x, self.d) + + +class Chuncat(nn.Module): + def __init__(self, dimension=1): + super(Chuncat, self).__init__() + self.d = dimension + + def forward(self, x): + x1 = [] + x2 = [] + for xi in x: + xi1, xi2 = xi.chunk(2, self.d) + x1.append(xi1) + x2.append(xi2) + return torch.cat(x1+x2, self.d) + + +class Shortcut(nn.Module): + def __init__(self, dimension=0): + super(Shortcut, self).__init__() + self.d = dimension + + def forward(self, x): + return x[0]+x[1] + + +class Foldcut(nn.Module): + def __init__(self, dimension=0): + super(Foldcut, self).__init__() + self.d = dimension + + def forward(self, x): + x1, x2 = x.chunk(2, self.d) + return x1+x2 + + +class Conv(nn.Module): + # Standard convolution + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + super(Conv, self).__init__() + self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) + self.bn = nn.BatchNorm2d(c2) + self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) + + def forward(self, x): + return self.act(self.bn(self.conv(x))) + + def fuseforward(self, x): + return self.act(self.conv(x)) + + +class RobustConv(nn.Module): + # Robust convolution (use high kernel size 7-11 for: downsampling and other layers). Train for 300 - 450 epochs. + def __init__(self, c1, c2, k=7, s=1, p=None, g=1, act=True, layer_scale_init_value=1e-6): # ch_in, ch_out, kernel, stride, padding, groups + super(RobustConv, self).__init__() + self.conv_dw = Conv(c1, c1, k=k, s=s, p=p, g=c1, act=act) + self.conv1x1 = nn.Conv2d(c1, c2, 1, 1, 0, groups=1, bias=True) + self.gamma = nn.Parameter(layer_scale_init_value * torch.ones(c2)) if layer_scale_init_value > 0 else None + + def forward(self, x): + x = x.to(memory_format=torch.channels_last) + x = self.conv1x1(self.conv_dw(x)) + if self.gamma is not None: + x = x.mul(self.gamma.reshape(1, -1, 1, 1)) + return x + + +class RobustConv2(nn.Module): + # Robust convolution 2 (use [32, 5, 2] or [32, 7, 4] or [32, 11, 8] for one of the paths in CSP). 
+ def __init__(self, c1, c2, k=7, s=4, p=None, g=1, act=True, layer_scale_init_value=1e-6): # ch_in, ch_out, kernel, stride, padding, groups + super(RobustConv2, self).__init__() + self.conv_strided = Conv(c1, c1, k=k, s=s, p=p, g=c1, act=act) + self.conv_deconv = nn.ConvTranspose2d(in_channels=c1, out_channels=c2, kernel_size=s, stride=s, + padding=0, bias=True, dilation=1, groups=1 + ) + self.gamma = nn.Parameter(layer_scale_init_value * torch.ones(c2)) if layer_scale_init_value > 0 else None + + def forward(self, x): + x = self.conv_deconv(self.conv_strided(x)) + if self.gamma is not None: + x = x.mul(self.gamma.reshape(1, -1, 1, 1)) + return x + + +def DWConv(c1, c2, k=1, s=1, act=True): + # Depthwise convolution + return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) + + +class GhostConv(nn.Module): + # Ghost Convolution https://github.com/huawei-noah/ghostnet + def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups + super(GhostConv, self).__init__() + c_ = c2 // 2 # hidden channels + self.cv1 = Conv(c1, c_, k, s, None, g, act) + self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) + + def forward(self, x): + y = self.cv1(x) + return torch.cat([y, self.cv2(y)], 1) + + +class Stem(nn.Module): + # Stem + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + super(Stem, self).__init__() + c_ = int(c2/2) # hidden channels + self.cv1 = Conv(c1, c_, 3, 2) + self.cv2 = Conv(c_, c_, 1, 1) + self.cv3 = Conv(c_, c_, 3, 2) + self.pool = torch.nn.MaxPool2d(2, stride=2) + self.cv4 = Conv(2 * c_, c2, 1, 1) + + def forward(self, x): + x = self.cv1(x) + return self.cv4(torch.cat((self.cv3(self.cv2(x)), self.pool(x)), dim=1)) + + +class DownC(nn.Module): + # Spatial pyramid pooling layer used in YOLOv3-SPP + def __init__(self, c1, c2, n=1, k=2): + super(DownC, self).__init__() + c_ = int(c1) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_, c2//2, 3, k) + self.cv3 = Conv(c1, c2//2, 1, 1) + self.mp = nn.MaxPool2d(kernel_size=k, stride=k) + + def forward(self, x): + return torch.cat((self.cv2(self.cv1(x)), self.cv3(self.mp(x))), dim=1) + + +class SPP(nn.Module): + # Spatial pyramid pooling layer used in YOLOv3-SPP + def __init__(self, c1, c2, k=(5, 9, 13)): + super(SPP, self).__init__() + c_ = c1 // 2 # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) + self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) + + def forward(self, x): + x = self.cv1(x) + return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) + + +class Bottleneck(nn.Module): + # Darknet bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion + super(Bottleneck, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_, c2, 3, 1, g=g) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + + +class Res(nn.Module): + # ResNet bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion + super(Res, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_, c_, 3, 1, g=g) + self.cv3 = Conv(c_, c2, 1, 1) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv3(self.cv2(self.cv1(x))) if self.add else self.cv3(self.cv2(self.cv1(x))) + + 
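+# A small, hedged usage sketch for the basic blocks defined above (Conv, Bottleneck, Res);
+# it only assumes a random 640x640 input tensor and is not called anywhere else in the module.
+def _basic_blocks_shape_check():
+    x = torch.zeros(1, 3, 640, 640)
+    x = Conv(3, 32, k=3, s=2)(x)   # stride-2 conv with 'same' autopad -> (1, 32, 320, 320)
+    x = Bottleneck(32, 32)(x)      # residual add is applied because c1 == c2
+    x = Res(32, 32)(x)             # 1x1 -> 3x3 -> 1x1 with shortcut, shape preserved
+    return x.shape                 # torch.Size([1, 32, 320, 320])
+
+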
+class ResX(Res): + # ResNet bottleneck + def __init__(self, c1, c2, shortcut=True, g=32, e=0.5): # ch_in, ch_out, shortcut, groups, expansion + super().__init__(c1, c2, shortcut, g, e) + c_ = int(c2 * e) # hidden channels + + +class Ghost(nn.Module): + # Ghost Bottleneck https://github.com/huawei-noah/ghostnet + def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride + super(Ghost, self).__init__() + c_ = c2 // 2 + self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw + DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw + GhostConv(c_, c2, 1, 1, act=False)) # pw-linear + self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), + Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() + + def forward(self, x): + return self.conv(x) + self.shortcut(x) + +##### end of basic ##### + + +##### cspnet ##### + +class SPPCSPC(nn.Module): + # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)): + super(SPPCSPC, self).__init__() + c_ = int(2 * c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c1, c_, 1, 1) + self.cv3 = Conv(c_, c_, 3, 1) + self.cv4 = Conv(c_, c_, 1, 1) + self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) + self.cv5 = Conv(4 * c_, c_, 1, 1) + self.cv6 = Conv(c_, c_, 3, 1) + self.cv7 = Conv(2 * c_, c2, 1, 1) + + def forward(self, x): + x1 = self.cv4(self.cv3(self.cv1(x))) + y1 = self.cv6(self.cv5(torch.cat([x1] + [m(x1) for m in self.m], 1))) + y2 = self.cv2(x) + return self.cv7(torch.cat((y1, y2), dim=1)) + +class GhostSPPCSPC(SPPCSPC): + # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)): + super().__init__(c1, c2, n, shortcut, g, e, k) + c_ = int(2 * c2 * e) # hidden channels + self.cv1 = GhostConv(c1, c_, 1, 1) + self.cv2 = GhostConv(c1, c_, 1, 1) + self.cv3 = GhostConv(c_, c_, 3, 1) + self.cv4 = GhostConv(c_, c_, 1, 1) + self.cv5 = GhostConv(4 * c_, c_, 1, 1) + self.cv6 = GhostConv(c_, c_, 3, 1) + self.cv7 = GhostConv(2 * c_, c2, 1, 1) + + +class GhostStem(Stem): + # Stem + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + super().__init__(c1, c2, k, s, p, g, act) + c_ = int(c2/2) # hidden channels + self.cv1 = GhostConv(c1, c_, 3, 2) + self.cv2 = GhostConv(c_, c_, 1, 1) + self.cv3 = GhostConv(c_, c_, 3, 2) + self.cv4 = GhostConv(2 * c_, c2, 1, 1) + + +class BottleneckCSPA(nn.Module): + # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSPA, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c1, c_, 1, 1) + self.cv3 = Conv(2 * c_, c2, 1, 1) + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + y1 = self.m(self.cv1(x)) + y2 = self.cv2(x) + return self.cv3(torch.cat((y1, y2), dim=1)) + + +class BottleneckCSPB(nn.Module): + # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSPB, self).__init__() + c_ = int(c2) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_, c_, 1, 1) + self.cv3 = Conv(2 * c_, c2, 1, 1) + self.m = 
nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + x1 = self.cv1(x) + y1 = self.m(x1) + y2 = self.cv2(x1) + return self.cv3(torch.cat((y1, y2), dim=1)) + + +class BottleneckCSPC(nn.Module): + # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(BottleneckCSPC, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c1, c_, 1, 1) + self.cv3 = Conv(c_, c_, 1, 1) + self.cv4 = Conv(2 * c_, c2, 1, 1) + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + return self.cv4(torch.cat((y1, y2), dim=1)) + + +class ResCSPA(BottleneckCSPA): + # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) # hidden channels + self.m = nn.Sequential(*[Res(c_, c_, shortcut, g, e=0.5) for _ in range(n)]) + + +class ResCSPB(BottleneckCSPB): + # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2) # hidden channels + self.m = nn.Sequential(*[Res(c_, c_, shortcut, g, e=0.5) for _ in range(n)]) + + +class ResCSPC(BottleneckCSPC): + # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) # hidden channels + self.m = nn.Sequential(*[Res(c_, c_, shortcut, g, e=0.5) for _ in range(n)]) + + +class ResXCSPA(ResCSPA): + # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=32, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) # hidden channels + self.m = nn.Sequential(*[Res(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + +class ResXCSPB(ResCSPB): + # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=32, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2) # hidden channels + self.m = nn.Sequential(*[Res(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + +class ResXCSPC(ResCSPC): + # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=32, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) # hidden channels + self.m = nn.Sequential(*[Res(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + +class GhostCSPA(BottleneckCSPA): + # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) # hidden channels + self.m = nn.Sequential(*[Ghost(c_, c_) for _ in range(n)]) + + +class GhostCSPB(BottleneckCSPB): + # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks + def 
__init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2) # hidden channels + self.m = nn.Sequential(*[Ghost(c_, c_) for _ in range(n)]) + + +class GhostCSPC(BottleneckCSPC): + # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) # hidden channels + self.m = nn.Sequential(*[Ghost(c_, c_) for _ in range(n)]) + +##### end of cspnet ##### + + +##### yolor ##### + +class ImplicitA(nn.Module): + def __init__(self, channel, mean=0., std=.02): + super(ImplicitA, self).__init__() + self.channel = channel + self.mean = mean + self.std = std + self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) + nn.init.normal_(self.implicit, mean=self.mean, std=self.std) + + def forward(self, x): + return self.implicit + x + + +class ImplicitM(nn.Module): + def __init__(self, channel, mean=0., std=.02): + super(ImplicitM, self).__init__() + self.channel = channel + self.mean = mean + self.std = std + self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1)) + nn.init.normal_(self.implicit, mean=self.mean, std=self.std) + + def forward(self, x): + return self.implicit * x + +##### end of yolor ##### + + +##### repvgg ##### + +class RepConv(nn.Module): + # Represented convolution + # https://arxiv.org/abs/2101.03697 + + def __init__(self, c1, c2, k=3, s=1, p=None, g=1, act=True, deploy=False): + super(RepConv, self).__init__() + + self.deploy = deploy + self.groups = g + self.in_channels = c1 + self.out_channels = c2 + + assert k == 3 + assert autopad(k, p) == 1 + + padding_11 = autopad(k, p) - k // 2 + + self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) + + if deploy: + self.rbr_reparam = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=True) + + else: + self.rbr_identity = (nn.BatchNorm2d(num_features=c1) if c2 == c1 and s == 1 else None) + + self.rbr_dense = nn.Sequential( + nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False), + nn.BatchNorm2d(num_features=c2), + ) + + self.rbr_1x1 = nn.Sequential( + nn.Conv2d( c1, c2, 1, s, padding_11, groups=g, bias=False), + nn.BatchNorm2d(num_features=c2), + ) + + def forward(self, inputs): + if hasattr(self, "rbr_reparam"): + return self.act(self.rbr_reparam(inputs)) + + if self.rbr_identity is None: + id_out = 0 + else: + id_out = self.rbr_identity(inputs) + + return self.act(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out) + + def get_equivalent_kernel_bias(self): + kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) + kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) + kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) + return ( + kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, + bias3x3 + bias1x1 + biasid, + ) + + def _pad_1x1_to_3x3_tensor(self, kernel1x1): + if kernel1x1 is None: + return 0 + else: + return nn.functional.pad(kernel1x1, [1, 1, 1, 1]) + + def _fuse_bn_tensor(self, branch): + if branch is None: + return 0, 0 + if isinstance(branch, nn.Sequential): + kernel = branch[0].weight + running_mean = branch[1].running_mean + running_var = branch[1].running_var + gamma = branch[1].weight + beta = branch[1].bias + eps = branch[1].eps + else: + assert isinstance(branch, nn.BatchNorm2d) + if not hasattr(self, "id_tensor"): + input_dim = self.in_channels // 
self.groups + kernel_value = np.zeros( + (self.in_channels, input_dim, 3, 3), dtype=np.float32 + ) + for i in range(self.in_channels): + kernel_value[i, i % input_dim, 1, 1] = 1 + self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device) + kernel = self.id_tensor + running_mean = branch.running_mean + running_var = branch.running_var + gamma = branch.weight + beta = branch.bias + eps = branch.eps + std = (running_var + eps).sqrt() + t = (gamma / std).reshape(-1, 1, 1, 1) + return kernel * t, beta - running_mean * gamma / std + + def repvgg_convert(self): + kernel, bias = self.get_equivalent_kernel_bias() + return ( + kernel.detach().cpu().numpy(), + bias.detach().cpu().numpy(), + ) + + def fuse_conv_bn(self, conv, bn): + + std = (bn.running_var + bn.eps).sqrt() + bias = bn.bias - bn.running_mean * bn.weight / std + + t = (bn.weight / std).reshape(-1, 1, 1, 1) + weights = conv.weight * t + + bn = nn.Identity() + conv = nn.Conv2d(in_channels = conv.in_channels, + out_channels = conv.out_channels, + kernel_size = conv.kernel_size, + stride=conv.stride, + padding = conv.padding, + dilation = conv.dilation, + groups = conv.groups, + bias = True, + padding_mode = conv.padding_mode) + + conv.weight = torch.nn.Parameter(weights) + conv.bias = torch.nn.Parameter(bias) + return conv + + def fuse_repvgg_block(self): + if self.deploy: + return + print(f"RepConv.fuse_repvgg_block") + + self.rbr_dense = self.fuse_conv_bn(self.rbr_dense[0], self.rbr_dense[1]) + + self.rbr_1x1 = self.fuse_conv_bn(self.rbr_1x1[0], self.rbr_1x1[1]) + rbr_1x1_bias = self.rbr_1x1.bias + weight_1x1_expanded = torch.nn.functional.pad(self.rbr_1x1.weight, [1, 1, 1, 1]) + + # Fuse self.rbr_identity + if (isinstance(self.rbr_identity, nn.BatchNorm2d) or isinstance(self.rbr_identity, nn.modules.batchnorm.SyncBatchNorm)): + # print(f"fuse: rbr_identity == BatchNorm2d or SyncBatchNorm") + identity_conv_1x1 = nn.Conv2d( + in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=1, + stride=1, + padding=0, + groups=self.groups, + bias=False) + identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.to(self.rbr_1x1.weight.data.device) + identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.squeeze().squeeze() + # print(f" identity_conv_1x1.weight = {identity_conv_1x1.weight.shape}") + identity_conv_1x1.weight.data.fill_(0.0) + identity_conv_1x1.weight.data.fill_diagonal_(1.0) + identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.unsqueeze(2).unsqueeze(3) + # print(f" identity_conv_1x1.weight = {identity_conv_1x1.weight.shape}") + + identity_conv_1x1 = self.fuse_conv_bn(identity_conv_1x1, self.rbr_identity) + bias_identity_expanded = identity_conv_1x1.bias + weight_identity_expanded = torch.nn.functional.pad(identity_conv_1x1.weight, [1, 1, 1, 1]) + else: + # print(f"fuse: rbr_identity != BatchNorm2d, rbr_identity = {self.rbr_identity}") + bias_identity_expanded = torch.nn.Parameter( torch.zeros_like(rbr_1x1_bias) ) + weight_identity_expanded = torch.nn.Parameter( torch.zeros_like(weight_1x1_expanded) ) + + + #print(f"self.rbr_1x1.weight = {self.rbr_1x1.weight.shape}, ") + #print(f"weight_1x1_expanded = {weight_1x1_expanded.shape}, ") + #print(f"self.rbr_dense.weight = {self.rbr_dense.weight.shape}, ") + + self.rbr_dense.weight = torch.nn.Parameter(self.rbr_dense.weight + weight_1x1_expanded + weight_identity_expanded) + self.rbr_dense.bias = torch.nn.Parameter(self.rbr_dense.bias + rbr_1x1_bias + bias_identity_expanded) + + self.rbr_reparam = self.rbr_dense + self.deploy = True 
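+
+        # At this point the dense, 1x1 and identity branches have all been folded into
+        # rbr_reparam, so the separate branch modules are deleted below to free memory.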
+ + if self.rbr_identity is not None: + del self.rbr_identity + self.rbr_identity = None + + if self.rbr_1x1 is not None: + del self.rbr_1x1 + self.rbr_1x1 = None + + if self.rbr_dense is not None: + del self.rbr_dense + self.rbr_dense = None + + +class RepBottleneck(Bottleneck): + # Standard bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion + super().__init__(c1, c2, shortcut=True, g=1, e=0.5) + c_ = int(c2 * e) # hidden channels + self.cv2 = RepConv(c_, c2, 3, 1, g=g) + + +class RepBottleneckCSPA(BottleneckCSPA): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) # hidden channels + self.m = nn.Sequential(*[RepBottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + +class RepBottleneckCSPB(BottleneckCSPB): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2) # hidden channels + self.m = nn.Sequential(*[RepBottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + +class RepBottleneckCSPC(BottleneckCSPC): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) # hidden channels + self.m = nn.Sequential(*[RepBottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + +class RepRes(Res): + # Standard bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion + super().__init__(c1, c2, shortcut, g, e) + c_ = int(c2 * e) # hidden channels + self.cv2 = RepConv(c_, c_, 3, 1, g=g) + + +class RepResCSPA(ResCSPA): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) # hidden channels + self.m = nn.Sequential(*[RepRes(c_, c_, shortcut, g, e=0.5) for _ in range(n)]) + + +class RepResCSPB(ResCSPB): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2) # hidden channels + self.m = nn.Sequential(*[RepRes(c_, c_, shortcut, g, e=0.5) for _ in range(n)]) + + +class RepResCSPC(ResCSPC): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) # hidden channels + self.m = nn.Sequential(*[RepRes(c_, c_, shortcut, g, e=0.5) for _ in range(n)]) + + +class RepResX(ResX): + # Standard bottleneck + def __init__(self, c1, c2, shortcut=True, g=32, e=0.5): # ch_in, ch_out, shortcut, groups, expansion + super().__init__(c1, c2, shortcut, g, e) + c_ = int(c2 * e) # hidden channels + self.cv2 = RepConv(c_, c_, 3, 1, g=g) + + +class RepResXCSPA(ResXCSPA): + # CSP Bottleneck 
https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=32, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) # hidden channels + self.m = nn.Sequential(*[RepResX(c_, c_, shortcut, g, e=0.5) for _ in range(n)]) + + +class RepResXCSPB(ResXCSPB): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=False, g=32, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2) # hidden channels + self.m = nn.Sequential(*[RepResX(c_, c_, shortcut, g, e=0.5) for _ in range(n)]) + + +class RepResXCSPC(ResXCSPC): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=32, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) # hidden channels + self.m = nn.Sequential(*[RepResX(c_, c_, shortcut, g, e=0.5) for _ in range(n)]) + +##### end of repvgg ##### + + +##### transformer ##### + +class TransformerLayer(nn.Module): + # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance) + def __init__(self, c, num_heads): + super().__init__() + self.q = nn.Linear(c, c, bias=False) + self.k = nn.Linear(c, c, bias=False) + self.v = nn.Linear(c, c, bias=False) + self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads) + self.fc1 = nn.Linear(c, c, bias=False) + self.fc2 = nn.Linear(c, c, bias=False) + + def forward(self, x): + x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x + x = self.fc2(self.fc1(x)) + x + return x + + +class TransformerBlock(nn.Module): + # Vision Transformer https://arxiv.org/abs/2010.11929 + def __init__(self, c1, c2, num_heads, num_layers): + super().__init__() + self.conv = None + if c1 != c2: + self.conv = Conv(c1, c2) + self.linear = nn.Linear(c2, c2) # learnable position embedding + self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)]) + self.c2 = c2 + + def forward(self, x): + if self.conv is not None: + x = self.conv(x) + b, _, w, h = x.shape + p = x.flatten(2) + p = p.unsqueeze(0) + p = p.transpose(0, 3) + p = p.squeeze(3) + e = self.linear(p) + x = p + e + + x = self.tr(x) + x = x.unsqueeze(3) + x = x.transpose(0, 3) + x = x.reshape(b, self.c2, w, h) + return x + +##### end of transformer ##### + + +##### yolov5 ##### + +class Focus(nn.Module): + # Focus wh information into c-space + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + super(Focus, self).__init__() + self.conv = Conv(c1 * 4, c2, k, s, p, g, act) + # self.contract = Contract(gain=2) + + def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) + return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) + # return self.conv(self.contract(x)) + + +class SPPF(nn.Module): + # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher + def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13)) + super().__init__() + c_ = c1 // 2 # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_ * 4, c2, 1, 1) + self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2) + + def forward(self, x): + x = self.cv1(x) + y1 = self.m(x) + y2 = self.m(y1) + return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1)) + + +class 
Contract(nn.Module): + # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40) + def __init__(self, gain=2): + super().__init__() + self.gain = gain + + def forward(self, x): + N, C, H, W = x.size() # assert (H / s == 0) and (W / s == 0), 'Indivisible gain' + s = self.gain + x = x.view(N, C, H // s, s, W // s, s) # x(1,64,40,2,40,2) + x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40) + return x.view(N, C * s * s, H // s, W // s) # x(1,256,40,40) + + +class Expand(nn.Module): + # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160) + def __init__(self, gain=2): + super().__init__() + self.gain = gain + + def forward(self, x): + N, C, H, W = x.size() # assert C / s ** 2 == 0, 'Indivisible gain' + s = self.gain + x = x.view(N, s, s, C // s ** 2, H, W) # x(1,2,2,16,80,80) + x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2) + return x.view(N, C // s ** 2, H * s, W * s) # x(1,16,160,160) + + +class NMS(nn.Module): + # Non-Maximum Suppression (NMS) module + conf = 0.25 # confidence threshold + iou = 0.45 # IoU threshold + classes = None # (optional list) filter by class + + def __init__(self): + super(NMS, self).__init__() + + def forward(self, x): + return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) + + +class autoShape(nn.Module): + # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS + conf = 0.25 # NMS confidence threshold + iou = 0.45 # NMS IoU threshold + classes = None # (optional list) filter by class + + def __init__(self, model): + super(autoShape, self).__init__() + self.model = model.eval() + + def autoshape(self): + print('autoShape already enabled, skipping... ') # model already converted to model.autoshape() + return self + + @torch.no_grad() + def forward(self, imgs, size=640, augment=False, profile=False): + # Inference from various sources. For height=640, width=1280, RGB images example inputs are: + # filename: imgs = 'data/samples/zidane.jpg' + # URI: = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg' + # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3) + # PIL: = Image.open('image.jpg') # HWC x(640,1280,3) + # numpy: = np.zeros((640,1280,3)) # HWC + # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values) + # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] 
# list of images + + t = [time_synchronized()] + p = next(self.model.parameters()) # for device and type + if isinstance(imgs, torch.Tensor): # torch + with amp.autocast(enabled=p.device.type != 'cpu'): + return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference + + # Pre-process + n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) # number of images, list of images + shape0, shape1, files = [], [], [] # image and inference shapes, filenames + for i, im in enumerate(imgs): + f = f'image{i}' # filename + if isinstance(im, str): # filename or uri + im, f = np.asarray(Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im)), im + elif isinstance(im, Image.Image): # PIL Image + im, f = np.asarray(im), getattr(im, 'filename', f) or f + files.append(Path(f).with_suffix('.jpg').name) + if im.shape[0] < 5: # image in CHW + im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) + im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3) # enforce 3ch input + s = im.shape[:2] # HWC + shape0.append(s) # image shape + g = (size / max(s)) # gain + shape1.append([y * g for y in s]) + imgs[i] = im # update + shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape + x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs] # pad + x = np.stack(x, 0) if n > 1 else x[0][None] # stack + x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW + x = torch.from_numpy(x).to(p.device).type_as(p) / 255. # uint8 to fp16/32 + t.append(time_synchronized()) + + with amp.autocast(enabled=p.device.type != 'cpu'): + # Inference + y = self.model(x, augment, profile)[0] # forward + t.append(time_synchronized()) + + # Post-process + y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) # NMS + for i in range(n): + scale_coords(shape1, y[i][:, :4], shape0[i]) + + t.append(time_synchronized()) + return Detections(imgs, y, files, t, self.names, x.shape) + + +class Detections: + # detections class for YOLOv5 inference results + def __init__(self, imgs, pred, files, times=None, names=None, shape=None): + super(Detections, self).__init__() + d = pred[0].device # device + gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations + self.imgs = imgs # list of images as numpy arrays + self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls) + self.names = names # class names + self.files = files # image filenames + self.xyxy = pred # xyxy pixels + self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels + self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized + self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized + self.n = len(self.pred) # number of images (batch size) + self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) # timestamps (ms) + self.s = shape # inference BCHW shape + + def display(self, pprint=False, show=False, save=False, render=False, save_dir=''): + colors = color_list() + for i, (img, pred) in enumerate(zip(self.imgs, self.pred)): + str = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} ' + if pred is not None: + for c in pred[:, -1].unique(): + n = (pred[:, -1] == c).sum() # detections per class + str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string + if show or save or render: + for *box, conf, cls in pred: # xyxy, confidence, class + label = f'{self.names[int(cls)]} {conf:.2f}' + 
plot_one_box(box, img, label=label, color=colors[int(cls) % 10]) + img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img # from np + if pprint: + print(str.rstrip(', ')) + if show: + img.show(self.files[i]) # show + if save: + f = self.files[i] + img.save(Path(save_dir) / f) # save + print(f"{'Saved' * (i == 0)} {f}", end=',' if i < self.n - 1 else f' to {save_dir}\n') + if render: + self.imgs[i] = np.asarray(img) + + def print(self): + self.display(pprint=True) # print results + print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t) + + def show(self): + self.display(show=True) # show results + + def save(self, save_dir='runs/hub/exp'): + save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp') # increment save_dir + Path(save_dir).mkdir(parents=True, exist_ok=True) + self.display(save=True, save_dir=save_dir) # save results + + def render(self): + self.display(render=True) # render results + return self.imgs + + def pandas(self): + # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0]) + new = copy(self) # return copy + ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns + cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns + for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]): + a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update + setattr(new, k, [pd.DataFrame(x, columns=c) for x in a]) + return new + + def tolist(self): + # return a list of Detections objects, i.e. 'for result in results.tolist():' + x = [Detections([self.imgs[i]], [self.pred[i]], self.names, self.s) for i in range(self.n)] + for d in x: + for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']: + setattr(d, k, getattr(d, k)[0]) # pop out of list + return x + + def __len__(self): + return self.n + + +class Classify(nn.Module): + # Classification head, i.e. 
x(b,c1,20,20) to x(b,c2) + def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups + super(Classify, self).__init__() + self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) + self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1) + self.flat = nn.Flatten() + + def forward(self, x): + z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list + return self.flat(self.conv(z)) # flatten to x(b,c2) + +##### end of yolov5 ###### + + +##### orepa ##### + +def transI_fusebn(kernel, bn): + gamma = bn.weight + std = (bn.running_var + bn.eps).sqrt() + return kernel * ((gamma / std).reshape(-1, 1, 1, 1)), bn.bias - bn.running_mean * gamma / std + + +class ConvBN(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size, + stride=1, padding=0, dilation=1, groups=1, deploy=False, nonlinear=None): + super().__init__() + if nonlinear is None: + self.nonlinear = nn.Identity() + else: + self.nonlinear = nonlinear + if deploy: + self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, + stride=stride, padding=padding, dilation=dilation, groups=groups, bias=True) + else: + self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, + stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False) + self.bn = nn.BatchNorm2d(num_features=out_channels) + + def forward(self, x): + if hasattr(self, 'bn'): + return self.nonlinear(self.bn(self.conv(x))) + else: + return self.nonlinear(self.conv(x)) + + def switch_to_deploy(self): + kernel, bias = transI_fusebn(self.conv.weight, self.bn) + conv = nn.Conv2d(in_channels=self.conv.in_channels, out_channels=self.conv.out_channels, kernel_size=self.conv.kernel_size, + stride=self.conv.stride, padding=self.conv.padding, dilation=self.conv.dilation, groups=self.conv.groups, bias=True) + conv.weight.data = kernel + conv.bias.data = bias + for para in self.parameters(): + para.detach_() + self.__delattr__('conv') + self.__delattr__('bn') + self.conv = conv + +class OREPA_3x3_RepConv(nn.Module): + + def __init__(self, in_channels, out_channels, kernel_size, + stride=1, padding=0, dilation=1, groups=1, + internal_channels_1x1_3x3=None, + deploy=False, nonlinear=None, single_init=False): + super(OREPA_3x3_RepConv, self).__init__() + self.deploy = deploy + + if nonlinear is None: + self.nonlinear = nn.Identity() + else: + self.nonlinear = nonlinear + + self.kernel_size = kernel_size + self.in_channels = in_channels + self.out_channels = out_channels + self.groups = groups + assert padding == kernel_size // 2 + + self.stride = stride + self.padding = padding + self.dilation = dilation + + self.branch_counter = 0 + + self.weight_rbr_origin = nn.Parameter(torch.Tensor(out_channels, int(in_channels/self.groups), kernel_size, kernel_size)) + nn.init.kaiming_uniform_(self.weight_rbr_origin, a=math.sqrt(1.0)) + self.branch_counter += 1 + + + if groups < out_channels: + self.weight_rbr_avg_conv = nn.Parameter(torch.Tensor(out_channels, int(in_channels/self.groups), 1, 1)) + self.weight_rbr_pfir_conv = nn.Parameter(torch.Tensor(out_channels, int(in_channels/self.groups), 1, 1)) + nn.init.kaiming_uniform_(self.weight_rbr_avg_conv, a=1.0) + nn.init.kaiming_uniform_(self.weight_rbr_pfir_conv, a=1.0) + self.weight_rbr_avg_conv.data + self.weight_rbr_pfir_conv.data + self.register_buffer('weight_rbr_avg_avg', torch.ones(kernel_size, kernel_size).mul(1.0/kernel_size/kernel_size)) + 
self.branch_counter += 1 + + else: + raise NotImplementedError + self.branch_counter += 1 + + if internal_channels_1x1_3x3 is None: + internal_channels_1x1_3x3 = in_channels if groups < out_channels else 2 * in_channels # For mobilenet, it is better to have 2X internal channels + + if internal_channels_1x1_3x3 == in_channels: + self.weight_rbr_1x1_kxk_idconv1 = nn.Parameter(torch.zeros(in_channels, int(in_channels/self.groups), 1, 1)) + id_value = np.zeros((in_channels, int(in_channels/self.groups), 1, 1)) + for i in range(in_channels): + id_value[i, i % int(in_channels/self.groups), 0, 0] = 1 + id_tensor = torch.from_numpy(id_value).type_as(self.weight_rbr_1x1_kxk_idconv1) + self.register_buffer('id_tensor', id_tensor) + + else: + self.weight_rbr_1x1_kxk_conv1 = nn.Parameter(torch.Tensor(internal_channels_1x1_3x3, int(in_channels/self.groups), 1, 1)) + nn.init.kaiming_uniform_(self.weight_rbr_1x1_kxk_conv1, a=math.sqrt(1.0)) + self.weight_rbr_1x1_kxk_conv2 = nn.Parameter(torch.Tensor(out_channels, int(internal_channels_1x1_3x3/self.groups), kernel_size, kernel_size)) + nn.init.kaiming_uniform_(self.weight_rbr_1x1_kxk_conv2, a=math.sqrt(1.0)) + self.branch_counter += 1 + + expand_ratio = 8 + self.weight_rbr_gconv_dw = nn.Parameter(torch.Tensor(in_channels*expand_ratio, 1, kernel_size, kernel_size)) + self.weight_rbr_gconv_pw = nn.Parameter(torch.Tensor(out_channels, in_channels*expand_ratio, 1, 1)) + nn.init.kaiming_uniform_(self.weight_rbr_gconv_dw, a=math.sqrt(1.0)) + nn.init.kaiming_uniform_(self.weight_rbr_gconv_pw, a=math.sqrt(1.0)) + self.branch_counter += 1 + + if out_channels == in_channels and stride == 1: + self.branch_counter += 1 + + self.vector = nn.Parameter(torch.Tensor(self.branch_counter, self.out_channels)) + self.bn = nn.BatchNorm2d(out_channels) + + self.fre_init() + + nn.init.constant_(self.vector[0, :], 0.25) #origin + nn.init.constant_(self.vector[1, :], 0.25) #avg + nn.init.constant_(self.vector[2, :], 0.0) #prior + nn.init.constant_(self.vector[3, :], 0.5) #1x1_kxk + nn.init.constant_(self.vector[4, :], 0.5) #dws_conv + + + def fre_init(self): + prior_tensor = torch.Tensor(self.out_channels, self.kernel_size, self.kernel_size) + half_fg = self.out_channels/2 + for i in range(self.out_channels): + for h in range(3): + for w in range(3): + if i < half_fg: + prior_tensor[i, h, w] = math.cos(math.pi*(h+0.5)*(i+1)/3) + else: + prior_tensor[i, h, w] = math.cos(math.pi*(w+0.5)*(i+1-half_fg)/3) + + self.register_buffer('weight_rbr_prior', prior_tensor) + + def weight_gen(self): + + weight_rbr_origin = torch.einsum('oihw,o->oihw', self.weight_rbr_origin, self.vector[0, :]) + + weight_rbr_avg = torch.einsum('oihw,o->oihw', torch.einsum('oihw,hw->oihw', self.weight_rbr_avg_conv, self.weight_rbr_avg_avg), self.vector[1, :]) + + weight_rbr_pfir = torch.einsum('oihw,o->oihw', torch.einsum('oihw,ohw->oihw', self.weight_rbr_pfir_conv, self.weight_rbr_prior), self.vector[2, :]) + + weight_rbr_1x1_kxk_conv1 = None + if hasattr(self, 'weight_rbr_1x1_kxk_idconv1'): + weight_rbr_1x1_kxk_conv1 = (self.weight_rbr_1x1_kxk_idconv1 + self.id_tensor).squeeze() + elif hasattr(self, 'weight_rbr_1x1_kxk_conv1'): + weight_rbr_1x1_kxk_conv1 = self.weight_rbr_1x1_kxk_conv1.squeeze() + else: + raise NotImplementedError + weight_rbr_1x1_kxk_conv2 = self.weight_rbr_1x1_kxk_conv2 + + if self.groups > 1: + g = self.groups + t, ig = weight_rbr_1x1_kxk_conv1.size() + o, tg, h, w = weight_rbr_1x1_kxk_conv2.size() + weight_rbr_1x1_kxk_conv1 = weight_rbr_1x1_kxk_conv1.view(g, int(t/g), ig) + 
weight_rbr_1x1_kxk_conv2 = weight_rbr_1x1_kxk_conv2.view(g, int(o/g), tg, h, w) + weight_rbr_1x1_kxk = torch.einsum('gti,gothw->goihw', weight_rbr_1x1_kxk_conv1, weight_rbr_1x1_kxk_conv2).view(o, ig, h, w) + else: + weight_rbr_1x1_kxk = torch.einsum('ti,othw->oihw', weight_rbr_1x1_kxk_conv1, weight_rbr_1x1_kxk_conv2) + + weight_rbr_1x1_kxk = torch.einsum('oihw,o->oihw', weight_rbr_1x1_kxk, self.vector[3, :]) + + weight_rbr_gconv = self.dwsc2full(self.weight_rbr_gconv_dw, self.weight_rbr_gconv_pw, self.in_channels) + weight_rbr_gconv = torch.einsum('oihw,o->oihw', weight_rbr_gconv, self.vector[4, :]) + + weight = weight_rbr_origin + weight_rbr_avg + weight_rbr_1x1_kxk + weight_rbr_pfir + weight_rbr_gconv + + return weight + + def dwsc2full(self, weight_dw, weight_pw, groups): + + t, ig, h, w = weight_dw.size() + o, _, _, _ = weight_pw.size() + tg = int(t/groups) + i = int(ig*groups) + weight_dw = weight_dw.view(groups, tg, ig, h, w) + weight_pw = weight_pw.squeeze().view(o, groups, tg) + + weight_dsc = torch.einsum('gtihw,ogt->ogihw', weight_dw, weight_pw) + return weight_dsc.view(o, i, h, w) + + def forward(self, inputs): + weight = self.weight_gen() + out = F.conv2d(inputs, weight, bias=None, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups) + + return self.nonlinear(self.bn(out)) + +class RepConv_OREPA(nn.Module): + + def __init__(self, c1, c2, k=3, s=1, padding=1, dilation=1, groups=1, padding_mode='zeros', deploy=False, use_se=False, nonlinear=nn.SiLU()): + super(RepConv_OREPA, self).__init__() + self.deploy = deploy + self.groups = groups + self.in_channels = c1 + self.out_channels = c2 + + self.padding = padding + self.dilation = dilation + self.groups = groups + + assert k == 3 + assert padding == 1 + + padding_11 = padding - k // 2 + + if nonlinear is None: + self.nonlinearity = nn.Identity() + else: + self.nonlinearity = nonlinear + + if use_se: + self.se = SEBlock(self.out_channels, internal_neurons=self.out_channels // 16) + else: + self.se = nn.Identity() + + if deploy: + self.rbr_reparam = nn.Conv2d(in_channels=self.in_channels, out_channels=self.out_channels, kernel_size=k, stride=s, + padding=padding, dilation=dilation, groups=groups, bias=True, padding_mode=padding_mode) + + else: + self.rbr_identity = nn.BatchNorm2d(num_features=self.in_channels) if self.out_channels == self.in_channels and s == 1 else None + self.rbr_dense = OREPA_3x3_RepConv(in_channels=self.in_channels, out_channels=self.out_channels, kernel_size=k, stride=s, padding=padding, groups=groups, dilation=1) + self.rbr_1x1 = ConvBN(in_channels=self.in_channels, out_channels=self.out_channels, kernel_size=1, stride=s, padding=padding_11, groups=groups, dilation=1) + print('RepVGG Block, identity = ', self.rbr_identity) + + + def forward(self, inputs): + if hasattr(self, 'rbr_reparam'): + return self.nonlinearity(self.se(self.rbr_reparam(inputs))) + + if self.rbr_identity is None: + id_out = 0 + else: + id_out = self.rbr_identity(inputs) + + out1 = self.rbr_dense(inputs) + out2 = self.rbr_1x1(inputs) + out3 = id_out + out = out1 + out2 + out3 + + return self.nonlinearity(self.se(out)) + + + # Optional. This improves the accuracy and facilitates quantization. + # 1. Cancel the original weight decay on rbr_dense.conv.weight and rbr_1x1.conv.weight. + # 2. Use like this. + # loss = criterion(....) 
+ # for every RepVGGBlock blk: + # loss += weight_decay_coefficient * 0.5 * blk.get_cust_L2() + # optimizer.zero_grad() + # loss.backward() + + # Not used for OREPA + def get_custom_L2(self): + K3 = self.rbr_dense.weight_gen() + K1 = self.rbr_1x1.conv.weight + t3 = (self.rbr_dense.bn.weight / ((self.rbr_dense.bn.running_var + self.rbr_dense.bn.eps).sqrt())).reshape(-1, 1, 1, 1).detach() + t1 = (self.rbr_1x1.bn.weight / ((self.rbr_1x1.bn.running_var + self.rbr_1x1.bn.eps).sqrt())).reshape(-1, 1, 1, 1).detach() + + l2_loss_circle = (K3 ** 2).sum() - (K3[:, :, 1:2, 1:2] ** 2).sum() # The L2 loss of the "circle" of weights in 3x3 kernel. Use regular L2 on them. + eq_kernel = K3[:, :, 1:2, 1:2] * t3 + K1 * t1 # The equivalent resultant central point of 3x3 kernel. + l2_loss_eq_kernel = (eq_kernel ** 2 / (t3 ** 2 + t1 ** 2)).sum() # Normalize for an L2 coefficient comparable to regular L2. + return l2_loss_eq_kernel + l2_loss_circle + + def get_equivalent_kernel_bias(self): + kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) + kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) + kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) + return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid + + def _pad_1x1_to_3x3_tensor(self, kernel1x1): + if kernel1x1 is None: + return 0 + else: + return torch.nn.functional.pad(kernel1x1, [1,1,1,1]) + + def _fuse_bn_tensor(self, branch): + if branch is None: + return 0, 0 + if not isinstance(branch, nn.BatchNorm2d): + if isinstance(branch, OREPA_3x3_RepConv): + kernel = branch.weight_gen() + elif isinstance(branch, ConvBN): + kernel = branch.conv.weight + else: + raise NotImplementedError + running_mean = branch.bn.running_mean + running_var = branch.bn.running_var + gamma = branch.bn.weight + beta = branch.bn.bias + eps = branch.bn.eps + else: + if not hasattr(self, 'id_tensor'): + input_dim = self.in_channels // self.groups + kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), dtype=np.float32) + for i in range(self.in_channels): + kernel_value[i, i % input_dim, 1, 1] = 1 + self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device) + kernel = self.id_tensor + running_mean = branch.running_mean + running_var = branch.running_var + gamma = branch.weight + beta = branch.bias + eps = branch.eps + std = (running_var + eps).sqrt() + t = (gamma / std).reshape(-1, 1, 1, 1) + return kernel * t, beta - running_mean * gamma / std + + def switch_to_deploy(self): + if hasattr(self, 'rbr_reparam'): + return + print(f"RepConv_OREPA.switch_to_deploy") + kernel, bias = self.get_equivalent_kernel_bias() + self.rbr_reparam = nn.Conv2d(in_channels=self.rbr_dense.in_channels, out_channels=self.rbr_dense.out_channels, + kernel_size=self.rbr_dense.kernel_size, stride=self.rbr_dense.stride, + padding=self.rbr_dense.padding, dilation=self.rbr_dense.dilation, groups=self.rbr_dense.groups, bias=True) + self.rbr_reparam.weight.data = kernel + self.rbr_reparam.bias.data = bias + for para in self.parameters(): + para.detach_() + self.__delattr__('rbr_dense') + self.__delattr__('rbr_1x1') + if hasattr(self, 'rbr_identity'): + self.__delattr__('rbr_identity') + +##### end of orepa ##### + + +##### swin transformer ##### + +class WindowAttention(nn.Module): + + def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.): + + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + 
self.scale = qk_scale or head_dim ** -0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = nn.Parameter( + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + nn.init.normal_(self.relative_position_bias_table, std=.02) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x, mask=None): + + B_, N, C = x.shape + qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = (q @ k.transpose(-2, -1)) + + relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view( + self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.view(-1, self.num_heads, N, N) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + # print(attn.dtype, v.dtype) + try: + x = (attn @ v).transpose(1, 2).reshape(B_, N, C) + except: + #print(attn.dtype, v.dtype) + x = (attn.half() @ v).transpose(1, 2).reshape(B_, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + +class Mlp(nn.Module): + + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.SiLU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + +def window_partition(x, window_size): + + B, H, W, C = x.shape + assert H % window_size == 0, 'feature map h and w can not divide by window size' + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + +def window_reverse(windows, window_size, H, W): + + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // 
window_size, W // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + +class SwinTransformerLayer(nn.Module): + + def __init__(self, dim, num_heads, window_size=8, shift_size=0, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0., + act_layer=nn.SiLU, norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + # if min(self.input_resolution) <= self.window_size: + # # if window size is larger than input resolution, we don't partition windows + # self.shift_size = 0 + # self.window_size = min(self.input_resolution) + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, window_size=(self.window_size, self.window_size), num_heads=num_heads, + qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) + + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + def create_mask(self, H, W): + # calculate attention mask for SW-MSA + img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + return attn_mask + + def forward(self, x): + # reshape x[b c h w] to x[b l c] + _, _, H_, W_ = x.shape + + Padding = False + if min(H_, W_) < self.window_size or H_ % self.window_size!=0 or W_ % self.window_size!=0: + Padding = True + # print(f'img_size {min(H_, W_)} is less than (or not divided by) window_size {self.window_size}, Padding.') + pad_r = (self.window_size - W_ % self.window_size) % self.window_size + pad_b = (self.window_size - H_ % self.window_size) % self.window_size + x = F.pad(x, (0, pad_r, 0, pad_b)) + + # print('2', x.shape) + B, C, H, W = x.shape + L = H * W + x = x.permute(0, 2, 3, 1).contiguous().view(B, L, C) # b, L, c + + # create mask from init to forward + if self.shift_size > 0: + attn_mask = self.create_mask(H, W).to(x.device) + else: + attn_mask = None + + shortcut = x + x = self.norm1(x) + x = x.view(B, H, W, C) + + # cyclic shift + if self.shift_size > 0: + shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) + else: + shifted_x = x + + # partition windows + x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA + attn_windows = self.attn(x_windows, mask=attn_mask) # nW*B, window_size*window_size, C + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) 
+ shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) + else: + x = shifted_x + x = x.view(B, H * W, C) + + # FFN + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + x = x.permute(0, 2, 1).contiguous().view(-1, C, H, W) # b c h w + + if Padding: + x = x[:, :, :H_, :W_] # reverse padding + + return x + + +class SwinTransformerBlock(nn.Module): + def __init__(self, c1, c2, num_heads, num_layers, window_size=8): + super().__init__() + self.conv = None + if c1 != c2: + self.conv = Conv(c1, c2) + + # remove input_resolution + self.blocks = nn.Sequential(*[SwinTransformerLayer(dim=c2, num_heads=num_heads, window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2) for i in range(num_layers)]) + + def forward(self, x): + if self.conv is not None: + x = self.conv(x) + x = self.blocks(x) + return x + + +class STCSPA(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(STCSPA, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c1, c_, 1, 1) + self.cv3 = Conv(2 * c_, c2, 1, 1) + num_heads = c_ // 32 + self.m = SwinTransformerBlock(c_, c_, num_heads, n) + #self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + y1 = self.m(self.cv1(x)) + y2 = self.cv2(x) + return self.cv3(torch.cat((y1, y2), dim=1)) + + +class STCSPB(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(STCSPB, self).__init__() + c_ = int(c2) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_, c_, 1, 1) + self.cv3 = Conv(2 * c_, c2, 1, 1) + num_heads = c_ // 32 + self.m = SwinTransformerBlock(c_, c_, num_heads, n) + #self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + x1 = self.cv1(x) + y1 = self.m(x1) + y2 = self.cv2(x1) + return self.cv3(torch.cat((y1, y2), dim=1)) + + +class STCSPC(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(STCSPC, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c1, c_, 1, 1) + self.cv3 = Conv(c_, c_, 1, 1) + self.cv4 = Conv(2 * c_, c2, 1, 1) + num_heads = c_ // 32 + self.m = SwinTransformerBlock(c_, c_, num_heads, n) + #self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + return self.cv4(torch.cat((y1, y2), dim=1)) + +##### end of swin transformer ##### + + +##### swin transformer v2 ##### + +class WindowAttention_v2(nn.Module): + + def __init__(self, dim, window_size, num_heads, qkv_bias=True, attn_drop=0., proj_drop=0., + pretrained_window_size=[0, 0]): + + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.pretrained_window_size = pretrained_window_size + self.num_heads = num_heads + + self.logit_scale = nn.Parameter(torch.log(10 * 
torch.ones((num_heads, 1, 1))), requires_grad=True) + + # mlp to generate continuous relative position bias + self.cpb_mlp = nn.Sequential(nn.Linear(2, 512, bias=True), + nn.ReLU(inplace=True), + nn.Linear(512, num_heads, bias=False)) + + # get relative_coords_table + relative_coords_h = torch.arange(-(self.window_size[0] - 1), self.window_size[0], dtype=torch.float32) + relative_coords_w = torch.arange(-(self.window_size[1] - 1), self.window_size[1], dtype=torch.float32) + relative_coords_table = torch.stack( + torch.meshgrid([relative_coords_h, + relative_coords_w])).permute(1, 2, 0).contiguous().unsqueeze(0) # 1, 2*Wh-1, 2*Ww-1, 2 + if pretrained_window_size[0] > 0: + relative_coords_table[:, :, :, 0] /= (pretrained_window_size[0] - 1) + relative_coords_table[:, :, :, 1] /= (pretrained_window_size[1] - 1) + else: + relative_coords_table[:, :, :, 0] /= (self.window_size[0] - 1) + relative_coords_table[:, :, :, 1] /= (self.window_size[1] - 1) + relative_coords_table *= 8 # normalize to -8, 8 + relative_coords_table = torch.sign(relative_coords_table) * torch.log2( + torch.abs(relative_coords_table) + 1.0) / np.log2(8) + + self.register_buffer("relative_coords_table", relative_coords_table) + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias=False) + if qkv_bias: + self.q_bias = nn.Parameter(torch.zeros(dim)) + self.v_bias = nn.Parameter(torch.zeros(dim)) + else: + self.q_bias = None + self.v_bias = None + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x, mask=None): + + B_, N, C = x.shape + qkv_bias = None + if self.q_bias is not None: + qkv_bias = torch.cat((self.q_bias, torch.zeros_like(self.v_bias, requires_grad=False), self.v_bias)) + qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias) + qkv = qkv.reshape(B_, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + # cosine attention + attn = (F.normalize(q, dim=-1) @ F.normalize(k, dim=-1).transpose(-2, -1)) + logit_scale = torch.clamp(self.logit_scale, max=torch.log(torch.tensor(1. 
/ 0.01))).exp() + attn = attn * logit_scale + + relative_position_bias_table = self.cpb_mlp(self.relative_coords_table).view(-1, self.num_heads) + relative_position_bias = relative_position_bias_table[self.relative_position_index.view(-1)].view( + self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + relative_position_bias = 16 * torch.sigmoid(relative_position_bias) + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.view(-1, self.num_heads, N, N) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + try: + x = (attn @ v).transpose(1, 2).reshape(B_, N, C) + except: + x = (attn.half() @ v).transpose(1, 2).reshape(B_, N, C) + + x = self.proj(x) + x = self.proj_drop(x) + return x + + def extra_repr(self) -> str: + return f'dim={self.dim}, window_size={self.window_size}, ' \ + f'pretrained_window_size={self.pretrained_window_size}, num_heads={self.num_heads}' + + def flops(self, N): + # calculate flops for 1 window with token length of N + flops = 0 + # qkv = self.qkv(x) + flops += N * self.dim * 3 * self.dim + # attn = (q @ k.transpose(-2, -1)) + flops += self.num_heads * N * (self.dim // self.num_heads) * N + # x = (attn @ v) + flops += self.num_heads * N * N * (self.dim // self.num_heads) + # x = self.proj(x) + flops += N * self.dim * self.dim + return flops + +class Mlp_v2(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.SiLU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition_v2(x, window_size): + + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + + +def window_reverse_v2(windows, window_size, H, W): + + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + +class SwinTransformerLayer_v2(nn.Module): + + def __init__(self, dim, num_heads, window_size=7, shift_size=0, + mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0., + act_layer=nn.SiLU, norm_layer=nn.LayerNorm, pretrained_window_size=0): + super().__init__() + self.dim = dim + #self.input_resolution = input_resolution + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + #if min(self.input_resolution) <= self.window_size: + # # if window size is larger than input resolution, we don't partition windows + # self.shift_size = 0 + # self.window_size = min(self.input_resolution) + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention_v2( + dim, 
window_size=(self.window_size, self.window_size), num_heads=num_heads, + qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop, + pretrained_window_size=(pretrained_window_size, pretrained_window_size)) + + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp_v2(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + def create_mask(self, H, W): + # calculate attention mask for SW-MSA + img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + return attn_mask + + def forward(self, x): + # reshape x[b c h w] to x[b l c] + _, _, H_, W_ = x.shape + + Padding = False + if min(H_, W_) < self.window_size or H_ % self.window_size!=0 or W_ % self.window_size!=0: + Padding = True + # print(f'img_size {min(H_, W_)} is less than (or not divided by) window_size {self.window_size}, Padding.') + pad_r = (self.window_size - W_ % self.window_size) % self.window_size + pad_b = (self.window_size - H_ % self.window_size) % self.window_size + x = F.pad(x, (0, pad_r, 0, pad_b)) + + # print('2', x.shape) + B, C, H, W = x.shape + L = H * W + x = x.permute(0, 2, 3, 1).contiguous().view(B, L, C) # b, L, c + + # create mask from init to forward + if self.shift_size > 0: + attn_mask = self.create_mask(H, W).to(x.device) + else: + attn_mask = None + + shortcut = x + x = x.view(B, H, W, C) + + # cyclic shift + if self.shift_size > 0: + shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) + else: + shifted_x = x + + # partition windows + x_windows = window_partition_v2(shifted_x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA + attn_windows = self.attn(x_windows, mask=attn_mask) # nW*B, window_size*window_size, C + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + shifted_x = window_reverse_v2(attn_windows, self.window_size, H, W) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) + else: + x = shifted_x + x = x.view(B, H * W, C) + x = shortcut + self.drop_path(self.norm1(x)) + + # FFN + x = x + self.drop_path(self.norm2(self.mlp(x))) + x = x.permute(0, 2, 1).contiguous().view(-1, C, H, W) # b c h w + + if Padding: + x = x[:, :, :H_, :W_] # reverse padding + + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \ + f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}" + + def flops(self): + flops = 0 + H, W = self.input_resolution + # norm1 + flops += self.dim * H * W + # W-MSA/SW-MSA + 
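        # The feature map is tiled into nW = (H / window_size) * (W / window_size)
        # windows, and each window pays the per-window attention cost returned by
        # WindowAttention_v2.flops(window_size * window_size).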
nW = H * W / self.window_size / self.window_size + flops += nW * self.attn.flops(self.window_size * self.window_size) + # mlp + flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio + # norm2 + flops += self.dim * H * W + return flops + + +class SwinTransformer2Block(nn.Module): + def __init__(self, c1, c2, num_heads, num_layers, window_size=7): + super().__init__() + self.conv = None + if c1 != c2: + self.conv = Conv(c1, c2) + + # remove input_resolution + self.blocks = nn.Sequential(*[SwinTransformerLayer_v2(dim=c2, num_heads=num_heads, window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2) for i in range(num_layers)]) + + def forward(self, x): + if self.conv is not None: + x = self.conv(x) + x = self.blocks(x) + return x + + +class ST2CSPA(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(ST2CSPA, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c1, c_, 1, 1) + self.cv3 = Conv(2 * c_, c2, 1, 1) + num_heads = c_ // 32 + self.m = SwinTransformer2Block(c_, c_, num_heads, n) + #self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + y1 = self.m(self.cv1(x)) + y2 = self.cv2(x) + return self.cv3(torch.cat((y1, y2), dim=1)) + + +class ST2CSPB(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(ST2CSPB, self).__init__() + c_ = int(c2) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_, c_, 1, 1) + self.cv3 = Conv(2 * c_, c2, 1, 1) + num_heads = c_ // 32 + self.m = SwinTransformer2Block(c_, c_, num_heads, n) + #self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + x1 = self.cv1(x) + y1 = self.m(x1) + y2 = self.cv2(x1) + return self.cv3(torch.cat((y1, y2), dim=1)) + + +class ST2CSPC(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(ST2CSPC, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c1, c_, 1, 1) + self.cv3 = Conv(c_, c_, 1, 1) + self.cv4 = Conv(2 * c_, c2, 1, 1) + num_heads = c_ // 32 + self.m = SwinTransformer2Block(c_, c_, num_heads, n) + #self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + return self.cv4(torch.cat((y1, y2), dim=1)) + +##### end of swin transformer v2 ##### diff --git a/asone/detectors/yolov7/yolov7/models/experimental.py b/asone/detectors/yolov7/yolov7/models/experimental.py new file mode 100644 index 0000000000000000000000000000000000000000..1f847b2b5251566a6924f29b2e3a4590ad3f1d50 --- /dev/null +++ b/asone/detectors/yolov7/yolov7/models/experimental.py @@ -0,0 +1,45 @@ +import torch +import torch.nn as nn + +from .common import Conv + +class Ensemble(nn.ModuleList): + # Ensemble of models + def __init__(self): + super(Ensemble, self).__init__() + + def forward(self, x, augment=False): + y = [] + for module in self: + y.append(module(x, augment)[0]) + # y = torch.stack(y).max(0)[0] # max ensemble + # y = 
torch.stack(y).mean(0) # mean ensemble + y = torch.cat(y, 1) # nms ensemble + return y, None # inference, train output + +def attempt_load(weights, map_location=None): + # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a + + model = Ensemble() + for w in weights if isinstance(weights, list) else [weights]: + ckpt = torch.load(w, map_location=map_location) # load + model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model + + # Compatibility updates + for m in model.modules(): + if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: + m.inplace = True # pytorch 1.7.0 compatibility + elif type(m) is nn.Upsample: + m.recompute_scale_factor = None # torch 1.11.0 compatibility + elif type(m) is Conv: + m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility + + if len(model) == 1: + return model[-1] # return model + else: + print('Ensemble created with %s\n' % weights) + for k in ['names', 'stride']: + setattr(model, k, getattr(model[-1], k)) + return model # return ensemble + + diff --git a/asone/detectors/yolov7/yolov7/models/yolo.py b/asone/detectors/yolov7/yolov7/models/yolo.py new file mode 100644 index 0000000000000000000000000000000000000000..a15505aea595504f3898259b422cc3e1af2b6117 --- /dev/null +++ b/asone/detectors/yolov7/yolov7/models/yolo.py @@ -0,0 +1,936 @@ +from asone.detectors.yolov7.yolov7.utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, \ + initialize_weights, select_device, copy_attr +from asone.detectors.yolov7.yolov7.models.experimental import * +from asone.detectors.yolov7.yolov7.models.common import * +import torch +import argparse +import logging +import sys +from copy import deepcopy + +# sys.path.append('./') # to run '$ python *.py' files in subdirectories +logger = logging.getLogger(__name__) + + +try: + import thop # for FLOPS computation +except ImportError: + thop = None + + +class Detect(nn.Module): + stride = None # strides computed during build + export = False # onnx export + end2end = False + include_nms = False + concat = False + + def __init__(self, nc=80, anchors=(), ch=()): # detection layer + super(Detect, self).__init__() + self.nc = nc # number of classes + self.no = nc + 5 # number of outputs per anchor + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + self.grid = [torch.zeros(1)] * self.nl # init grid + a = torch.tensor(anchors).float().view(self.nl, -1, 2) + self.register_buffer('anchors', a) # shape(nl,na,2) + self.register_buffer('anchor_grid', a.clone().view( + self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) + self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) + for x in ch) # output conv + + def forward(self, x): + # x = x.copy() # for profiling + z = [] # inference output + self.training |= self.export + for i in range(self.nl): + x[i] = self.m[i](x[i]) # conv + bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) + x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute( + 0, 1, 3, 4, 2).contiguous() + + if not self.training: # inference + if self.grid[i].shape[2:4] != x[i].shape[2:4]: + self.grid[i] = self._make_grid(nx, ny).to(x[i].device) + y = x[i].sigmoid() + if not torch.onnx.is_in_onnx_export(): + y[..., 0:2] = (y[..., 0:2] * 2. 
- 0.5 + + self.grid[i]) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * \ + self.anchor_grid[i] # wh + else: + # y.tensor_split((2, 4, 5), 4) # torch 1.8.0 + xy, wh, conf = y.split((2, 2, self.nc + 1), 4) + # new xy + xy = xy * (2. * self.stride[i]) + \ + (self.stride[i] * (self.grid[i] - 0.5)) + wh = wh ** 2 * (4 * self.anchor_grid[i].data) # new wh + y = torch.cat((xy, wh, conf), 4) + z.append(y.view(bs, -1, self.no)) + + if self.training: + out = x + elif self.end2end: + out = torch.cat(z, 1) + elif self.include_nms: + z = self.convert(z) + out = (z, ) + elif self.concat: + out = torch.cat(z, 1) + else: + out = (torch.cat(z, 1), x) + + return out + + @staticmethod + def _make_grid(nx=20, ny=20): + yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) + return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() + + def convert(self, z): + z = torch.cat(z, 1) + box = z[:, :, :4] + conf = z[:, :, 4:5] + score = z[:, :, 5:] + score *= conf + convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], + dtype=torch.float32, + device=z.device) + box @= convert_matrix + return (box, score) + + +class IDetect(nn.Module): + stride = None # strides computed during build + export = False # onnx export + end2end = False + include_nms = False + concat = False + + def __init__(self, nc=80, anchors=(), ch=()): # detection layer + super(IDetect, self).__init__() + self.nc = nc # number of classes + self.no = nc + 5 # number of outputs per anchor + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + self.grid = [torch.zeros(1)] * self.nl # init grid + a = torch.tensor(anchors).float().view(self.nl, -1, 2) + self.register_buffer('anchors', a) # shape(nl,na,2) + self.register_buffer('anchor_grid', a.clone().view( + self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) + self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) + for x in ch) # output conv + + self.ia = nn.ModuleList(ImplicitA(x) for x in ch) + self.im = nn.ModuleList(ImplicitM(self.no * self.na) for _ in ch) + + def forward(self, x): + # x = x.copy() # for profiling + z = [] # inference output + self.training |= self.export + for i in range(self.nl): + x[i] = self.m[i](self.ia[i](x[i])) # conv + x[i] = self.im[i](x[i]) + bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) + x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute( + 0, 1, 3, 4, 2).contiguous() + + if not self.training: # inference + if self.grid[i].shape[2:4] != x[i].shape[2:4]: + self.grid[i] = self._make_grid(nx, ny).to(x[i].device) + + y = x[i].sigmoid() + y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + + self.grid[i]) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * \ + self.anchor_grid[i] # wh + z.append(y.view(bs, -1, self.no)) + + return x if self.training else (torch.cat(z, 1), x) + + def fuseforward(self, x): + # x = x.copy() # for profiling + z = [] # inference output + self.training |= self.export + for i in range(self.nl): + x[i] = self.m[i](x[i]) # conv + bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) + x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute( + 0, 1, 3, 4, 2).contiguous() + + if not self.training: # inference + if self.grid[i].shape[2:4] != x[i].shape[2:4]: + self.grid[i] = self._make_grid(nx, ny).to(x[i].device) + + y = x[i].sigmoid() + if not torch.onnx.is_in_onnx_export(): + y[..., 0:2] = (y[..., 0:2] * 2. 
- 0.5 + + self.grid[i]) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * \ + self.anchor_grid[i] # wh + else: + # y.tensor_split((2, 4, 5), 4) # torch 1.8.0 + xy, wh, conf = y.split((2, 2, self.nc + 1), 4) + # new xy + xy = xy * (2. * self.stride[i]) + \ + (self.stride[i] * (self.grid[i] - 0.5)) + wh = wh ** 2 * (4 * self.anchor_grid[i].data) # new wh + y = torch.cat((xy, wh, conf), 4) + z.append(y.view(bs, -1, self.no)) + + if self.training: + out = x + elif self.end2end: + out = torch.cat(z, 1) + elif self.include_nms: + z = self.convert(z) + out = (z, ) + elif self.concat: + out = torch.cat(z, 1) + else: + out = (torch.cat(z, 1), x) + + return out + + def fuse(self): + print("IDetect.fuse") + # fuse ImplicitA and Convolution + for i in range(len(self.m)): + c1, c2, _, _ = self.m[i].weight.shape + c1_, c2_, _, _ = self.ia[i].implicit.shape + self.m[i].bias += torch.matmul(self.m[i].weight.reshape( + c1, c2), self.ia[i].implicit.reshape(c2_, c1_)).squeeze(1) + + # fuse ImplicitM and Convolution + for i in range(len(self.m)): + c1, c2, _, _ = self.im[i].implicit.shape + self.m[i].bias *= self.im[i].implicit.reshape(c2) + self.m[i].weight *= self.im[i].implicit.transpose(0, 1) + + @staticmethod + def _make_grid(nx=20, ny=20): + yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) + return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() + + def convert(self, z): + z = torch.cat(z, 1) + box = z[:, :, :4] + conf = z[:, :, 4:5] + score = z[:, :, 5:] + score *= conf + convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], + dtype=torch.float32, + device=z.device) + box @= convert_matrix + return (box, score) + + +class IKeypoint(nn.Module): + stride = None # strides computed during build + export = False # onnx export + + def __init__(self, nc=80, anchors=(), nkpt=17, ch=(), inplace=True, dw_conv_kpt=False): # detection layer + super(IKeypoint, self).__init__() + self.nc = nc # number of classes + self.nkpt = nkpt + self.dw_conv_kpt = dw_conv_kpt + # number of outputs per anchor for box and class + self.no_det = (nc + 5) + self.no_kpt = 3*self.nkpt # number of outputs per anchor for keypoints + self.no = self.no_det+self.no_kpt + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + self.grid = [torch.zeros(1)] * self.nl # init grid + self.flip_test = False + a = torch.tensor(anchors).float().view(self.nl, -1, 2) + self.register_buffer('anchors', a) # shape(nl,na,2) + self.register_buffer('anchor_grid', a.clone().view( + self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) + self.m = nn.ModuleList(nn.Conv2d(x, self.no_det * self.na, 1) + for x in ch) # output conv + + self.ia = nn.ModuleList(ImplicitA(x) for x in ch) + self.im = nn.ModuleList(ImplicitM(self.no_det * self.na) for _ in ch) + + if self.nkpt is not None: + if self.dw_conv_kpt: # keypoint head is slightly more complex + self.m_kpt = nn.ModuleList( + nn.Sequential(DWConv(x, x, k=3), Conv(x, x), + DWConv(x, x, k=3), Conv(x, x), + DWConv(x, x, k=3), Conv(x, x), + DWConv(x, x, k=3), Conv(x, x), + DWConv(x, x, k=3), Conv(x, x), + DWConv(x, x, k=3), nn.Conv2d(x, self.no_kpt * self.na, 1)) for x in ch) + else: # keypoint head is a single convolution + self.m_kpt = nn.ModuleList( + nn.Conv2d(x, self.no_kpt * self.na, 1) for x in ch) + + self.inplace = inplace # use in-place ops (e.g. 
slice assignment) + + def forward(self, x): + # x = x.copy() # for profiling + z = [] # inference output + self.training |= self.export + for i in range(self.nl): + if self.nkpt is None or self.nkpt == 0: + x[i] = self.im[i](self.m[i](self.ia[i](x[i]))) # conv + else: + x[i] = torch.cat( + (self.im[i](self.m[i](self.ia[i](x[i]))), self.m_kpt[i](x[i])), axis=1) + + bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) + x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute( + 0, 1, 3, 4, 2).contiguous() + x_det = x[i][..., :6] + x_kpt = x[i][..., 6:] + + if not self.training: # inference + if self.grid[i].shape[2:4] != x[i].shape[2:4]: + self.grid[i] = self._make_grid(nx, ny).to(x[i].device) + kpt_grid_x = self.grid[i][..., 0:1] + kpt_grid_y = self.grid[i][..., 1:2] + + if self.nkpt == 0: + y = x[i].sigmoid() + else: + y = x_det.sigmoid() + + if self.inplace: + xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * \ + self.stride[i] # xy + wh = (y[..., 2:4] * 2) ** 2 * \ + self.anchor_grid[i].view(1, self.na, 1, 1, 2) # wh + if self.nkpt != 0: + x_kpt[..., 0::3] = ( + x_kpt[..., ::3] * 2. - 0.5 + kpt_grid_x.repeat(1, 1, 1, 1, 17)) * self.stride[i] # xy + x_kpt[..., 1::3] = ( + x_kpt[..., 1::3] * 2. - 0.5 + kpt_grid_y.repeat(1, 1, 1, 1, 17)) * self.stride[i] # xy + # x_kpt[..., 0::3] = (x_kpt[..., ::3] + kpt_grid_x.repeat(1,1,1,1,17)) * self.stride[i] # xy + # x_kpt[..., 1::3] = (x_kpt[..., 1::3] + kpt_grid_y.repeat(1,1,1,1,17)) * self.stride[i] # xy + # print('=============') + # print(self.anchor_grid[i].shape) + # print(self.anchor_grid[i][...,0].unsqueeze(4).shape) + #print(x_kpt[..., 0::3].shape) + # x_kpt[..., 0::3] = ((x_kpt[..., 0::3].tanh() * 2.) ** 3 * self.anchor_grid[i][...,0].unsqueeze(4).repeat(1,1,1,1,self.nkpt)) + kpt_grid_x.repeat(1,1,1,1,17) * self.stride[i] # xy + # x_kpt[..., 1::3] = ((x_kpt[..., 1::3].tanh() * 2.) ** 3 * self.anchor_grid[i][...,1].unsqueeze(4).repeat(1,1,1,1,self.nkpt)) + kpt_grid_y.repeat(1,1,1,1,17) * self.stride[i] # xy + # x_kpt[..., 0::3] = (((x_kpt[..., 0::3].sigmoid() * 4.) ** 2 - 8.) * self.anchor_grid[i][...,0].unsqueeze(4).repeat(1,1,1,1,self.nkpt)) + kpt_grid_x.repeat(1,1,1,1,17) * self.stride[i] # xy + # x_kpt[..., 1::3] = (((x_kpt[..., 1::3].sigmoid() * 4.) ** 2 - 8.) * self.anchor_grid[i][...,1].unsqueeze(4).repeat(1,1,1,1,self.nkpt)) + kpt_grid_y.repeat(1,1,1,1,17) * self.stride[i] # xy + x_kpt[..., 2::3] = x_kpt[..., 2::3].sigmoid() + + y = torch.cat((xy, wh, y[..., 4:], x_kpt), dim=-1) + + else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 + xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * \ + self.stride[i] # xy + wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + if self.nkpt != 0: + y[..., 6:] = (y[..., 6:] * 2. 
- 0.5 + self.grid[i].repeat( + (1, 1, 1, 1, self.nkpt))) * self.stride[i] # xy + y = torch.cat((xy, wh, y[..., 4:]), -1) + + z.append(y.view(bs, -1, self.no)) + + return x if self.training else (torch.cat(z, 1), x) + + @staticmethod + def _make_grid(nx=20, ny=20): + yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) + return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() + + +class IAuxDetect(nn.Module): + stride = None # strides computed during build + export = False # onnx export + end2end = False + include_nms = False + concat = False + + def __init__(self, nc=80, anchors=(), ch=()): # detection layer + super(IAuxDetect, self).__init__() + self.nc = nc # number of classes + self.no = nc + 5 # number of outputs per anchor + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + self.grid = [torch.zeros(1)] * self.nl # init grid + a = torch.tensor(anchors).float().view(self.nl, -1, 2) + self.register_buffer('anchors', a) # shape(nl,na,2) + self.register_buffer('anchor_grid', a.clone().view( + self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) + self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) + for x in ch[:self.nl]) # output conv + self.m2 = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) + for x in ch[self.nl:]) # output conv + + self.ia = nn.ModuleList(ImplicitA(x) for x in ch[:self.nl]) + self.im = nn.ModuleList(ImplicitM(self.no * self.na) + for _ in ch[:self.nl]) + + def forward(self, x): + # x = x.copy() # for profiling + z = [] # inference output + self.training |= self.export + for i in range(self.nl): + x[i] = self.m[i](self.ia[i](x[i])) # conv + x[i] = self.im[i](x[i]) + bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) + x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute( + 0, 1, 3, 4, 2).contiguous() + + x[i+self.nl] = self.m2[i](x[i+self.nl]) + x[i+self.nl] = x[i+self.nl].view(bs, self.na, self.no, + ny, nx).permute(0, 1, 3, 4, 2).contiguous() + + if not self.training: # inference + if self.grid[i].shape[2:4] != x[i].shape[2:4]: + self.grid[i] = self._make_grid(nx, ny).to(x[i].device) + + y = x[i].sigmoid() + if not torch.onnx.is_in_onnx_export(): + y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + + self.grid[i]) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * \ + self.anchor_grid[i] # wh + else: + # y.tensor_split((2, 4, 5), 4) # torch 1.8.0 + xy, wh, conf = y.split((2, 2, self.nc + 1), 4) + # new xy + xy = xy * (2. * self.stride[i]) + \ + (self.stride[i] * (self.grid[i] - 0.5)) + wh = wh ** 2 * (4 * self.anchor_grid[i].data) # new wh + y = torch.cat((xy, wh, conf), 4) + z.append(y.view(bs, -1, self.no)) + + return x if self.training else (torch.cat(z, 1), x[:self.nl]) + + def fuseforward(self, x): + # x = x.copy() # for profiling + z = [] # inference output + self.training |= self.export + for i in range(self.nl): + x[i] = self.m[i](x[i]) # conv + bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) + x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute( + 0, 1, 3, 4, 2).contiguous() + + if not self.training: # inference + if self.grid[i].shape[2:4] != x[i].shape[2:4]: + self.grid[i] = self._make_grid(nx, ny).to(x[i].device) + + y = x[i].sigmoid() + if not torch.onnx.is_in_onnx_export(): + y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + + self.grid[i]) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * \ + self.anchor_grid[i] # wh + else: + xy = (y[..., 0:2] * 2. 
- 0.5 + self.grid[i]) * \ + self.stride[i] # xy + wh = (y[..., 2:4] * 2) ** 2 * \ + self.anchor_grid[i].data # wh + y = torch.cat((xy, wh, y[..., 4:]), -1) + z.append(y.view(bs, -1, self.no)) + + if self.training: + out = x + elif self.end2end: + out = torch.cat(z, 1) + elif self.include_nms: + z = self.convert(z) + out = (z, ) + elif self.concat: + out = torch.cat(z, 1) + else: + out = (torch.cat(z, 1), x) + + return out + + def fuse(self): + print("IAuxDetect.fuse") + # fuse ImplicitA and Convolution + for i in range(len(self.m)): + c1, c2, _, _ = self.m[i].weight.shape + c1_, c2_, _, _ = self.ia[i].implicit.shape + self.m[i].bias += torch.matmul(self.m[i].weight.reshape( + c1, c2), self.ia[i].implicit.reshape(c2_, c1_)).squeeze(1) + + # fuse ImplicitM and Convolution + for i in range(len(self.m)): + c1, c2, _, _ = self.im[i].implicit.shape + self.m[i].bias *= self.im[i].implicit.reshape(c2) + self.m[i].weight *= self.im[i].implicit.transpose(0, 1) + + @staticmethod + def _make_grid(nx=20, ny=20): + yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) + return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() + + def convert(self, z): + z = torch.cat(z, 1) + box = z[:, :, :4] + conf = z[:, :, 4:5] + score = z[:, :, 5:] + score *= conf + convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], + dtype=torch.float32, + device=z.device) + box @= convert_matrix + return (box, score) + + +class IBin(nn.Module): + stride = None # strides computed during build + export = False # onnx export + + def __init__(self, nc=80, anchors=(), ch=(), bin_count=21): # detection layer + super(IBin, self).__init__() + self.nc = nc # number of classes + self.bin_count = bin_count + + self.w_bin_sigmoid = SigmoidBin( + bin_count=self.bin_count, min=0.0, max=4.0) + self.h_bin_sigmoid = SigmoidBin( + bin_count=self.bin_count, min=0.0, max=4.0) + # classes, x,y,obj + self.no = nc + 3 + \ + self.w_bin_sigmoid.get_length() + self.h_bin_sigmoid.get_length() # w-bce, h-bce + # + self.x_bin_sigmoid.get_length() + self.y_bin_sigmoid.get_length() + + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + self.grid = [torch.zeros(1)] * self.nl # init grid + a = torch.tensor(anchors).float().view(self.nl, -1, 2) + self.register_buffer('anchors', a) # shape(nl,na,2) + self.register_buffer('anchor_grid', a.clone().view( + self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) + self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) + for x in ch) # output conv + + self.ia = nn.ModuleList(ImplicitA(x) for x in ch) + self.im = nn.ModuleList(ImplicitM(self.no * self.na) for _ in ch) + + def forward(self, x): + + #self.x_bin_sigmoid.use_fw_regression = True + #self.y_bin_sigmoid.use_fw_regression = True + self.w_bin_sigmoid.use_fw_regression = True + self.h_bin_sigmoid.use_fw_regression = True + + # x = x.copy() # for profiling + z = [] # inference output + self.training |= self.export + for i in range(self.nl): + x[i] = self.m[i](self.ia[i](x[i])) # conv + x[i] = self.im[i](x[i]) + bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) + x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute( + 0, 1, 3, 4, 2).contiguous() + + if not self.training: # inference + if self.grid[i].shape[2:4] != x[i].shape[2:4]: + self.grid[i] = self._make_grid(nx, ny).to(x[i].device) + + y = x[i].sigmoid() + y[..., 0:2] = (y[..., 0:2] * 2. 
- 0.5 + + self.grid[i]) * self.stride[i] # xy + # y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + + #px = (self.x_bin_sigmoid.forward(y[..., 0:12]) + self.grid[i][..., 0]) * self.stride[i] + #py = (self.y_bin_sigmoid.forward(y[..., 12:24]) + self.grid[i][..., 1]) * self.stride[i] + + pw = self.w_bin_sigmoid.forward( + y[..., 2:24]) * self.anchor_grid[i][..., 0] + ph = self.h_bin_sigmoid.forward( + y[..., 24:46]) * self.anchor_grid[i][..., 1] + + #y[..., 0] = px + #y[..., 1] = py + y[..., 2] = pw + y[..., 3] = ph + + y = torch.cat((y[..., 0:4], y[..., 46:]), dim=-1) + + z.append(y.view(bs, -1, y.shape[-1])) + + return x if self.training else (torch.cat(z, 1), x) + + @staticmethod + def _make_grid(nx=20, ny=20): + yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) + return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() + + +class Model(nn.Module): + # model, input channels, number of classes + def __init__(self, cfg='yolor-csp-c.yaml', ch=3, nc=None, anchors=None): + super(Model, self).__init__() + self.traced = False + if isinstance(cfg, dict): + self.yaml = cfg # model dict + else: # is *.yaml + import yaml # for torch hub + self.yaml_file = Path(cfg).name + with open(cfg) as f: + self.yaml = yaml.load(f, Loader=yaml.SafeLoader) # model dict + + # Define model + ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels + if nc and nc != self.yaml['nc']: + logger.info( + f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") + self.yaml['nc'] = nc # override yaml value + if anchors: + logger.info( + f'Overriding model.yaml anchors with anchors={anchors}') + self.yaml['anchors'] = round(anchors) # override yaml value + self.model, self.save = parse_model( + deepcopy(self.yaml), ch=[ch]) # model, savelist + self.names = [str(i) for i in range(self.yaml['nc'])] # default names + # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) + + # Build strides, anchors + m = self.model[-1] # Detect() + if isinstance(m, Detect): + s = 256 # 2x min stride + m.stride = torch.tensor( + [s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward + check_anchor_order(m) + m.anchors /= m.stride.view(-1, 1, 1) + self.stride = m.stride + self._initialize_biases() # only run once + # print('Strides: %s' % m.stride.tolist()) + if isinstance(m, IDetect): + s = 256 # 2x min stride + m.stride = torch.tensor( + [s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward + check_anchor_order(m) + m.anchors /= m.stride.view(-1, 1, 1) + self.stride = m.stride + self._initialize_biases() # only run once + # print('Strides: %s' % m.stride.tolist()) + if isinstance(m, IAuxDetect): + s = 256 # 2x min stride + m.stride = torch.tensor( + [s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[:4]]) # forward + # print(m.stride) + check_anchor_order(m) + m.anchors /= m.stride.view(-1, 1, 1) + self.stride = m.stride + self._initialize_aux_biases() # only run once + # print('Strides: %s' % m.stride.tolist()) + if isinstance(m, IBin): + s = 256 # 2x min stride + m.stride = torch.tensor( + [s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward + check_anchor_order(m) + m.anchors /= m.stride.view(-1, 1, 1) + self.stride = m.stride + self._initialize_biases_bin() # only run once + # print('Strides: %s' % m.stride.tolist()) + if isinstance(m, IKeypoint): + s = 256 # 2x min stride + m.stride = torch.tensor( + [s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward + 
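            # Strides are inferred by pushing a dummy 1 x ch x 256 x 256 tensor through
            # the model and dividing the input size by each output feature-map height;
            # check_anchor_order then ensures anchors are listed from smallest to largest
            # stride, and dividing anchors by the stride expresses them in grid cells
            # rather than pixels.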
check_anchor_order(m) + m.anchors /= m.stride.view(-1, 1, 1) + self.stride = m.stride + self._initialize_biases_kpt() # only run once + # print('Strides: %s' % m.stride.tolist()) + + # Init weights, biases + initialize_weights(self) + self.info() + logger.info('') + + def forward(self, x, augment=False, profile=False): + if augment: + img_size = x.shape[-2:] # height, width + s = [1, 0.83, 0.67] # scales + f = [None, 3, None] # flips (2-ud, 3-lr) + y = [] # outputs + for si, fi in zip(s, f): + xi = scale_img(x.flip(fi) if fi else x, si, + gs=int(self.stride.max())) + yi = self.forward_once(xi)[0] # forward + # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save + yi[..., :4] /= si # de-scale + if fi == 2: + yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud + elif fi == 3: + yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr + y.append(yi) + return torch.cat(y, 1), None # augmented inference, train + else: + # single-scale inference, train + return self.forward_once(x, profile) + + def forward_once(self, x, profile=False): + y, dt = [], [] # outputs + for m in self.model: + if m.f != -1: # if not from previous layer + x = y[m.f] if isinstance(m.f, int) else [ + x if j == -1 else y[j] for j in m.f] # from earlier layers + + if not hasattr(self, 'traced'): + self.traced = False + + if self.traced: + if isinstance(m, Detect) or isinstance(m, IDetect) or isinstance(m, IAuxDetect) or isinstance(m, IKeypoint): + break + + if profile: + c = isinstance(m, (Detect, IDetect, IAuxDetect, IBin)) + o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[ + 0] / 1E9 * 2 if thop else 0 # FLOPS + for _ in range(10): + m(x.copy() if c else x) + t = time_synchronized() + for _ in range(10): + m(x.copy() if c else x) + dt.append((time_synchronized() - t) * 100) + print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) + + x = m(x) # run + + y.append(x if m.i in self.save else None) # save output + + if profile: + print('%.1fms total' % sum(dt)) + return x + + # initialize biases into Detect(), cf is class frequency + def _initialize_biases(self, cf=None): + # https://arxiv.org/abs/1708.02002 section 3.3 + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. + m = self.model[-1] # Detect() module + for mi, s in zip(m.m, m.stride): # from + b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) + # obj (8 objects per 640 image) + b.data[:, 4] += math.log(8 / (640 / s) ** 2) + b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99) + ) if cf is None else torch.log(cf / cf.sum()) # cls + mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) + + # initialize biases into Detect(), cf is class frequency + def _initialize_aux_biases(self, cf=None): + # https://arxiv.org/abs/1708.02002 section 3.3 + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 
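        # As in the focal-loss paper linked above, the objectness bias is set so that
        # roughly 8 objects are expected per 640-pixel image at each stride
        # (math.log(8 / (640 / s) ** 2)), and the class biases assume a prior of about
        # 0.6 / (nc - 0.99) unless a class-frequency tensor cf is supplied.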
+ m = self.model[-1] # Detect() module + for mi, mi2, s in zip(m.m, m.m2, m.stride): # from + b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) + # obj (8 objects per 640 image) + b.data[:, 4] += math.log(8 / (640 / s) ** 2) + b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99) + ) if cf is None else torch.log(cf / cf.sum()) # cls + mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) + b2 = mi2.bias.view(m.na, -1) # conv.bias(255) to (3,85) + # obj (8 objects per 640 image) + b2.data[:, 4] += math.log(8 / (640 / s) ** 2) + b2.data[:, 5:] += math.log(0.6 / (m.nc - 0.99) + ) if cf is None else torch.log(cf / cf.sum()) # cls + mi2.bias = torch.nn.Parameter(b2.view(-1), requires_grad=True) + + # initialize biases into Detect(), cf is class frequency + def _initialize_biases_bin(self, cf=None): + # https://arxiv.org/abs/1708.02002 section 3.3 + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. + m = self.model[-1] # Bin() module + bc = m.bin_count + for mi, s in zip(m.m, m.stride): # from + b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) + old = b[:, (0, 1, 2, bc+3)].data + obj_idx = 2*bc+4 + b[:, :obj_idx].data += math.log(0.6 / (bc + 1 - 0.99)) + # obj (8 objects per 640 image) + b[:, obj_idx].data += math.log(8 / (640 / s) ** 2) + b[:, (obj_idx+1):].data += math.log(0.6 / (m.nc - 0.99) + ) if cf is None else torch.log(cf / cf.sum()) # cls + b[:, (0, 1, 2, bc+3)].data = old + mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) + + # initialize biases into Detect(), cf is class frequency + def _initialize_biases_kpt(self, cf=None): + # https://arxiv.org/abs/1708.02002 section 3.3 + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. + m = self.model[-1] # Detect() module + for mi, s in zip(m.m, m.stride): # from + b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) + # obj (8 objects per 640 image) + b.data[:, 4] += math.log(8 / (640 / s) ** 2) + b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99) + ) if cf is None else torch.log(cf / cf.sum()) # cls + mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) + + def _print_biases(self): + m = self.model[-1] # Detect() module + for mi in m.m: # from + b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) + print(('%6g Conv2d.bias:' + '%10.3g' * 6) % + (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) + + # def _print_weights(self): + # for m in self.model.modules(): + # if type(m) is Bottleneck: + # print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights + + def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers + print('Fusing layers... ') + for m in self.model.modules(): + if isinstance(m, RepConv): + #print(f" fuse_repvgg_block") + m.fuse_repvgg_block() + elif isinstance(m, RepConv_OREPA): + #print(f" switch_to_deploy") + m.switch_to_deploy() + elif type(m) is Conv and hasattr(m, 'bn'): + m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv + delattr(m, 'bn') # remove batchnorm + m.forward = m.fuseforward # update forward + elif isinstance(m, (IDetect, IAuxDetect)): + m.fuse() + m.forward = m.fuseforward + self.info() + return self + + def nms(self, mode=True): # add or remove NMS module + present = type(self.model[-1]) is NMS # last layer is NMS + if mode and not present: + print('Adding NMS... 
') + m = NMS() # module + m.f = -1 # from + m.i = self.model[-1].i + 1 # index + self.model.add_module(name='%s' % m.i, module=m) # add + self.eval() + elif not mode and present: + print('Removing NMS... ') + self.model = self.model[:-1] # remove + return self + + def autoshape(self): # add autoShape module + print('Adding autoShape... ') + m = autoShape(self) # wrap model + copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', + 'stride'), exclude=()) # copy attributes + return m + + def info(self, verbose=False, img_size=640): # print model information + model_info(self, verbose, img_size) + + +def parse_model(d, ch): # model_dict, input_channels(3) + logger.info('\n%3s%18s%3s%10s %-40s%-30s' % + ('', 'from', 'n', 'params', 'module', 'arguments')) + anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] + na = (len(anchors[0]) // 2) if isinstance(anchors, + list) else anchors # number of anchors + no = na * (nc + 5) # number of outputs = anchors * (classes + 5) + + layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out + # from, number, module, args + for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): + m = eval(m) if isinstance(m, str) else m # eval strings + for j, a in enumerate(args): + try: + args[j] = eval(a) if isinstance(a, str) else a # eval strings + except: + pass + + n = max(round(n * gd), 1) if n > 1 else n # depth gain + if m in [nn.Conv2d, Conv, RobustConv, RobustConv2, DWConv, GhostConv, RepConv, RepConv_OREPA, DownC, + SPP, SPPF, SPPCSPC, GhostSPPCSPC, MixConv2d, Focus, Stem, GhostStem, CrossConv, + Bottleneck, BottleneckCSPA, BottleneckCSPB, BottleneckCSPC, + RepBottleneck, RepBottleneckCSPA, RepBottleneckCSPB, RepBottleneckCSPC, + Res, ResCSPA, ResCSPB, ResCSPC, + RepRes, RepResCSPA, RepResCSPB, RepResCSPC, + ResX, ResXCSPA, ResXCSPB, ResXCSPC, + RepResX, RepResXCSPA, RepResXCSPB, RepResXCSPC, + Ghost, GhostCSPA, GhostCSPB, GhostCSPC, + SwinTransformerBlock, STCSPA, STCSPB, STCSPC, + SwinTransformer2Block, ST2CSPA, ST2CSPB, ST2CSPC]: + c1, c2 = ch[f], args[0] + if c2 != no: # if not output + c2 = make_divisible(c2 * gw, 8) + + args = [c1, c2, *args[1:]] + if m in [DownC, SPPCSPC, GhostSPPCSPC, + BottleneckCSPA, BottleneckCSPB, BottleneckCSPC, + RepBottleneckCSPA, RepBottleneckCSPB, RepBottleneckCSPC, + ResCSPA, ResCSPB, ResCSPC, + RepResCSPA, RepResCSPB, RepResCSPC, + ResXCSPA, ResXCSPB, ResXCSPC, + RepResXCSPA, RepResXCSPB, RepResXCSPC, + GhostCSPA, GhostCSPB, GhostCSPC, + STCSPA, STCSPB, STCSPC, + ST2CSPA, ST2CSPB, ST2CSPC]: + args.insert(2, n) # number of repeats + n = 1 + elif m is nn.BatchNorm2d: + args = [ch[f]] + elif m is Concat: + c2 = sum([ch[x] for x in f]) + elif m is Chuncat: + c2 = sum([ch[x] for x in f]) + elif m is Shortcut: + c2 = ch[f[0]] + elif m is Foldcut: + c2 = ch[f] // 2 + elif m in [Detect, IDetect, IAuxDetect, IBin, IKeypoint]: + args.append([ch[x] for x in f]) + if isinstance(args[1], int): # number of anchors + args[1] = [list(range(args[1] * 2))] * len(f) + elif m is ReOrg: + c2 = ch[f] * 4 + elif m is Contract: + c2 = ch[f] * args[0] ** 2 + elif m is Expand: + c2 = ch[f] // args[0] ** 2 + else: + c2 = ch[f] + + m_ = nn.Sequential(*[m(*args) for _ in range(n)] + ) if n > 1 else m(*args) # module + t = str(m)[8:-2].replace('__main__.', '') # module type + np = sum([x.numel() for x in m_.parameters()]) # number params + # attach index, 'from' index, type, number params + m_.i, m_.f, m_.type, m_.np = i, f, t, np + logger.info('%3s%18s%3s%10.0f %-40s%-30s' % + (i, f, n, np, t, args)) # print 
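        # Every non -1 'from' index of this layer is recorded in the savelist so that
        # forward_once() keeps those intermediate outputs for later Concat / Detect
        # heads instead of discarding them.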
+ save.extend(x % i for x in ([f] if isinstance( + f, int) else f) if x != -1) # append to savelist + layers.append(m_) + if i == 0: + ch = [] + ch.append(c2) + return nn.Sequential(*layers), sorted(save) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--cfg', type=str, + default='yolor-csp-c.yaml', help='model.yaml') + parser.add_argument('--device', default='', + help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--profile', action='store_true', + help='profile model speed') + opt = parser.parse_args() + opt.cfg = check_file(opt.cfg) # check file + set_logging() + device = select_device(opt.device) + + # Create model + model = Model(opt.cfg).to(device) + model.train() + + if opt.profile: + img = torch.rand(1, 3, 640, 640).to(device) + y = model(img, profile=True) + + # Profile + # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device) + # y = model(img, profile=True) + + # Tensorboard + # from torch.utils.tensorboard import SummaryWriter + # tb_writer = SummaryWriter() + # print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/") + # tb_writer.add_graph(model.model, img) # add model to tensorboard + # tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard diff --git a/asone/detectors/yolov7/yolov7/utils/__init__.py b/asone/detectors/yolov7/yolov7/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/asone/detectors/yolov7/yolov7/utils/torch_utils.py b/asone/detectors/yolov7/yolov7/utils/torch_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..1e631b555508457a4944c11a479176463719c0e8 --- /dev/null +++ b/asone/detectors/yolov7/yolov7/utils/torch_utils.py @@ -0,0 +1,374 @@ +# YOLOR PyTorch utils + +import datetime +import logging +import math +import os +import platform +import subprocess +import time +from contextlib import contextmanager +from copy import deepcopy +from pathlib import Path + +import torch +import torch.backends.cudnn as cudnn +import torch.nn as nn +import torch.nn.functional as F +import torchvision + +try: + import thop # for FLOPS computation +except ImportError: + thop = None +logger = logging.getLogger(__name__) + + +@contextmanager +def torch_distributed_zero_first(local_rank: int): + """ + Decorator to make all processes in distributed training wait for each local_master to do something. + """ + if local_rank not in [-1, 0]: + torch.distributed.barrier() + yield + if local_rank == 0: + torch.distributed.barrier() + + +def init_torch_seeds(seed=0): + # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html + torch.manual_seed(seed) + if seed == 0: # slower, more reproducible + cudnn.benchmark, cudnn.deterministic = False, True + else: # faster, less reproducible + cudnn.benchmark, cudnn.deterministic = True, False + + +def date_modified(path=__file__): + # return human-readable file modification date, i.e. '2021-3-26' + t = datetime.datetime.fromtimestamp(Path(path).stat().st_mtime) + return f'{t.year}-{t.month}-{t.day}' + + +def git_describe(path=Path(__file__).parent): # path must be a directory + # return human-readable git description, i.e. 
v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe + s = f'git -C {path} describe --tags --long --always' + try: + return subprocess.check_output(s, shell=True, stderr=subprocess.STDOUT).decode()[:-1] + except subprocess.CalledProcessError as e: + return '' # not a git repository + + +def select_device(device='', batch_size=None): + # device = 'cpu' or '0' or '0,1,2,3' + s = f'YOLOR 🚀 {git_describe() or date_modified()} torch {torch.__version__} ' # string + cpu = device.lower() == 'cpu' + if cpu: + os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False + elif device: # non-cpu device requested + os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable + assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availability + + cuda = not cpu and torch.cuda.is_available() + if cuda: + n = torch.cuda.device_count() + if n > 1 and batch_size: # check that batch_size is compatible with device_count + assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}' + space = ' ' * len(s) + for i, d in enumerate(device.split(',') if device else range(n)): + p = torch.cuda.get_device_properties(i) + s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2}MB)\n" # bytes to MB + else: + s += 'CPU\n' + + logger.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s) # emoji-safe + return torch.device('cuda:0' if cuda else 'cpu') + + +def time_synchronized(): + # pytorch-accurate time + if torch.cuda.is_available(): + torch.cuda.synchronize() + return time.time() + + +def profile(x, ops, n=100, device=None): + # profile a pytorch module or list of modules. Example usage: + # x = torch.randn(16, 3, 640, 640) # input + # m1 = lambda x: x * torch.sigmoid(x) + # m2 = nn.SiLU() + # profile(x, [m1, m2], n=100) # profile speed over 100 iterations + + device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') + x = x.to(device) + x.requires_grad = True + print(torch.__version__, device.type, torch.cuda.get_device_properties(0) if device.type == 'cuda' else '') + print(f"\n{'Params':>12s}{'GFLOPS':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}") + for m in ops if isinstance(ops, list) else [ops]: + m = m.to(device) if hasattr(m, 'to') else m # device + m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m # type + dtf, dtb, t = 0., 0., [0., 0., 0.] 
# dt forward, backward + try: + flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPS + except: + flops = 0 + + for _ in range(n): + t[0] = time_synchronized() + y = m(x) + t[1] = time_synchronized() + try: + _ = y.sum().backward() + t[2] = time_synchronized() + except: # no backward method + t[2] = float('nan') + dtf += (t[1] - t[0]) * 1000 / n # ms per op forward + dtb += (t[2] - t[1]) * 1000 / n # ms per op backward + + s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' + s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list' + p = sum(list(x.numel() for x in m.parameters())) if isinstance(m, nn.Module) else 0 # parameters + print(f'{p:12}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}') + + +def is_parallel(model): + return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) + + +def intersect_dicts(da, db, exclude=()): + # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values + return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} + + +def initialize_weights(model): + for m in model.modules(): + t = type(m) + if t is nn.Conv2d: + pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif t is nn.BatchNorm2d: + m.eps = 1e-3 + m.momentum = 0.03 + elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: + m.inplace = True + + +def find_modules(model, mclass=nn.Conv2d): + # Finds layer indices matching module class 'mclass' + return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] + + +def sparsity(model): + # Return global model sparsity + a, b = 0., 0. + for p in model.parameters(): + a += p.numel() + b += (p == 0).sum() + return b / a + + +def prune(model, amount=0.3): + # Prune model to requested global sparsity + import torch.nn.utils.prune as prune + print('Pruning model... ', end='') + for name, m in model.named_modules(): + if isinstance(m, nn.Conv2d): + prune.l1_unstructured(m, name='weight', amount=amount) # prune + prune.remove(m, 'weight') # make permanent + print(' %.3g global sparsity' % sparsity(model)) + + +def fuse_conv_and_bn(conv, bn): + # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ + fusedconv = nn.Conv2d(conv.in_channels, + conv.out_channels, + kernel_size=conv.kernel_size, + stride=conv.stride, + padding=conv.padding, + groups=conv.groups, + bias=True).requires_grad_(False).to(conv.weight.device) + + # prepare filters + w_conv = conv.weight.clone().view(conv.out_channels, -1) + w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) + fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape)) + + # prepare spatial bias + b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias + b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) + fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) + + return fusedconv + + +def model_info(model, verbose=False, img_size=640): + # Model information. img_size may be int or list, i.e. 
img_size=640 or img_size=[640, 320] + n_p = sum(x.numel() for x in model.parameters()) # number parameters + n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients + if verbose: + print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) + for i, (name, p) in enumerate(model.named_parameters()): + name = name.replace('module_list.', '') + print('%5g %40s %9s %12g %20s %10.3g %10.3g' % + (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) + + try: # FLOPS + from thop import profile + stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32 + img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device) # input + flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2 # stride GFLOPS + img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float + fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / stride) # 640x640 GFLOPS + except (ImportError, Exception): + fs = '' + + logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") + + +def load_classifier(name='resnet101', n=2): + # Loads a pretrained model reshaped to n-class output + model = torchvision.models.__dict__[name](pretrained=True) + + # ResNet model properties + # input_size = [3, 224, 224] + # input_space = 'RGB' + # input_range = [0, 1] + # mean = [0.485, 0.456, 0.406] + # std = [0.229, 0.224, 0.225] + + # Reshape output to n classes + filters = model.fc.weight.shape[1] + model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True) + model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True) + model.fc.out_features = n + return model + + +def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) + # scales img(bs,3,y,x) by ratio constrained to gs-multiple + if ratio == 1.0: + return img + else: + h, w = img.shape[2:] + s = (int(h * ratio), int(w * ratio)) # new size + img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize + if not same_shape: # pad/crop img + h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)] + return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean + + +def copy_attr(a, b, include=(), exclude=()): + # Copy attributes from b to a, options to only include [...] and to exclude [...] + for k, v in b.__dict__.items(): + if (len(include) and k not in include) or k.startswith('_') or k in exclude: + continue + else: + setattr(a, k, v) + + +class ModelEMA: + """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models + Keep a moving average of everything in the model state_dict (parameters and buffers). + This is intended to allow functionality like + https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage + A smoothed version of the weights is necessary for some training schemes to perform well. + This class is sensitive where it is initialized in the sequence of model init, + GPU assignment and distributed training wrappers. 
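+
+    Illustrative usage sketch (not taken from this repo; `loader`, `optimizer`,
+    `compute_loss` and `evaluate` are placeholder names):
+
+        ema = ModelEMA(model)
+        for imgs, targets in loader:
+            loss = compute_loss(model(imgs), targets)
+            loss.backward()
+            optimizer.step()
+            optimizer.zero_grad()
+            ema.update(model)   # EMA weights track the live model after each step
+        evaluate(ema.ema)       # evaluate/checkpoint the smoothed copy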
+ """ + + def __init__(self, model, decay=0.9999, updates=0): + # Create EMA + self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA + # if next(model.parameters()).device.type != 'cpu': + # self.ema.half() # FP16 EMA + self.updates = updates # number of EMA updates + self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs) + for p in self.ema.parameters(): + p.requires_grad_(False) + + def update(self, model): + # Update EMA parameters + with torch.no_grad(): + self.updates += 1 + d = self.decay(self.updates) + + msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict + for k, v in self.ema.state_dict().items(): + if v.dtype.is_floating_point: + v *= d + v += (1. - d) * msd[k].detach() + + def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): + # Update EMA attributes + copy_attr(self.ema, model, include, exclude) + + +class BatchNormXd(torch.nn.modules.batchnorm._BatchNorm): + def _check_input_dim(self, input): + # The only difference between BatchNorm1d, BatchNorm2d, BatchNorm3d, etc + # is this method that is overwritten by the sub-class + # This original goal of this method was for tensor sanity checks + # If you're ok bypassing those sanity checks (eg. if you trust your inference + # to provide the right dimensional inputs), then you can just use this method + # for easy conversion from SyncBatchNorm + # (unfortunately, SyncBatchNorm does not store the original class - if it did + # we could return the one that was originally created) + return + +def revert_sync_batchnorm(module): + # this is very similar to the function that it is trying to revert: + # https://github.com/pytorch/pytorch/blob/c8b3686a3e4ba63dc59e5dcfe5db3430df256833/torch/nn/modules/batchnorm.py#L679 + module_output = module + if isinstance(module, torch.nn.modules.batchnorm.SyncBatchNorm): + new_cls = BatchNormXd + module_output = BatchNormXd(module.num_features, + module.eps, module.momentum, + module.affine, + module.track_running_stats) + if module.affine: + with torch.no_grad(): + module_output.weight = module.weight + module_output.bias = module.bias + module_output.running_mean = module.running_mean + module_output.running_var = module.running_var + module_output.num_batches_tracked = module.num_batches_tracked + if hasattr(module, "qconfig"): + module_output.qconfig = module.qconfig + for name, child in module.named_children(): + module_output.add_module(name, revert_sync_batchnorm(child)) + del module + return module_output + + +class TracedModel(nn.Module): + + def __init__(self, model=None, device=None, img_size=(640,640)): + super(TracedModel, self).__init__() + + print(" Convert model to Traced-model... ") + self.stride = model.stride + self.names = model.names + self.model = model + + self.model = revert_sync_batchnorm(self.model) + self.model.to('cpu') + self.model.eval() + + self.detect_layer = self.model.model[-1] + self.model.traced = True + + rand_example = torch.rand(1, 3, img_size, img_size) + + traced_script_module = torch.jit.trace(self.model, rand_example, strict=False) + #traced_script_module = torch.jit.script(self.model) + traced_script_module.save("traced_model.pt") + print(" traced_script_module saved! ") + self.model = traced_script_module + self.model.to(device) + self.detect_layer.to(device) + print(" model is traced! 
\n") + + def forward(self, x, augment=False, profile=False): + out = self.model(x) + out = self.detect_layer(out) + return out \ No newline at end of file diff --git a/asone/detectors/yolov7/yolov7/utils/yolov7_utils.py b/asone/detectors/yolov7/yolov7/utils/yolov7_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..cb108ff20373635ee2f2046bf805b4e499e1dd16 --- /dev/null +++ b/asone/detectors/yolov7/yolov7/utils/yolov7_utils.py @@ -0,0 +1,230 @@ +import cv2 +import numpy as np +import torch +import torchvision +import time + +def prepare_input(image, input_shape): + input_height, input_width = input_shape + input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + # Resize input image + input_img = cv2.resize(input_img, (input_width, input_height)) + # Scale input pixel values to 0 to 1 + input_img = input_img / 255.0 + input_img = input_img.transpose(2, 0, 1) + input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32) + + return input_tensor + +def process_output(output, ori_shape, input_shape, conf_threshold, iou_threshold): + predictions = output[0] + # predictions = np.squeeze(output[0]) + + # Filter out object confidence scores below threshold + # obj_conf = predictions[:, 4] + obj_conf = predictions[:, 6] + # predictions = predictions[obj_conf > conf_threshold] + # obj_conf = obj_conf[obj_conf > conf_threshold] + + # print(obj_conf[0]) + + # Multiply class confidence with bounding box confidence + # predictions[:, 5] *= obj_conf[:, np.newaxis] + # predictions[:, 6] *= obj_conf + + # Get the scores + # scores = np.max(predictions[:, 5:], axis=1) + scores = predictions[:, 6] + + # Filter out the objects with a low score + predictions = predictions[obj_conf > conf_threshold] + scores = scores[scores > conf_threshold] + + if len(scores) == 0: + return [] + + # Get the class with the highest confidence + # class_ids = np.argmax(predictions[:, 5:], axis=1) + class_ids = predictions[:, 5].astype(np.uint16) + + # Extract boxes from predictions + boxes = predictions[:, 1:5] + + # Scale boxes to original image dimensions + boxes = rescale_boxes(boxes, ori_shape, input_shape) + + # Convert boxes to xyxy format + # boxes = xywh2xyxy(boxes) + + # Apply non-maxima suppression to suppress weak, overlapping bounding boxes + indices = nms(boxes, scores, iou_threshold) + + dets = [] + for i in indices: + dets.append([*boxes[i], scores[i], class_ids[i]]) + + # return boxes[indices], scores[indices], class_ids[indices] + return np.array(dets) + + +def rescale_boxes(boxes, ori_shape, input_shape): + + input_height, input_width = input_shape + img_height, img_width = ori_shape + # Rescale boxes to original image dimensions + input_shape = np.array([input_width, input_height, input_width, input_height]) + boxes = np.divide(boxes, input_shape, dtype=np.float32) + boxes *= np.array([img_width, img_height, img_width, img_height]) + return boxes + +def nms(boxes, scores, iou_threshold): + # Sort by score + sorted_indices = np.argsort(scores)[::-1] + + keep_boxes = [] + while sorted_indices.size > 0: + # Pick the last box + box_id = sorted_indices[0] + keep_boxes.append(box_id) + + # Compute IoU of the picked box with the rest + ious = compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :]) + + # Remove boxes with IoU over the threshold + keep_indices = np.where(ious < iou_threshold)[0] + + # print(keep_indices.shape, sorted_indices.shape) + sorted_indices = sorted_indices[keep_indices + 1] + + return keep_boxes + + +def compute_iou(box, boxes): + # Compute xmin, ymin, xmax, ymax 
for both boxes + xmin = np.maximum(box[0], boxes[:, 0]) + ymin = np.maximum(box[1], boxes[:, 1]) + xmax = np.minimum(box[2], boxes[:, 2]) + ymax = np.minimum(box[3], boxes[:, 3]) + + # Compute intersection area + intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin) + + # Compute union area + box_area = (box[2] - box[0]) * (box[3] - box[1]) + boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + union_area = box_area + boxes_area - intersection_area + + # Compute IoU + iou = intersection_area / union_area + + return iou + + +def xywh2xyxy(x): + # Convert bounding box (x, y, w, h) to bounding box (x1, y1, x2, y2) + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = x[..., 0] - x[..., 2] / 2 + y[..., 1] = x[..., 1] - x[..., 3] / 2 + y[..., 2] = x[..., 0] + x[..., 2] / 2 + y[..., 3] = x[..., 1] + x[..., 3] / 2 + return y + +def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, + labels=()): + """Runs Non-Maximum Suppression (NMS) on inference results + + Returns: + list of detections, on (n,6) tensor per image [xyxy, conf, cls] + """ + + nc = prediction.shape[2] - 5 # number of classes + xc = prediction[..., 4] > conf_thres # candidates + + # Settings + min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height + max_det = 300 # maximum number of detections per image + max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() + time_limit = 10.0 # seconds to quit after + redundant = True # require redundant detections + multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) + merge = False # use merge-NMS + + t = time.time() + output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0] + for xi, x in enumerate(prediction): # image index, image inference + # Apply constraints + # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height + x = x[xc[xi]] # confidence + + # Cat apriori labels if autolabelling + if labels and len(labels[xi]): + l = labels[xi] + v = torch.zeros((len(l), nc + 5), device=x.device) + v[:, :4] = l[:, 1:5] # box + v[:, 4] = 1.0 # conf + v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls + x = torch.cat((x, v), 0) + + # If none remain process next image + if not x.shape[0]: + continue + + # Compute conf + if nc == 1: + x[:, 5:] = x[:, 4:5] # for models with one class, cls_loss is 0 and cls_conf is always 0.5, + # so there is no need to multiplicate. 
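+        # Each row of x is laid out as [cx, cy, w, h, obj_conf, cls_conf_0, ..., cls_conf_{nc-1}];
+        # the multi-class branch forms conf = obj_conf * cls_conf per class before thresholding.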
+ else: + x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf + + # Box (center x, center y, width, height) to (x1, y1, x2, y2) + box = xywh2xyxy(x[:, :4]) + + # Detections matrix nx6 (xyxy, conf, cls) + if multi_label: + i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T + x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) + else: # best class only + conf, j = x[:, 5:].max(1, keepdim=True) + x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] + + # Filter by class + if classes is not None: + x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] + + # Apply finite constraint + # if not torch.isfinite(x).all(): + # x = x[torch.isfinite(x).all(1)] + + # Check shape + n = x.shape[0] # number of boxes + if not n: # no boxes + continue + elif n > max_nms: # excess boxes + x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence + + # Batched NMS + c = x[:, 5:6] * (0 if agnostic else max_wh) # classes + boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores + i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS + if i.shape[0] > max_det: # limit detections + i = i[:max_det] + if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) + # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) + iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix + weights = iou * scores[None] # box weights + x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes + if redundant: + i = i[iou.sum(1) > 1] # require redundancy + + output[xi] = x[i] + if (time.time() - t) > time_limit: + print(f'WARNING: NMS time limit {time_limit}s exceeded') + break # time limit exceeded + + return output + + + + + diff --git a/asone/detectors/yolov7/yolov7_detector.py b/asone/detectors/yolov7/yolov7_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..4e6dc0be492183d638029b3e88fbd8244fd63520 --- /dev/null +++ b/asone/detectors/yolov7/yolov7_detector.py @@ -0,0 +1,123 @@ +import os +import sys +import onnxruntime +import torch +from asone.utils import get_names +import numpy as np +import warnings +from asone.detectors.yolov7.yolov7.utils.yolov7_utils import (prepare_input, + process_output, + non_max_suppression) +from asone.detectors.yolov7.yolov7.models.experimental import attempt_load +from asone import utils + +sys.path.append(os.path.join(os.path.dirname(__file__), 'yolov7')) +class YOLOv7Detector: + def __init__(self, + weights=None, + use_onnx=False, + use_cuda=True): + self.use_onnx = use_onnx + self.device = 'cuda' if use_cuda else 'cpu' + + #If incase weighst is a list of paths then select path at first index + + weights = str(weights[0] if isinstance(weights, list) else weights) + + if not os.path.exists(weights): + utils.download_weights(weights) + + + # Load Model + self.model = self.load_model(use_cuda, weights) + + def load_model(self, use_cuda, weights, fp16=False): + # Device: CUDA and if fp16=True only then half precision floating point works + self.fp16 = fp16 & ((not self.use_onnx or self.use_onnx) and self.device != 'cpu') + # Load onnx + if self.use_onnx: + if use_cuda: + providers = ['CUDAExecutionProvider','CPUExecutionProvider'] + else: + providers = ['CPUExecutionProvider'] + + model = onnxruntime.InferenceSession(weights, providers=providers) + #Load Pytorch + else: + model = attempt_load(weights, map_location=self.device) + model.half() if self.fp16 else model.float() + return model + + + def detect(self, image: list, + input_shape: 
tuple = (640, 640), + conf_thres: float = 0.25, + iou_thres: float = 0.45, + max_det: int = 1000, + filter_classes: bool = None, + agnostic_nms: bool = True, + with_p6: bool = False) -> list: + + # Preprocess input image and also copying original image for later use + original_image = image.copy() + img_height, img_width = original_image.shape[:2] + processed_image = prepare_input(image, input_shape) + + # Perform Inference on the Image + if self.use_onnx: + # Run ONNX model + input_name = self.model.get_inputs()[0].name + prediction = self.model.run([self.model.get_outputs()[0].name], { + input_name: processed_image}) + # Run Pytorch model + else: + processed_image = torch.from_numpy(processed_image).to(self.device) + # Change image floating point precision if fp16 set to true + processed_image = processed_image.half() if self.fp16 else processed_image.float() + + with torch.no_grad(): + prediction = self.model(processed_image, augment=False)[0] + + detection = [] + # Postprocess prediction + if self.use_onnx: + detection = process_output(prediction, + original_image.shape[:2], + input_shape, + conf_thres, + iou_thres) + else: + detection = non_max_suppression(prediction, + conf_thres, + iou_thres, + agnostic=agnostic_nms)[0] + + detection = detection.detach().cpu().numpy() + # Rescaling Bounding Boxes + detection[:, :4] /= np.array([input_shape[1], input_shape[0], input_shape[1], input_shape[0]]) + detection[:, :4] *= np.array([img_width, img_height, img_width, img_height]) + + image_info = { + 'width': original_image.shape[1], + 'height': original_image.shape[0], + } + + if len(detection) > 0: + self.boxes = detection[:, :4] + self.scores = detection[:, 4:5] + self.class_ids = detection[:, 5:6] + + if filter_classes: + class_names = get_names() + + filter_class_idx = [] + if filter_classes: + for _class in filter_classes: + if _class.lower() in class_names: + filter_class_idx.append(class_names.index(_class.lower())) + else: + warnings.warn(f"class {_class} not found in model classes list.") + + detection = detection[np.in1d(detection[:,5].astype(int), filter_class_idx)] + + return detection, image_info diff --git a/asone/detectors/yolov8/__init__.py b/asone/detectors/yolov8/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..faf6aec27583460112b867e7c971d32291d50f41 --- /dev/null +++ b/asone/detectors/yolov8/__init__.py @@ -0,0 +1,2 @@ +from .yolov8_detector import YOLOv8Detector +__all__ = ['YOLOv8Detector'] \ No newline at end of file diff --git a/asone/detectors/yolov8/utils/__init__.py b/asone/detectors/yolov8/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/asone/detectors/yolov8/utils/yolov8_utils.py b/asone/detectors/yolov8/utils/yolov8_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..361fedbe01750ba476da7c6c97e378d51766b724 --- /dev/null +++ b/asone/detectors/yolov8/utils/yolov8_utils.py @@ -0,0 +1,50 @@ +import cv2 +import numpy as np +from ultralytics.yolo.utils import ops +import torch +from ultralytics.yolo.data.augment import LetterBox + +def prepare_input(image, input_shape, stride, pt): + input_tensor = LetterBox(input_shape, auto=pt, stride=stride)(image=image) + input_tensor = input_tensor.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + input_tensor = np.ascontiguousarray(input_tensor).astype(np.float32) # contiguous + input_tensor /= 255.0 # 0 - 255 to 0.0 - 1.0 + input_tensor = input_tensor[None].astype(np.float32) 
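+    # At this point input_tensor has shape (1, 3, H, W), dtype float32, values in [0, 1]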
+ return input_tensor + + +def process_output(detections, + ori_shape, + input_shape, + conf_threshold, + iou_threshold, + classes=None, + agnostic=False, + max_det=300, + ): + detections = ops.non_max_suppression(detections, + conf_thres=conf_threshold, + iou_thres=iou_threshold, + classes=classes, + agnostic=agnostic, + max_det=max_det, + ) + + for i in range(len(detections)): + # Extract boxes from predictions + detections[i][:, :4] = ops.scale_boxes(input_shape, detections[i][:, :4], ori_shape).round() + + + return detections[0].cpu().numpy() + + +def rescale_boxes(boxes, ori_shape, input_shape): + + input_height, input_width = input_shape + img_height, img_width = ori_shape + # Rescale boxes to original image dimensions + input_shape = np.array( + [input_width, input_height, input_width, input_height]) + boxes = np.divide(boxes, input_shape, dtype=np.float32) + boxes *= np.array([img_width, img_height, img_width, img_height]) + return boxes diff --git a/asone/detectors/yolov8/yolov8_detector.py b/asone/detectors/yolov8/yolov8_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..e414b177a183171a39d4fbe885ab43cc92474772 --- /dev/null +++ b/asone/detectors/yolov8/yolov8_detector.py @@ -0,0 +1,114 @@ +import os +from asone import utils +from asone.utils import get_names +import onnxruntime +import torch +from .utils.yolov8_utils import prepare_input, process_output +import numpy as np +import warnings +from ultralytics.nn.autobackend import AutoBackend +from ultralytics.nn.tasks import DetectionModel, attempt_load_one_weight + + +class YOLOv8Detector: + def __init__(self, + weights=None, + use_onnx=False, + use_cuda=True): + + self.use_onnx = use_onnx + self.device = 'cuda' if use_cuda else 'cpu' + + # If incase weighst is a list of paths then select path at first index + weights = str(weights[0] if isinstance(weights, list) else weights) + + if not os.path.exists(weights): + utils.download_weights(weights) + + # Load Model + self.model = self.load_model(use_cuda, weights) + + def load_model(self, use_cuda, weights, fp16=False): + + # Device: CUDA and if fp16=True only then half precision floating point works + self.fp16 = fp16 & ( + (not self.use_onnx or self.use_onnx) and self.device != 'cpu') + + # Load onnx + if self.use_onnx: + if use_cuda: + providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] + else: + providers = ['CPUExecutionProvider'] + + model = onnxruntime.InferenceSession(weights, providers=providers) + # Load Pytorch + else: + model, ckpt = attempt_load_one_weight(weights) + model = AutoBackend(model, fp16=False, dnn=False).to(self.device) + model.half() if self.fp16 else model.float() + return model + + def detect(self, image: list, + input_shape: tuple = (640, 640), + conf_thres: float = 0.25, + iou_thres: float = 0.45, + max_det: int = 1000, + filter_classes: bool = None, + agnostic_nms: bool = True, + with_p6: bool = False + ) -> list: + + # Preprocess input image and also copying original image for later use + original_image = image.copy() + processed_image = prepare_input( + image, input_shape, 32, False if self.use_onnx else True) + + # Perform Inference on the Image + if self.use_onnx: + # Run ONNX model + input_name = self.model.get_inputs()[0].name + prediction = self.model.run([self.model.get_outputs()[0].name], { + input_name: processed_image})[0] + prediction = torch.from_numpy(prediction) + # Run Pytorch model + else: + processed_image = torch.from_numpy(processed_image).to(self.device) + # Change image floating point 
precision if fp16 set to true + processed_image = processed_image.half() if self.fp16 else processed_image.float() + + with torch.no_grad(): + prediction = self.model(processed_image, augment=False) + + detection = [] + # Postprocess prediction + detection = process_output(prediction, + original_image.shape[:2], + processed_image.shape[2:], + conf_thres, + iou_thres, + agnostic=agnostic_nms, + max_det=max_det) + + image_info = { + 'width': original_image.shape[1], + 'height': original_image.shape[0], + } + + if filter_classes: + class_names = get_names() + + filter_class_idx = [] + if filter_classes: + for _class in filter_classes: + if _class.lower() in class_names: + filter_class_idx.append( + class_names.index(_class.lower())) + else: + warnings.warn( + f"class {_class} not found in model classes list.") + + detection = detection[np.in1d( + detection[:, 5].astype(int), filter_class_idx)] + + return detection, image_info diff --git a/asone/detectors/yolox/__init__.py b/asone/detectors/yolox/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e52a8e1fdf74323b366ab7992a075da2843d37a5 --- /dev/null +++ b/asone/detectors/yolox/__init__.py @@ -0,0 +1,2 @@ +from .yolox_detector import YOLOxDetector +__all__ = ['YOLOxDetector'] \ No newline at end of file diff --git a/asone/detectors/yolox/exps/__init__.py b/asone/detectors/yolox/exps/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ce9fae0677b11bdd96e516f4b0b8a3782daed1ec --- /dev/null +++ b/asone/detectors/yolox/exps/__init__.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. diff --git a/asone/detectors/yolox/exps/yolov3.py b/asone/detectors/yolox/exps/yolov3.py new file mode 100644 index 0000000000000000000000000000000000000000..67811d8755f6105141558c6137351f8c8dd17bd9 --- /dev/null +++ b/asone/detectors/yolox/exps/yolov3.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +import torch.nn as nn + +from asone.detectors.yolox.yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 1.0 + self.width = 1.0 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_model(self, sublinear=False): + def init_yolo(M): + for m in M.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eps = 1e-3 + m.momentum = 0.03 + if "model" not in self.__dict__: + from asone.detectors.yolox.yolox.models import YOLOX, YOLOFPN, YOLOXHead + backbone = YOLOFPN() + head = YOLOXHead(self.num_classes, self.width, in_channels=[128, 256, 512], act="lrelu") + self.model = YOLOX(backbone, head) + self.model.apply(init_yolo) + self.model.head.initialize_biases(1e-2) + + return self.model diff --git a/asone/detectors/yolox/exps/yolox_l.py b/asone/detectors/yolox/exps/yolox_l.py new file mode 100644 index 0000000000000000000000000000000000000000..5e7459fe5bd8e0552fa3c4550611a9af5413edab --- /dev/null +++ b/asone/detectors/yolox/exps/yolox_l.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. 
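+
+# Experiment config for YOLOX-L: it overrides the depth/width multipliers of the base
+# Exp (1.0/1.0 for the L model). The sibling exp files in this folder mainly change
+# these two scaling factors (e.g. 0.67/0.75 for M, 0.33/0.50 for S, 0.33/0.25 for Nano).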
+ +import os + +from asone.detectors.yolox.yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 1.0 + self.width = 1.0 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] diff --git a/asone/detectors/yolox/exps/yolox_m.py b/asone/detectors/yolox/exps/yolox_m.py new file mode 100644 index 0000000000000000000000000000000000000000..d165217b78e9f78932fb209acf835b5faa78a6c6 --- /dev/null +++ b/asone/detectors/yolox/exps/yolox_m.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +from asone.detectors.yolox.yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 0.67 + self.width = 0.75 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] diff --git a/asone/detectors/yolox/exps/yolox_nano.py b/asone/detectors/yolox/exps/yolox_nano.py new file mode 100644 index 0000000000000000000000000000000000000000..0308d71675f08e0db2b0e45dd28e98b6a1cf24fb --- /dev/null +++ b/asone/detectors/yolox/exps/yolox_nano.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +import torch.nn as nn + +from asone.detectors.yolox.yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 0.33 + self.width = 0.25 + self.input_size = (416, 416) + self.random_size = (10, 20) + self.mosaic_scale = (0.5, 1.5) + self.test_size = (416, 416) + self.mosaic_prob = 0.5 + self.enable_mixup = False + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_model(self, sublinear=False): + + def init_yolo(M): + for m in M.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eps = 1e-3 + m.momentum = 0.03 + if "model" not in self.__dict__: + from asone.detectors.yolox.yolox.models import YOLOX, YOLOPAFPN, YOLOXHead + in_channels = [256, 512, 1024] + # NANO model use depthwise = True, which is main difference. + backbone = YOLOPAFPN( + self.depth, self.width, in_channels=in_channels, + act=self.act, depthwise=True, + ) + head = YOLOXHead( + self.num_classes, self.width, in_channels=in_channels, + act=self.act, depthwise=True + ) + self.model = YOLOX(backbone, head) + + self.model.apply(init_yolo) + self.model.head.initialize_biases(1e-2) + return self.model diff --git a/asone/detectors/yolox/exps/yolox_s.py b/asone/detectors/yolox/exps/yolox_s.py new file mode 100644 index 0000000000000000000000000000000000000000..6587d8b355b9d958931127c11030c65373285beb --- /dev/null +++ b/asone/detectors/yolox/exps/yolox_s.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +from asone.detectors.yolox.yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 0.33 + self.width = 0.50 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] diff --git a/asone/detectors/yolox/exps/yolox_tiny.py b/asone/detectors/yolox/exps/yolox_tiny.py new file mode 100644 index 0000000000000000000000000000000000000000..570b8be2e961ffe695dc9388d40fb727f15f63ae --- /dev/null +++ b/asone/detectors/yolox/exps/yolox_tiny.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. 
+ +import os + +from asone.detectors.yolox.yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 0.33 + self.width = 0.375 + self.input_size = (416, 416) + self.mosaic_scale = (0.5, 1.5) + self.random_size = (10, 20) + self.test_size = (416, 416) + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.enable_mixup = False diff --git a/asone/detectors/yolox/exps/yolox_x.py b/asone/detectors/yolox/exps/yolox_x.py new file mode 100644 index 0000000000000000000000000000000000000000..e1c719c6e9d48bb01b5f6d35515717779028a8d4 --- /dev/null +++ b/asone/detectors/yolox/exps/yolox_x.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +from asone.detectors.yolox.yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 1.33 + self.width = 1.25 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] diff --git a/asone/detectors/yolox/yolox/__init__.py b/asone/detectors/yolox/yolox/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/asone/detectors/yolox/yolox/exp/__init__.py b/asone/detectors/yolox/yolox/exp/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..10eacd88a466060d36852c620fdbc9f3856ed19b --- /dev/null +++ b/asone/detectors/yolox/yolox/exp/__init__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +from .base_exp import BaseExp +from .build import get_exp +from .yolox_base import Exp diff --git a/asone/detectors/yolox/yolox/exp/base_exp.py b/asone/detectors/yolox/yolox/exp/base_exp.py new file mode 100644 index 0000000000000000000000000000000000000000..ebb39c063853700b44ec639cca74d46af9bdfd1f --- /dev/null +++ b/asone/detectors/yolox/yolox/exp/base_exp.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. 
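+
+# BaseExp is the abstract experiment description. Concrete subclasses (the Exp in
+# yolox_base.py and the exp files under exps/) implement get_model, get_data_loader,
+# get_optimizer, get_lr_scheduler, get_evaluator and eval for a specific setup.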
+ +import ast +import pprint +from abc import ABCMeta, abstractmethod +from typing import Dict +from tabulate import tabulate +from asone.detectors.yolox.yolox.utils import LRScheduler + +import torch +from torch.nn import Module + + + + +class BaseExp(metaclass=ABCMeta): + """Basic class for any experiment.""" + + def __init__(self): + self.seed = None + self.output_dir = "./YOLOX_outputs" + self.print_interval = 100 + self.eval_interval = 10 + + @abstractmethod + def get_model(self) -> Module: + pass + + @abstractmethod + def get_data_loader( + self, batch_size: int, is_distributed: bool + ) -> Dict[str, torch.utils.data.DataLoader]: + pass + + @abstractmethod + def get_optimizer(self, batch_size: int) -> torch.optim.Optimizer: + pass + + @abstractmethod + def get_lr_scheduler( + self, lr: float, iters_per_epoch: int, **kwargs + ) -> LRScheduler: + pass + + @abstractmethod + def get_evaluator(self): + pass + + @abstractmethod + def eval(self, model, evaluator, weights): + pass + + def __repr__(self): + table_header = ["keys", "values"] + exp_table = [ + (str(k), pprint.pformat(v)) + for k, v in vars(self).items() + if not k.startswith("_") + ] + return tabulate(exp_table, headers=table_header, tablefmt="fancy_grid") + + def merge(self, cfg_list): + assert len(cfg_list) % 2 == 0 + for k, v in zip(cfg_list[0::2], cfg_list[1::2]): + # only update value with same key + if hasattr(self, k): + src_value = getattr(self, k) + src_type = type(src_value) + if src_value is not None and src_type != type(v): + try: + v = src_type(v) + except Exception: + v = ast.literal_eval(v) + setattr(self, k, v) diff --git a/asone/detectors/yolox/yolox/exp/build.py b/asone/detectors/yolox/yolox/exp/build.py new file mode 100644 index 0000000000000000000000000000000000000000..b9af506964967e8ead6090970089bb12345646ce --- /dev/null +++ b/asone/detectors/yolox/yolox/exp/build.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +import importlib +import os +import sys + + +def get_exp_by_file(exp_file): + try: + _dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) + _dir = os.path.join(_dir, 'exps') + sys.path.append(_dir) + + current_exp = importlib.import_module(os.path.basename(exp_file).split(".")[0]) + exp = current_exp.Exp() + except Exception: + raise ImportError("{} doesn't contains class named 'Exp'".format(exp_file)) + return exp + + +def get_exp_by_name(exp_name): + exp = exp_name.replace("-", "_") # convert string like "yolox-s" to "yolox_s" + module_name = ".".join(["yolox", "exp", "default", exp]) + exp_object = importlib.import_module(module_name).Exp() + return exp_object + + +def get_exp(exp_file=None, exp_name=None): + """ + get Exp object by file or name. If exp_file and exp_name + are both provided, get Exp by exp_file. + + Args: + exp_file (str): file path of experiment. + exp_name (str): name of experiment. "yolo-s", + """ + assert ( + exp_file is not None or exp_name is not None + ), "plz provide exp file or exp name." + if exp_file is not None: + return get_exp_by_file(exp_file) + else: + return get_exp_by_name(exp_name) diff --git a/asone/detectors/yolox/yolox/exp/default/__init__.py b/asone/detectors/yolox/yolox/exp/default/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..deb1546494308889f933c2951a8ad0a2bd1d7b11 --- /dev/null +++ b/asone/detectors/yolox/yolox/exp/default/__init__.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. 
All rights reserved. + +# This file is used for package installation and find default exp file + +import importlib +import sys +from pathlib import Path + +_EXP_PATH = Path(__file__).resolve().parent.parent.parent.parent / "exps" / "default" + +if _EXP_PATH.is_dir(): + # This is true only for in-place installation (pip install -e, setup.py develop), + # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230 + + class _ExpFinder(importlib.abc.MetaPathFinder): + + def find_spec(self, name, path, target=None): + if not name.startswith("yolox.exp.default"): + return + project_name = name.split(".")[-1] + ".py" + target_file = _EXP_PATH / project_name + if not target_file.is_file(): + return + return importlib.util.spec_from_file_location(name, target_file) + + sys.meta_path.append(_ExpFinder()) diff --git a/asone/detectors/yolox/yolox/exp/yolox_base.py b/asone/detectors/yolox/yolox/exp/yolox_base.py new file mode 100644 index 0000000000000000000000000000000000000000..94723e6b4567f12f35108741e636eeffbcffcfc2 --- /dev/null +++ b/asone/detectors/yolox/yolox/exp/yolox_base.py @@ -0,0 +1,322 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +import os +import random + +import torch +import torch.distributed as dist +import torch.nn as nn + +from .base_exp import BaseExp + + +class Exp(BaseExp): + def __init__(self): + super().__init__() + + # ---------------- model config ---------------- # + # detect classes number of model + self.num_classes = 80 + # factor of model depth + self.depth = 1.00 + # factor of model width + self.width = 1.00 + # activation name. For example, if using "relu", then "silu" will be replaced to "relu". + self.act = "silu" + + # ---------------- dataloader config ---------------- # + # set worker to 4 for shorter dataloader init time + # If your training process cost many memory, reduce this value. + self.data_num_workers = 4 + self.input_size = (640, 640) # (height, width) + # Actual multiscale ranges: [640 - 5 * 32, 640 + 5 * 32]. + # To disable multiscale training, set the value to 0. 
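+        # e.g. with the default 640x640 input and a range of 5, training resolutions
+        # are sampled from 480 to 800 px in 32-px steps (see random_resize below)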
+ self.multiscale_range = 5 + # You can uncomment this line to specify a multiscale range + # self.random_size = (14, 26) + # dir of dataset images, if data_dir is None, this project will use `datasets` dir + self.data_dir = None + # name of annotation file for training + self.train_ann = "instances_train2017.json" + # name of annotation file for evaluation + self.val_ann = "instances_val2017.json" + # name of annotation file for testing + self.test_ann = "instances_test2017.json" + + # --------------- transform config ----------------- # + # prob of applying mosaic aug + self.mosaic_prob = 1.0 + # prob of applying mixup aug + self.mixup_prob = 1.0 + # prob of applying hsv aug + self.hsv_prob = 1.0 + # prob of applying flip aug + self.flip_prob = 0.5 + # rotation angle range, for example, if set to 2, the true range is (-2, 2) + self.degrees = 10.0 + # translate range, for example, if set to 0.1, the true range is (-0.1, 0.1) + self.translate = 0.1 + self.mosaic_scale = (0.1, 2) + # apply mixup aug or not + self.enable_mixup = True + self.mixup_scale = (0.5, 1.5) + # shear angle range, for example, if set to 2, the true range is (-2, 2) + self.shear = 2.0 + + # -------------- training config --------------------- # + # epoch number used for warmup + self.warmup_epochs = 5 + # max training epoch + self.max_epoch = 300 + # minimum learning rate during warmup + self.warmup_lr = 0 + self.min_lr_ratio = 0.05 + # learning rate for one image. During training, lr will multiply batchsize. + self.basic_lr_per_img = 0.01 / 64.0 + # name of LRScheduler + self.scheduler = "yoloxwarmcos" + # last #epoch to close augmention like mosaic + self.no_aug_epochs = 15 + # apply EMA during training + self.ema = True + + # weight decay of optimizer + self.weight_decay = 5e-4 + # momentum of optimizer + self.momentum = 0.9 + # log period in iter, for example, + # if set to 1, user could see log every iteration. + self.print_interval = 10 + # eval period in epoch, for example, + # if set to 1, model will be evaluate after every epoch. + self.eval_interval = 10 + # save history checkpoint or not. + # If set to False, yolox will only save latest and best ckpt. 
+ self.save_history_ckpt = True + # name of experiment + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + # ----------------- testing config ------------------ # + # output image size during evaluation/test + self.test_size = (640, 640) + # confidence threshold during evaluation/test, + # boxes whose scores are less than test_conf will be filtered + self.test_conf = 0.01 + # nms threshold + self.nmsthre = 0.65 + + def get_model(self): + from asone.detectors.yolox.yolox.models import YOLOX, YOLOPAFPN, YOLOXHead + + def init_yolo(M): + for m in M.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eps = 1e-3 + m.momentum = 0.03 + + if getattr(self, "model", None) is None: + in_channels = [256, 512, 1024] + backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, act=self.act) + head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, act=self.act) + self.model = YOLOX(backbone, head) + + self.model.apply(init_yolo) + self.model.head.initialize_biases(1e-2) + self.model.train() + return self.model + + def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): + from yolox.data import ( + COCODataset, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + worker_init_reset_seed, + ) + from yolox.utils import wait_for_the_master + + with wait_for_the_master(): + dataset = COCODataset( + data_dir=self.data_dir, + json_file=self.train_ann, + img_size=self.input_size, + preproc=TrainTransform( + max_labels=50, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + cache=cache_img, + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + max_labels=120, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + degrees=self.degrees, + translate=self.translate, + mosaic_scale=self.mosaic_scale, + mixup_scale=self.mixup_scale, + shear=self.shear, + enable_mixup=self.enable_mixup, + mosaic_prob=self.mosaic_prob, + mixup_prob=self.mixup_prob, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + + # Make sure each process has different random seed, especially for 'fork' method. + # Check https://github.com/pytorch/pytorch/issues/63311 for more details. 
+ dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed + + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def random_resize(self, data_loader, epoch, rank, is_distributed): + tensor = torch.LongTensor(2).cuda() + + if rank == 0: + size_factor = self.input_size[1] * 1.0 / self.input_size[0] + if not hasattr(self, 'random_size'): + min_size = int(self.input_size[0] / 32) - self.multiscale_range + max_size = int(self.input_size[0] / 32) + self.multiscale_range + self.random_size = (min_size, max_size) + size = random.randint(*self.random_size) + size = (int(32 * size), 32 * int(size * size_factor)) + tensor[0] = size[0] + tensor[1] = size[1] + + if is_distributed: + dist.barrier() + dist.broadcast(tensor, 0) + + input_size = (tensor[0].item(), tensor[1].item()) + return input_size + + def preprocess(self, inputs, targets, tsize): + scale_y = tsize[0] / self.input_size[0] + scale_x = tsize[1] / self.input_size[1] + if scale_x != 1 or scale_y != 1: + inputs = nn.functional.interpolate( + inputs, size=tsize, mode="bilinear", align_corners=False + ) + targets[..., 1::2] = targets[..., 1::2] * scale_x + targets[..., 2::2] = targets[..., 2::2] * scale_y + return inputs, targets + + def get_optimizer(self, batch_size): + if "optimizer" not in self.__dict__: + if self.warmup_epochs > 0: + lr = self.warmup_lr + else: + lr = self.basic_lr_per_img * batch_size + + pg0, pg1, pg2 = [], [], [] # optimizer parameter groups + + for k, v in self.model.named_modules(): + if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter): + pg2.append(v.bias) # biases + if isinstance(v, nn.BatchNorm2d) or "bn" in k: + pg0.append(v.weight) # no decay + elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter): + pg1.append(v.weight) # apply decay + + optimizer = torch.optim.SGD( + pg0, lr=lr, momentum=self.momentum, nesterov=True + ) + optimizer.add_param_group( + {"params": pg1, "weight_decay": self.weight_decay} + ) # add pg1 with weight_decay + optimizer.add_param_group({"params": pg2}) + self.optimizer = optimizer + + return self.optimizer + + def get_lr_scheduler(self, lr, iters_per_epoch): + from yolox.utils import LRScheduler + + scheduler = LRScheduler( + self.scheduler, + lr, + iters_per_epoch, + self.max_epoch, + warmup_epochs=self.warmup_epochs, + warmup_lr_start=self.warmup_lr, + no_aug_epochs=self.no_aug_epochs, + min_lr_ratio=self.min_lr_ratio, + ) + return scheduler + + def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False): + from yolox.data import COCODataset, ValTransform + + valdataset = COCODataset( + data_dir=self.data_dir, + json_file=self.val_ann if not testdev else self.test_ann, + name="val2017" if not testdev else "test2017", + img_size=self.test_size, + preproc=ValTransform(legacy=legacy), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False): + from yolox.evaluators import COCOEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy) + evaluator = 
COCOEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + testdev=testdev, + ) + return evaluator + + def get_trainer(self, args): + from yolox.core import Trainer + trainer = Trainer(self, args) + # NOTE: trainer shouldn't be an attribute of exp object + return trainer + + def eval(self, model, evaluator, is_distributed, half=False, return_outputs=False): + return evaluator.evaluate(model, is_distributed, half, return_outputs=return_outputs) diff --git a/asone/detectors/yolox/yolox/models/__init__.py b/asone/detectors/yolox/yolox/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c74fd3064ac588a7c223018aa31fd2d46f95d062 --- /dev/null +++ b/asone/detectors/yolox/yolox/models/__init__.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +from .build import * +from .darknet import CSPDarknet, Darknet +from .losses import IOUloss +from .yolo_fpn import YOLOFPN +from .yolo_head import YOLOXHead +from .yolo_pafpn import YOLOPAFPN +from .yolox import YOLOX diff --git a/asone/detectors/yolox/yolox/models/build.py b/asone/detectors/yolox/yolox/models/build.py new file mode 100644 index 0000000000000000000000000000000000000000..8edc87de9d1dd46b7e693ad15bdbd9ac753bd225 --- /dev/null +++ b/asone/detectors/yolox/yolox/models/build.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- + +import torch +from torch import nn +from torch.hub import load_state_dict_from_url + +__all__ = [ + "create_yolox_model", + "yolox_nano", + "yolox_tiny", + "yolox_s", + "yolox_m", + "yolox_l", + "yolox_x", + "yolov3", + "yolox_custom" +] + +_CKPT_ROOT_URL = "https://github.com/Megvii-BaseDetection/YOLOX/releases/download" +_CKPT_FULL_PATH = { + "yolox-nano": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_nano.pth", + "yolox-tiny": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_tiny.pth", + "yolox-s": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_s.pth", + "yolox-m": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_m.pth", + "yolox-l": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_l.pth", + "yolox-x": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_x.pth", + "yolov3": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_darknet.pth", +} + + +def create_yolox_model(name: str, pretrained: bool = True, num_classes: int = 80, device=None, + exp_path: str = None, ckpt_path: str = None) -> nn.Module: + """creates and loads a YOLOX model + + Args: + name (str): name of model. for example, "yolox-s", "yolox-tiny" or "yolox_custom" + if you want to load your own model. + pretrained (bool): load pretrained weights into the model. Default to True. + device (str): default device to for model. Default to None. + num_classes (int): number of model classes. Default to 80. + exp_path (str): path to your own experiment file. Required if name="yolox_custom" + ckpt_path (str): path to your own ckpt. 
Required if name="yolox_custom" and you want to + load a pretrained model + + + Returns: + YOLOX model (nn.Module) + """ + from yolox.exp import get_exp, Exp + + if device is None: + device = "cuda:0" if torch.cuda.is_available() else "cpu" + device = torch.device(device) + + assert name in _CKPT_FULL_PATH or name == "yolox_custom", \ + f"user should use one of value in {_CKPT_FULL_PATH.keys()} or \"yolox_custom\"" + if name in _CKPT_FULL_PATH: + exp: Exp = get_exp(exp_name=name) + exp.num_classes = num_classes + yolox_model = exp.get_model() + if pretrained and num_classes == 80: + weights_url = _CKPT_FULL_PATH[name] + ckpt = load_state_dict_from_url(weights_url, map_location="cpu") + if "model" in ckpt: + ckpt = ckpt["model"] + yolox_model.load_state_dict(ckpt) + else: + assert exp_path is not None, "for a \"yolox_custom\" model exp_path must be provided" + exp: Exp = get_exp(exp_file=exp_path) + yolox_model = exp.get_model() + if ckpt_path: + ckpt = torch.load(ckpt_path, map_location="cpu") + if "model" in ckpt: + ckpt = ckpt["model"] + yolox_model.load_state_dict(ckpt) + + yolox_model.to(device) + return yolox_model + + +def yolox_nano(pretrained: bool = True, num_classes: int = 80, device: str = None) -> nn.Module: + return create_yolox_model("yolox-nano", pretrained, num_classes, device) + + +def yolox_tiny(pretrained: bool = True, num_classes: int = 80, device: str = None) -> nn.Module: + return create_yolox_model("yolox-tiny", pretrained, num_classes, device) + + +def yolox_s(pretrained: bool = True, num_classes: int = 80, device: str = None) -> nn.Module: + return create_yolox_model("yolox-s", pretrained, num_classes, device) + + +def yolox_m(pretrained: bool = True, num_classes: int = 80, device: str = None) -> nn.Module: + return create_yolox_model("yolox-m", pretrained, num_classes, device) + + +def yolox_l(pretrained: bool = True, num_classes: int = 80, device: str = None) -> nn.Module: + return create_yolox_model("yolox-l", pretrained, num_classes, device) + + +def yolox_x(pretrained: bool = True, num_classes: int = 80, device: str = None) -> nn.Module: + return create_yolox_model("yolox-x", pretrained, num_classes, device) + + +def yolov3(pretrained: bool = True, num_classes: int = 80, device: str = None) -> nn.Module: + return create_yolox_model("yolov3", pretrained, num_classes, device) + + +def yolox_custom(ckpt_path: str = None, exp_path: str = None, device: str = None) -> nn.Module: + return create_yolox_model("yolox_custom", ckpt_path=ckpt_path, exp_path=exp_path, device=device) diff --git a/asone/detectors/yolox/yolox/models/darknet.py b/asone/detectors/yolox/yolox/models/darknet.py new file mode 100644 index 0000000000000000000000000000000000000000..b3e053f163ade7b69979bcec86532466ab67eedf --- /dev/null +++ b/asone/detectors/yolox/yolox/models/darknet.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +from torch import nn + +from .network_blocks import BaseConv, CSPLayer, DWConv, Focus, ResLayer, SPPBottleneck + + +class Darknet(nn.Module): + # number of blocks from dark2 to dark5. + depth2blocks = {21: [1, 2, 2, 1], 53: [2, 8, 8, 4]} + + def __init__( + self, + depth, + in_channels=3, + stem_out_channels=32, + out_features=("dark3", "dark4", "dark5"), + ): + """ + Args: + depth (int): depth of darknet used in model, usually use [21, 53] for this param. + in_channels (int): number of input channels, for example, use 3 for RGB image. 
+ stem_out_channels (int): number of output channels of darknet stem. + It decides channels of darknet layer2 to layer5. + out_features (Tuple[str]): desired output layer name. + """ + super().__init__() + assert out_features, "please provide output features of Darknet" + self.out_features = out_features + self.stem = nn.Sequential( + BaseConv(in_channels, stem_out_channels, ksize=3, stride=1, act="lrelu"), + *self.make_group_layer(stem_out_channels, num_blocks=1, stride=2), + ) + in_channels = stem_out_channels * 2 # 64 + + num_blocks = Darknet.depth2blocks[depth] + # create darknet with `stem_out_channels` and `num_blocks` layers. + # to make model structure more clear, we don't use `for` statement in python. + self.dark2 = nn.Sequential( + *self.make_group_layer(in_channels, num_blocks[0], stride=2) + ) + in_channels *= 2 # 128 + self.dark3 = nn.Sequential( + *self.make_group_layer(in_channels, num_blocks[1], stride=2) + ) + in_channels *= 2 # 256 + self.dark4 = nn.Sequential( + *self.make_group_layer(in_channels, num_blocks[2], stride=2) + ) + in_channels *= 2 # 512 + + self.dark5 = nn.Sequential( + *self.make_group_layer(in_channels, num_blocks[3], stride=2), + *self.make_spp_block([in_channels, in_channels * 2], in_channels * 2), + ) + + def make_group_layer(self, in_channels: int, num_blocks: int, stride: int = 1): + "starts with conv layer then has `num_blocks` `ResLayer`" + return [ + BaseConv(in_channels, in_channels * 2, ksize=3, stride=stride, act="lrelu"), + *[(ResLayer(in_channels * 2)) for _ in range(num_blocks)], + ] + + def make_spp_block(self, filters_list, in_filters): + m = nn.Sequential( + *[ + BaseConv(in_filters, filters_list[0], 1, stride=1, act="lrelu"), + BaseConv(filters_list[0], filters_list[1], 3, stride=1, act="lrelu"), + SPPBottleneck( + in_channels=filters_list[1], + out_channels=filters_list[0], + activation="lrelu", + ), + BaseConv(filters_list[0], filters_list[1], 3, stride=1, act="lrelu"), + BaseConv(filters_list[1], filters_list[0], 1, stride=1, act="lrelu"), + ] + ) + return m + + def forward(self, x): + outputs = {} + x = self.stem(x) + outputs["stem"] = x + x = self.dark2(x) + outputs["dark2"] = x + x = self.dark3(x) + outputs["dark3"] = x + x = self.dark4(x) + outputs["dark4"] = x + x = self.dark5(x) + outputs["dark5"] = x + return {k: v for k, v in outputs.items() if k in self.out_features} + + +class CSPDarknet(nn.Module): + def __init__( + self, + dep_mul, + wid_mul, + out_features=("dark3", "dark4", "dark5"), + depthwise=False, + act="silu", + ): + super().__init__() + assert out_features, "please provide output features of Darknet" + self.out_features = out_features + Conv = DWConv if depthwise else BaseConv + + base_channels = int(wid_mul * 64) # 64 + base_depth = max(round(dep_mul * 3), 1) # 3 + + # stem + self.stem = Focus(3, base_channels, ksize=3, act=act) + + # dark2 + self.dark2 = nn.Sequential( + Conv(base_channels, base_channels * 2, 3, 2, act=act), + CSPLayer( + base_channels * 2, + base_channels * 2, + n=base_depth, + depthwise=depthwise, + act=act, + ), + ) + + # dark3 + self.dark3 = nn.Sequential( + Conv(base_channels * 2, base_channels * 4, 3, 2, act=act), + CSPLayer( + base_channels * 4, + base_channels * 4, + n=base_depth * 3, + depthwise=depthwise, + act=act, + ), + ) + + # dark4 + self.dark4 = nn.Sequential( + Conv(base_channels * 4, base_channels * 8, 3, 2, act=act), + CSPLayer( + base_channels * 8, + base_channels * 8, + n=base_depth * 3, + depthwise=depthwise, + act=act, + ), + ) + + # dark5 + self.dark5 = nn.Sequential( + 
Conv(base_channels * 8, base_channels * 16, 3, 2, act=act), + SPPBottleneck(base_channels * 16, base_channels * 16, activation=act), + CSPLayer( + base_channels * 16, + base_channels * 16, + n=base_depth, + shortcut=False, + depthwise=depthwise, + act=act, + ), + ) + + def forward(self, x): + outputs = {} + x = self.stem(x) + outputs["stem"] = x + x = self.dark2(x) + outputs["dark2"] = x + x = self.dark3(x) + outputs["dark3"] = x + x = self.dark4(x) + outputs["dark4"] = x + x = self.dark5(x) + outputs["dark5"] = x + return {k: v for k, v in outputs.items() if k in self.out_features} diff --git a/asone/detectors/yolox/yolox/models/losses.py b/asone/detectors/yolox/yolox/models/losses.py new file mode 100644 index 0000000000000000000000000000000000000000..77b4d8ef7660880031f4ef23c82ba3a85b6fd254 --- /dev/null +++ b/asone/detectors/yolox/yolox/models/losses.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +import torch +import torch.nn as nn + + +class IOUloss(nn.Module): + def __init__(self, reduction="none", loss_type="iou"): + super(IOUloss, self).__init__() + self.reduction = reduction + self.loss_type = loss_type + + def forward(self, pred, target): + assert pred.shape[0] == target.shape[0] + + pred = pred.view(-1, 4) + target = target.view(-1, 4) + tl = torch.max( + (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) + ) + br = torch.min( + (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) + ) + + area_p = torch.prod(pred[:, 2:], 1) + area_g = torch.prod(target[:, 2:], 1) + + en = (tl < br).type(tl.type()).prod(dim=1) + area_i = torch.prod(br - tl, 1) * en + area_u = area_p + area_g - area_i + iou = (area_i) / (area_u + 1e-16) + + if self.loss_type == "iou": + loss = 1 - iou ** 2 + elif self.loss_type == "giou": + c_tl = torch.min( + (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) + ) + c_br = torch.max( + (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) + ) + area_c = torch.prod(c_br - c_tl, 1) + giou = iou - (area_c - area_u) / area_c.clamp(1e-16) + loss = 1 - giou.clamp(min=-1.0, max=1.0) + + if self.reduction == "mean": + loss = loss.mean() + elif self.reduction == "sum": + loss = loss.sum() + + return loss diff --git a/asone/detectors/yolox/yolox/models/network_blocks.py b/asone/detectors/yolox/yolox/models/network_blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..68aacfc33208eab072422e0647742006984dfdfd --- /dev/null +++ b/asone/detectors/yolox/yolox/models/network_blocks.py @@ -0,0 +1,210 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. 
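+
+# Building blocks shared by the YOLOX backbones and heads: BaseConv (Conv-BN-act),
+# DWConv (depthwise separable conv), Bottleneck, ResLayer, SPPBottleneck,
+# CSPLayer (the C3 block of YOLOv5) and Focus (space-to-channel stem).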
+ +import torch +import torch.nn as nn + + +class SiLU(nn.Module): + """export-friendly version of nn.SiLU()""" + + @staticmethod + def forward(x): + return x * torch.sigmoid(x) + + +def get_activation(name="silu", inplace=True): + if name == "silu": + module = nn.SiLU(inplace=inplace) + elif name == "relu": + module = nn.ReLU(inplace=inplace) + elif name == "lrelu": + module = nn.LeakyReLU(0.1, inplace=inplace) + else: + raise AttributeError("Unsupported act type: {}".format(name)) + return module + + +class BaseConv(nn.Module): + """A Conv2d -> Batchnorm -> silu/leaky relu block""" + + def __init__( + self, in_channels, out_channels, ksize, stride, groups=1, bias=False, act="silu" + ): + super().__init__() + # same padding + pad = (ksize - 1) // 2 + self.conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size=ksize, + stride=stride, + padding=pad, + groups=groups, + bias=bias, + ) + self.bn = nn.BatchNorm2d(out_channels) + self.act = get_activation(act, inplace=True) + + def forward(self, x): + return self.act(self.bn(self.conv(x))) + + def fuseforward(self, x): + return self.act(self.conv(x)) + + +class DWConv(nn.Module): + """Depthwise Conv + Conv""" + + def __init__(self, in_channels, out_channels, ksize, stride=1, act="silu"): + super().__init__() + self.dconv = BaseConv( + in_channels, + in_channels, + ksize=ksize, + stride=stride, + groups=in_channels, + act=act, + ) + self.pconv = BaseConv( + in_channels, out_channels, ksize=1, stride=1, groups=1, act=act + ) + + def forward(self, x): + x = self.dconv(x) + return self.pconv(x) + + +class Bottleneck(nn.Module): + # Standard bottleneck + def __init__( + self, + in_channels, + out_channels, + shortcut=True, + expansion=0.5, + depthwise=False, + act="silu", + ): + super().__init__() + hidden_channels = int(out_channels * expansion) + Conv = DWConv if depthwise else BaseConv + self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act) + self.conv2 = Conv(hidden_channels, out_channels, 3, stride=1, act=act) + self.use_add = shortcut and in_channels == out_channels + + def forward(self, x): + y = self.conv2(self.conv1(x)) + if self.use_add: + y = y + x + return y + + +class ResLayer(nn.Module): + "Residual layer with `in_channels` inputs." 
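# Editor's aside (standalone sketch): BaseConv and DWConv above produce identically
# shaped outputs, but the depthwise-separable variant needs far fewer parameters
# (roughly 9.2k vs 74k weights for 64 -> 128 channels with a 3x3 kernel). The tensors
# below are dummies used only for a quick shape check; `torch` is the module import above.
base = BaseConv(64, 128, ksize=3, stride=1)
dw = DWConv(64, 128, ksize=3, stride=1)
x = torch.randn(1, 64, 32, 32)
assert base(x).shape == dw(x).shape == (1, 128, 32, 32)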
+ + def __init__(self, in_channels: int): + super().__init__() + mid_channels = in_channels // 2 + self.layer1 = BaseConv( + in_channels, mid_channels, ksize=1, stride=1, act="lrelu" + ) + self.layer2 = BaseConv( + mid_channels, in_channels, ksize=3, stride=1, act="lrelu" + ) + + def forward(self, x): + out = self.layer2(self.layer1(x)) + return x + out + + +class SPPBottleneck(nn.Module): + """Spatial pyramid pooling layer used in YOLOv3-SPP""" + + def __init__( + self, in_channels, out_channels, kernel_sizes=(5, 9, 13), activation="silu" + ): + super().__init__() + hidden_channels = in_channels // 2 + self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=activation) + self.m = nn.ModuleList( + [ + nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) + for ks in kernel_sizes + ] + ) + conv2_channels = hidden_channels * (len(kernel_sizes) + 1) + self.conv2 = BaseConv(conv2_channels, out_channels, 1, stride=1, act=activation) + + def forward(self, x): + x = self.conv1(x) + x = torch.cat([x] + [m(x) for m in self.m], dim=1) + x = self.conv2(x) + return x + + +class CSPLayer(nn.Module): + """C3 in yolov5, CSP Bottleneck with 3 convolutions""" + + def __init__( + self, + in_channels, + out_channels, + n=1, + shortcut=True, + expansion=0.5, + depthwise=False, + act="silu", + ): + """ + Args: + in_channels (int): input channels. + out_channels (int): output channels. + n (int): number of Bottlenecks. Default value: 1. + """ + # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__() + hidden_channels = int(out_channels * expansion) # hidden channels + self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act) + self.conv2 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act) + self.conv3 = BaseConv(2 * hidden_channels, out_channels, 1, stride=1, act=act) + module_list = [ + Bottleneck( + hidden_channels, hidden_channels, shortcut, 1.0, depthwise, act=act + ) + for _ in range(n) + ] + self.m = nn.Sequential(*module_list) + + def forward(self, x): + x_1 = self.conv1(x) + x_2 = self.conv2(x) + x_1 = self.m(x_1) + x = torch.cat((x_1, x_2), dim=1) + return self.conv3(x) + + +class Focus(nn.Module): + """Focus width and height information into channel space.""" + + def __init__(self, in_channels, out_channels, ksize=1, stride=1, act="silu"): + super().__init__() + self.conv = BaseConv(in_channels * 4, out_channels, ksize, stride, act=act) + + def forward(self, x): + # shape of x (b,c,w,h) -> y(b,4c,w/2,h/2) + patch_top_left = x[..., ::2, ::2] + patch_top_right = x[..., ::2, 1::2] + patch_bot_left = x[..., 1::2, ::2] + patch_bot_right = x[..., 1::2, 1::2] + x = torch.cat( + ( + patch_top_left, + patch_bot_left, + patch_top_right, + patch_bot_right, + ), + dim=1, + ) + return self.conv(x) diff --git a/asone/detectors/yolox/yolox/models/yolo_fpn.py b/asone/detectors/yolox/yolox/models/yolo_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..224271f59fd55b1e8e4bf3321d746a85bfe0b09c --- /dev/null +++ b/asone/detectors/yolox/yolox/models/yolo_fpn.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +import torch +import torch.nn as nn + +from .darknet import Darknet +from .network_blocks import BaseConv + + +class YOLOFPN(nn.Module): + """ + YOLOFPN module. Darknet 53 is the default backbone of this model. 
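Example (editor's sketch, not part of the upstream docstring): with the default
Darknet-53 backbone and a 640x640 input, the module returns three feature maps at
strides 8, 16 and 32.

    import torch
    fpn = YOLOFPN(depth=53)
    f8, f16, f32 = fpn(torch.randn(1, 3, 640, 640))
    # expected shapes: (1, 128, 80, 80), (1, 256, 40, 40), (1, 512, 20, 20)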
+ """ + + def __init__( + self, + depth=53, + in_features=["dark3", "dark4", "dark5"], + ): + super().__init__() + + self.backbone = Darknet(depth) + self.in_features = in_features + + # out 1 + self.out1_cbl = self._make_cbl(512, 256, 1) + self.out1 = self._make_embedding([256, 512], 512 + 256) + + # out 2 + self.out2_cbl = self._make_cbl(256, 128, 1) + self.out2 = self._make_embedding([128, 256], 256 + 128) + + # upsample + self.upsample = nn.Upsample(scale_factor=2, mode="nearest") + + def _make_cbl(self, _in, _out, ks): + return BaseConv(_in, _out, ks, stride=1, act="lrelu") + + def _make_embedding(self, filters_list, in_filters): + m = nn.Sequential( + *[ + self._make_cbl(in_filters, filters_list[0], 1), + self._make_cbl(filters_list[0], filters_list[1], 3), + self._make_cbl(filters_list[1], filters_list[0], 1), + self._make_cbl(filters_list[0], filters_list[1], 3), + self._make_cbl(filters_list[1], filters_list[0], 1), + ] + ) + return m + + def load_pretrained_model(self, filename="./weights/darknet53.mix.pth"): + with open(filename, "rb") as f: + state_dict = torch.load(f, map_location="cpu") + print("loading pretrained weights...") + self.backbone.load_state_dict(state_dict) + + def forward(self, inputs): + """ + Args: + inputs (Tensor): input image. + + Returns: + Tuple[Tensor]: FPN output features.. + """ + # backbone + out_features = self.backbone(inputs) + x2, x1, x0 = [out_features[f] for f in self.in_features] + + # yolo branch 1 + x1_in = self.out1_cbl(x0) + x1_in = self.upsample(x1_in) + x1_in = torch.cat([x1_in, x1], 1) + out_dark4 = self.out1(x1_in) + + # yolo branch 2 + x2_in = self.out2_cbl(out_dark4) + x2_in = self.upsample(x2_in) + x2_in = torch.cat([x2_in, x2], 1) + out_dark3 = self.out2(x2_in) + + outputs = (out_dark3, out_dark4, x0) + return outputs diff --git a/asone/detectors/yolox/yolox/models/yolo_head.py b/asone/detectors/yolox/yolox/models/yolo_head.py new file mode 100644 index 0000000000000000000000000000000000000000..96ff5995f38dd256011fd81dc1526730361cc5e8 --- /dev/null +++ b/asone/detectors/yolox/yolox/models/yolo_head.py @@ -0,0 +1,645 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +import math +from loguru import logger + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from asone.detectors.yolox.yolox.utils import bboxes_iou, meshgrid + +from .losses import IOUloss +from .network_blocks import BaseConv, DWConv + + +class YOLOXHead(nn.Module): + def __init__( + self, + num_classes, + width=1.0, + strides=[8, 16, 32], + in_channels=[256, 512, 1024], + act="silu", + depthwise=False, + ): + """ + Args: + act (str): activation type of conv. Defalut value: "silu". + depthwise (bool): whether apply depthwise conv in conv branch. Defalut value: False. 
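Example (editor's sketch): in eval mode the head flattens and concatenates the three
levels, so a 640x640 input yields 80*80 + 40*40 + 20*20 = 8400 anchor points and a
decoded output of shape [batch, 8400, 5 + num_classes].

    import torch
    head = YOLOXHead(num_classes=80, width=1.0).eval()
    feats = [torch.randn(1, c, s, s) for c, s in zip((256, 512, 1024), (80, 40, 20))]
    with torch.no_grad():
        preds = head(feats)
    # expected: preds.shape == (1, 8400, 85)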
+ """ + super().__init__() + + self.n_anchors = 1 + self.num_classes = num_classes + self.decode_in_inference = True # for deploy, set to False + + self.cls_convs = nn.ModuleList() + self.reg_convs = nn.ModuleList() + self.cls_preds = nn.ModuleList() + self.reg_preds = nn.ModuleList() + self.obj_preds = nn.ModuleList() + self.stems = nn.ModuleList() + Conv = DWConv if depthwise else BaseConv + + for i in range(len(in_channels)): + self.stems.append( + BaseConv( + in_channels=int(in_channels[i] * width), + out_channels=int(256 * width), + ksize=1, + stride=1, + act=act, + ) + ) + self.cls_convs.append( + nn.Sequential( + *[ + Conv( + in_channels=int(256 * width), + out_channels=int(256 * width), + ksize=3, + stride=1, + act=act, + ), + Conv( + in_channels=int(256 * width), + out_channels=int(256 * width), + ksize=3, + stride=1, + act=act, + ), + ] + ) + ) + self.reg_convs.append( + nn.Sequential( + *[ + Conv( + in_channels=int(256 * width), + out_channels=int(256 * width), + ksize=3, + stride=1, + act=act, + ), + Conv( + in_channels=int(256 * width), + out_channels=int(256 * width), + ksize=3, + stride=1, + act=act, + ), + ] + ) + ) + self.cls_preds.append( + nn.Conv2d( + in_channels=int(256 * width), + out_channels=self.n_anchors * self.num_classes, + kernel_size=1, + stride=1, + padding=0, + ) + ) + self.reg_preds.append( + nn.Conv2d( + in_channels=int(256 * width), + out_channels=4, + kernel_size=1, + stride=1, + padding=0, + ) + ) + self.obj_preds.append( + nn.Conv2d( + in_channels=int(256 * width), + out_channels=self.n_anchors * 1, + kernel_size=1, + stride=1, + padding=0, + ) + ) + + self.use_l1 = False + self.l1_loss = nn.L1Loss(reduction="none") + self.bcewithlog_loss = nn.BCEWithLogitsLoss(reduction="none") + self.iou_loss = IOUloss(reduction="none") + self.strides = strides + self.grids = [torch.zeros(1)] * len(in_channels) + + def initialize_biases(self, prior_prob): + for conv in self.cls_preds: + b = conv.bias.view(self.n_anchors, -1) + b.data.fill_(-math.log((1 - prior_prob) / prior_prob)) + conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) + + for conv in self.obj_preds: + b = conv.bias.view(self.n_anchors, -1) + b.data.fill_(-math.log((1 - prior_prob) / prior_prob)) + conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) + + def forward(self, xin, labels=None, imgs=None): + outputs = [] + origin_preds = [] + x_shifts = [] + y_shifts = [] + expanded_strides = [] + + for k, (cls_conv, reg_conv, stride_this_level, x) in enumerate( + zip(self.cls_convs, self.reg_convs, self.strides, xin) + ): + x = self.stems[k](x) + cls_x = x + reg_x = x + + cls_feat = cls_conv(cls_x) + cls_output = self.cls_preds[k](cls_feat) + + reg_feat = reg_conv(reg_x) + reg_output = self.reg_preds[k](reg_feat) + obj_output = self.obj_preds[k](reg_feat) + + if self.training: + output = torch.cat([reg_output, obj_output, cls_output], 1) + output, grid = self.get_output_and_grid( + output, k, stride_this_level, xin[0].type() + ) + x_shifts.append(grid[:, :, 0]) + y_shifts.append(grid[:, :, 1]) + expanded_strides.append( + torch.zeros(1, grid.shape[1]) + .fill_(stride_this_level) + .type_as(xin[0]) + ) + if self.use_l1: + batch_size = reg_output.shape[0] + hsize, wsize = reg_output.shape[-2:] + reg_output = reg_output.view( + batch_size, self.n_anchors, 4, hsize, wsize + ) + reg_output = reg_output.permute(0, 1, 3, 4, 2).reshape( + batch_size, -1, 4 + ) + origin_preds.append(reg_output.clone()) + + else: + output = torch.cat( + [reg_output, obj_output.sigmoid(), cls_output.sigmoid()], 1 
+ ) + + outputs.append(output) + + if self.training: + return self.get_losses( + imgs, + x_shifts, + y_shifts, + expanded_strides, + labels, + torch.cat(outputs, 1), + origin_preds, + dtype=xin[0].dtype, + ) + else: + self.hw = [x.shape[-2:] for x in outputs] + # [batch, n_anchors_all, 85] + outputs = torch.cat( + [x.flatten(start_dim=2) for x in outputs], dim=2 + ).permute(0, 2, 1) + if self.decode_in_inference: + return self.decode_outputs(outputs, dtype=xin[0].type()) + else: + return outputs + + def get_output_and_grid(self, output, k, stride, dtype): + grid = self.grids[k] + + batch_size = output.shape[0] + n_ch = 5 + self.num_classes + hsize, wsize = output.shape[-2:] + if grid.shape[2:4] != output.shape[2:4]: + yv, xv = meshgrid([torch.arange(hsize), torch.arange(wsize)]) + grid = torch.stack((xv, yv), 2).view(1, 1, hsize, wsize, 2).type(dtype) + self.grids[k] = grid + + output = output.view(batch_size, self.n_anchors, n_ch, hsize, wsize) + output = output.permute(0, 1, 3, 4, 2).reshape( + batch_size, self.n_anchors * hsize * wsize, -1 + ) + grid = grid.view(1, -1, 2) + output[..., :2] = (output[..., :2] + grid) * stride + output[..., 2:4] = torch.exp(output[..., 2:4]) * stride + return output, grid + + def decode_outputs(self, outputs, dtype): + grids = [] + strides = [] + for (hsize, wsize), stride in zip(self.hw, self.strides): + yv, xv = meshgrid([torch.arange(hsize), torch.arange(wsize)]) + grid = torch.stack((xv, yv), 2).view(1, -1, 2) + grids.append(grid) + shape = grid.shape[:2] + strides.append(torch.full((*shape, 1), stride)) + + grids = torch.cat(grids, dim=1).type(dtype) + strides = torch.cat(strides, dim=1).type(dtype) + + outputs[..., :2] = (outputs[..., :2] + grids) * strides + outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides + return outputs + + def get_losses( + self, + imgs, + x_shifts, + y_shifts, + expanded_strides, + labels, + outputs, + origin_preds, + dtype, + ): + bbox_preds = outputs[:, :, :4] # [batch, n_anchors_all, 4] + obj_preds = outputs[:, :, 4].unsqueeze(-1) # [batch, n_anchors_all, 1] + cls_preds = outputs[:, :, 5:] # [batch, n_anchors_all, n_cls] + + # calculate targets + nlabel = (labels.sum(dim=2) > 0).sum(dim=1) # number of objects + + total_num_anchors = outputs.shape[1] + x_shifts = torch.cat(x_shifts, 1) # [1, n_anchors_all] + y_shifts = torch.cat(y_shifts, 1) # [1, n_anchors_all] + expanded_strides = torch.cat(expanded_strides, 1) + if self.use_l1: + origin_preds = torch.cat(origin_preds, 1) + + cls_targets = [] + reg_targets = [] + l1_targets = [] + obj_targets = [] + fg_masks = [] + + num_fg = 0.0 + num_gts = 0.0 + + for batch_idx in range(outputs.shape[0]): + num_gt = int(nlabel[batch_idx]) + num_gts += num_gt + if num_gt == 0: + cls_target = outputs.new_zeros((0, self.num_classes)) + reg_target = outputs.new_zeros((0, 4)) + l1_target = outputs.new_zeros((0, 4)) + obj_target = outputs.new_zeros((total_num_anchors, 1)) + fg_mask = outputs.new_zeros(total_num_anchors).bool() + else: + gt_bboxes_per_image = labels[batch_idx, :num_gt, 1:5] + gt_classes = labels[batch_idx, :num_gt, 0] + bboxes_preds_per_image = bbox_preds[batch_idx] + + try: + ( + gt_matched_classes, + fg_mask, + pred_ious_this_matching, + matched_gt_inds, + num_fg_img, + ) = self.get_assignments( # noqa + batch_idx, + num_gt, + total_num_anchors, + gt_bboxes_per_image, + gt_classes, + bboxes_preds_per_image, + expanded_strides, + x_shifts, + y_shifts, + cls_preds, + bbox_preds, + obj_preds, + labels, + imgs, + ) + except RuntimeError as e: + # TODO: the string might 
change, consider a better way + if "CUDA out of memory. " not in str(e): + raise # RuntimeError might not caused by CUDA OOM + + logger.error( + "OOM RuntimeError is raised due to the huge memory cost during label assignment. \ + CPU mode is applied in this batch. If you want to avoid this issue, \ + try to reduce the batch size or image size." + ) + torch.cuda.empty_cache() + ( + gt_matched_classes, + fg_mask, + pred_ious_this_matching, + matched_gt_inds, + num_fg_img, + ) = self.get_assignments( # noqa + batch_idx, + num_gt, + total_num_anchors, + gt_bboxes_per_image, + gt_classes, + bboxes_preds_per_image, + expanded_strides, + x_shifts, + y_shifts, + cls_preds, + bbox_preds, + obj_preds, + labels, + imgs, + "cpu", + ) + + torch.cuda.empty_cache() + num_fg += num_fg_img + + cls_target = F.one_hot( + gt_matched_classes.to(torch.int64), self.num_classes + ) * pred_ious_this_matching.unsqueeze(-1) + obj_target = fg_mask.unsqueeze(-1) + reg_target = gt_bboxes_per_image[matched_gt_inds] + if self.use_l1: + l1_target = self.get_l1_target( + outputs.new_zeros((num_fg_img, 4)), + gt_bboxes_per_image[matched_gt_inds], + expanded_strides[0][fg_mask], + x_shifts=x_shifts[0][fg_mask], + y_shifts=y_shifts[0][fg_mask], + ) + + cls_targets.append(cls_target) + reg_targets.append(reg_target) + obj_targets.append(obj_target.to(dtype)) + fg_masks.append(fg_mask) + if self.use_l1: + l1_targets.append(l1_target) + + cls_targets = torch.cat(cls_targets, 0) + reg_targets = torch.cat(reg_targets, 0) + obj_targets = torch.cat(obj_targets, 0) + fg_masks = torch.cat(fg_masks, 0) + if self.use_l1: + l1_targets = torch.cat(l1_targets, 0) + + num_fg = max(num_fg, 1) + loss_iou = ( + self.iou_loss(bbox_preds.view(-1, 4)[fg_masks], reg_targets) + ).sum() / num_fg + loss_obj = ( + self.bcewithlog_loss(obj_preds.view(-1, 1), obj_targets) + ).sum() / num_fg + loss_cls = ( + self.bcewithlog_loss( + cls_preds.view(-1, self.num_classes)[fg_masks], cls_targets + ) + ).sum() / num_fg + if self.use_l1: + loss_l1 = ( + self.l1_loss(origin_preds.view(-1, 4)[fg_masks], l1_targets) + ).sum() / num_fg + else: + loss_l1 = 0.0 + + reg_weight = 5.0 + loss = reg_weight * loss_iou + loss_obj + loss_cls + loss_l1 + + return ( + loss, + reg_weight * loss_iou, + loss_obj, + loss_cls, + loss_l1, + num_fg / max(num_gts, 1), + ) + + def get_l1_target(self, l1_target, gt, stride, x_shifts, y_shifts, eps=1e-8): + l1_target[:, 0] = gt[:, 0] / stride - x_shifts + l1_target[:, 1] = gt[:, 1] / stride - y_shifts + l1_target[:, 2] = torch.log(gt[:, 2] / stride + eps) + l1_target[:, 3] = torch.log(gt[:, 3] / stride + eps) + return l1_target + + @torch.no_grad() + def get_assignments( + self, + batch_idx, + num_gt, + total_num_anchors, + gt_bboxes_per_image, + gt_classes, + bboxes_preds_per_image, + expanded_strides, + x_shifts, + y_shifts, + cls_preds, + bbox_preds, + obj_preds, + labels, + imgs, + mode="gpu", + ): + + if mode == "cpu": + print("------------CPU Mode for This Batch-------------") + gt_bboxes_per_image = gt_bboxes_per_image.cpu().float() + bboxes_preds_per_image = bboxes_preds_per_image.cpu().float() + gt_classes = gt_classes.cpu().float() + expanded_strides = expanded_strides.cpu().float() + x_shifts = x_shifts.cpu() + y_shifts = y_shifts.cpu() + + fg_mask, is_in_boxes_and_center = self.get_in_boxes_info( + gt_bboxes_per_image, + expanded_strides, + x_shifts, + y_shifts, + total_num_anchors, + num_gt, + ) + + bboxes_preds_per_image = bboxes_preds_per_image[fg_mask] + cls_preds_ = cls_preds[batch_idx][fg_mask] + obj_preds_ = 
obj_preds[batch_idx][fg_mask] + num_in_boxes_anchor = bboxes_preds_per_image.shape[0] + + if mode == "cpu": + gt_bboxes_per_image = gt_bboxes_per_image.cpu() + bboxes_preds_per_image = bboxes_preds_per_image.cpu() + + pair_wise_ious = bboxes_iou(gt_bboxes_per_image, bboxes_preds_per_image, False) + + gt_cls_per_image = ( + F.one_hot(gt_classes.to(torch.int64), self.num_classes) + .float() + .unsqueeze(1) + .repeat(1, num_in_boxes_anchor, 1) + ) + pair_wise_ious_loss = -torch.log(pair_wise_ious + 1e-8) + + if mode == "cpu": + cls_preds_, obj_preds_ = cls_preds_.cpu(), obj_preds_.cpu() + + with torch.cuda.amp.autocast(enabled=False): + cls_preds_ = ( + cls_preds_.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_() + * obj_preds_.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_() + ) + pair_wise_cls_loss = F.binary_cross_entropy( + cls_preds_.sqrt_(), gt_cls_per_image, reduction="none" + ).sum(-1) + del cls_preds_ + + cost = ( + pair_wise_cls_loss + + 3.0 * pair_wise_ious_loss + + 100000.0 * (~is_in_boxes_and_center) + ) + + ( + num_fg, + gt_matched_classes, + pred_ious_this_matching, + matched_gt_inds, + ) = self.dynamic_k_matching(cost, pair_wise_ious, gt_classes, num_gt, fg_mask) + del pair_wise_cls_loss, cost, pair_wise_ious, pair_wise_ious_loss + + if mode == "cpu": + gt_matched_classes = gt_matched_classes.cuda() + fg_mask = fg_mask.cuda() + pred_ious_this_matching = pred_ious_this_matching.cuda() + matched_gt_inds = matched_gt_inds.cuda() + + return ( + gt_matched_classes, + fg_mask, + pred_ious_this_matching, + matched_gt_inds, + num_fg, + ) + + def get_in_boxes_info( + self, + gt_bboxes_per_image, + expanded_strides, + x_shifts, + y_shifts, + total_num_anchors, + num_gt, + ): + expanded_strides_per_image = expanded_strides[0] + x_shifts_per_image = x_shifts[0] * expanded_strides_per_image + y_shifts_per_image = y_shifts[0] * expanded_strides_per_image + x_centers_per_image = ( + (x_shifts_per_image + 0.5 * expanded_strides_per_image) + .unsqueeze(0) + .repeat(num_gt, 1) + ) # [n_anchor] -> [n_gt, n_anchor] + y_centers_per_image = ( + (y_shifts_per_image + 0.5 * expanded_strides_per_image) + .unsqueeze(0) + .repeat(num_gt, 1) + ) + + gt_bboxes_per_image_l = ( + (gt_bboxes_per_image[:, 0] - 0.5 * gt_bboxes_per_image[:, 2]) + .unsqueeze(1) + .repeat(1, total_num_anchors) + ) + gt_bboxes_per_image_r = ( + (gt_bboxes_per_image[:, 0] + 0.5 * gt_bboxes_per_image[:, 2]) + .unsqueeze(1) + .repeat(1, total_num_anchors) + ) + gt_bboxes_per_image_t = ( + (gt_bboxes_per_image[:, 1] - 0.5 * gt_bboxes_per_image[:, 3]) + .unsqueeze(1) + .repeat(1, total_num_anchors) + ) + gt_bboxes_per_image_b = ( + (gt_bboxes_per_image[:, 1] + 0.5 * gt_bboxes_per_image[:, 3]) + .unsqueeze(1) + .repeat(1, total_num_anchors) + ) + + b_l = x_centers_per_image - gt_bboxes_per_image_l + b_r = gt_bboxes_per_image_r - x_centers_per_image + b_t = y_centers_per_image - gt_bboxes_per_image_t + b_b = gt_bboxes_per_image_b - y_centers_per_image + bbox_deltas = torch.stack([b_l, b_t, b_r, b_b], 2) + + is_in_boxes = bbox_deltas.min(dim=-1).values > 0.0 + is_in_boxes_all = is_in_boxes.sum(dim=0) > 0 + # in fixed center + + center_radius = 2.5 + + gt_bboxes_per_image_l = (gt_bboxes_per_image[:, 0]).unsqueeze(1).repeat( + 1, total_num_anchors + ) - center_radius * expanded_strides_per_image.unsqueeze(0) + gt_bboxes_per_image_r = (gt_bboxes_per_image[:, 0]).unsqueeze(1).repeat( + 1, total_num_anchors + ) + center_radius * expanded_strides_per_image.unsqueeze(0) + gt_bboxes_per_image_t = (gt_bboxes_per_image[:, 
1]).unsqueeze(1).repeat( + 1, total_num_anchors + ) - center_radius * expanded_strides_per_image.unsqueeze(0) + gt_bboxes_per_image_b = (gt_bboxes_per_image[:, 1]).unsqueeze(1).repeat( + 1, total_num_anchors + ) + center_radius * expanded_strides_per_image.unsqueeze(0) + + c_l = x_centers_per_image - gt_bboxes_per_image_l + c_r = gt_bboxes_per_image_r - x_centers_per_image + c_t = y_centers_per_image - gt_bboxes_per_image_t + c_b = gt_bboxes_per_image_b - y_centers_per_image + center_deltas = torch.stack([c_l, c_t, c_r, c_b], 2) + is_in_centers = center_deltas.min(dim=-1).values > 0.0 + is_in_centers_all = is_in_centers.sum(dim=0) > 0 + + # in boxes and in centers + is_in_boxes_anchor = is_in_boxes_all | is_in_centers_all + + is_in_boxes_and_center = ( + is_in_boxes[:, is_in_boxes_anchor] & is_in_centers[:, is_in_boxes_anchor] + ) + return is_in_boxes_anchor, is_in_boxes_and_center + + def dynamic_k_matching(self, cost, pair_wise_ious, gt_classes, num_gt, fg_mask): + # Dynamic K + # --------------------------------------------------------------- + matching_matrix = torch.zeros_like(cost, dtype=torch.uint8) + + ious_in_boxes_matrix = pair_wise_ious + n_candidate_k = min(10, ious_in_boxes_matrix.size(1)) + topk_ious, _ = torch.topk(ious_in_boxes_matrix, n_candidate_k, dim=1) + dynamic_ks = torch.clamp(topk_ious.sum(1).int(), min=1) + dynamic_ks = dynamic_ks.tolist() + for gt_idx in range(num_gt): + _, pos_idx = torch.topk( + cost[gt_idx], k=dynamic_ks[gt_idx], largest=False + ) + matching_matrix[gt_idx][pos_idx] = 1 + + del topk_ious, dynamic_ks, pos_idx + + anchor_matching_gt = matching_matrix.sum(0) + if (anchor_matching_gt > 1).sum() > 0: + _, cost_argmin = torch.min(cost[:, anchor_matching_gt > 1], dim=0) + matching_matrix[:, anchor_matching_gt > 1] *= 0 + matching_matrix[cost_argmin, anchor_matching_gt > 1] = 1 + fg_mask_inboxes = matching_matrix.sum(0) > 0 + num_fg = fg_mask_inboxes.sum().item() + + fg_mask[fg_mask.clone()] = fg_mask_inboxes + + matched_gt_inds = matching_matrix[:, fg_mask_inboxes].argmax(0) + gt_matched_classes = gt_classes[matched_gt_inds] + + pred_ious_this_matching = (matching_matrix * pair_wise_ious).sum(0)[ + fg_mask_inboxes + ] + return num_fg, gt_matched_classes, pred_ious_this_matching, matched_gt_inds diff --git a/asone/detectors/yolox/yolox/models/yolo_pafpn.py b/asone/detectors/yolox/yolox/models/yolo_pafpn.py new file mode 100644 index 0000000000000000000000000000000000000000..4c4e18a5c3273ecdd878444cc42965e6a24a0cd1 --- /dev/null +++ b/asone/detectors/yolox/yolox/models/yolo_pafpn.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +import torch +import torch.nn as nn + +from .darknet import CSPDarknet +from .network_blocks import BaseConv, CSPLayer, DWConv + + +class YOLOPAFPN(nn.Module): + """ + YOLOv3 model. Darknet 53 is the default backbone of this model. 
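Example (editor's sketch; depth=0.33 / width=0.50 is the usual "small" scaling, used
here only as an assumption for the shape check):

    import torch
    neck = YOLOPAFPN(depth=0.33, width=0.50)
    p3, p4, p5 = neck(torch.randn(1, 3, 640, 640))
    # expected shapes: (1, 128, 80, 80), (1, 256, 40, 40), (1, 512, 20, 20)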
+ """ + + def __init__( + self, + depth=1.0, + width=1.0, + in_features=("dark3", "dark4", "dark5"), + in_channels=[256, 512, 1024], + depthwise=False, + act="silu", + ): + super().__init__() + self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act) + self.in_features = in_features + self.in_channels = in_channels + Conv = DWConv if depthwise else BaseConv + + self.upsample = nn.Upsample(scale_factor=2, mode="nearest") + self.lateral_conv0 = BaseConv( + int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act + ) + self.C3_p4 = CSPLayer( + int(2 * in_channels[1] * width), + int(in_channels[1] * width), + round(3 * depth), + False, + depthwise=depthwise, + act=act, + ) # cat + + self.reduce_conv1 = BaseConv( + int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act + ) + self.C3_p3 = CSPLayer( + int(2 * in_channels[0] * width), + int(in_channels[0] * width), + round(3 * depth), + False, + depthwise=depthwise, + act=act, + ) + + # bottom-up conv + self.bu_conv2 = Conv( + int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act + ) + self.C3_n3 = CSPLayer( + int(2 * in_channels[0] * width), + int(in_channels[1] * width), + round(3 * depth), + False, + depthwise=depthwise, + act=act, + ) + + # bottom-up conv + self.bu_conv1 = Conv( + int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act + ) + self.C3_n4 = CSPLayer( + int(2 * in_channels[1] * width), + int(in_channels[2] * width), + round(3 * depth), + False, + depthwise=depthwise, + act=act, + ) + + def forward(self, input): + """ + Args: + inputs: input images. + + Returns: + Tuple[Tensor]: FPN feature. + """ + + # backbone + out_features = self.backbone(input) + features = [out_features[f] for f in self.in_features] + [x2, x1, x0] = features + + fpn_out0 = self.lateral_conv0(x0) # 1024->512/32 + f_out0 = self.upsample(fpn_out0) # 512/16 + f_out0 = torch.cat([f_out0, x1], 1) # 512->1024/16 + f_out0 = self.C3_p4(f_out0) # 1024->512/16 + + fpn_out1 = self.reduce_conv1(f_out0) # 512->256/16 + f_out1 = self.upsample(fpn_out1) # 256/8 + f_out1 = torch.cat([f_out1, x2], 1) # 256->512/8 + pan_out2 = self.C3_p3(f_out1) # 512->256/8 + + p_out1 = self.bu_conv2(pan_out2) # 256->256/16 + p_out1 = torch.cat([p_out1, fpn_out1], 1) # 256->512/16 + pan_out1 = self.C3_n3(p_out1) # 512->512/16 + + p_out0 = self.bu_conv1(pan_out1) # 512->512/32 + p_out0 = torch.cat([p_out0, fpn_out0], 1) # 512->1024/32 + pan_out0 = self.C3_n4(p_out0) # 1024->1024/32 + + outputs = (pan_out2, pan_out1, pan_out0) + return outputs diff --git a/asone/detectors/yolox/yolox/models/yolox.py b/asone/detectors/yolox/yolox/models/yolox.py new file mode 100644 index 0000000000000000000000000000000000000000..8b8ab0481e422aac42158b2070d8de37c9fd56bd --- /dev/null +++ b/asone/detectors/yolox/yolox/models/yolox.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +import torch.nn as nn + +from .yolo_head import YOLOXHead +from .yolo_pafpn import YOLOPAFPN + + +class YOLOX(nn.Module): + """ + YOLOX model module. The module list is defined by create_yolov3_modules function. + The network returns loss values from three YOLO layers during training + and detection results during test. 
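Example (editor's sketch): with the default YOLOPAFPN backbone and the 80-class head,
inference mode returns decoded but not yet NMS-filtered predictions.

    import torch
    model = YOLOX().eval()
    with torch.no_grad():
        preds = model(torch.randn(1, 3, 640, 640))
    # expected: preds.shape == (1, 8400, 85); each row is (cx, cy, w, h, obj, 80 class scores)
    # and is typically filtered afterwards with the postprocess helper in yolox/utils/boxes.py
    # (added later in this diff)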
+ """ + + def __init__(self, backbone=None, head=None): + super().__init__() + if backbone is None: + backbone = YOLOPAFPN() + if head is None: + head = YOLOXHead(80) + + self.backbone = backbone + self.head = head + + def forward(self, x, targets=None): + # fpn output content features of [dark3, dark4, dark5] + fpn_outs = self.backbone(x) + + if self.training: + assert targets is not None + loss, iou_loss, conf_loss, cls_loss, l1_loss, num_fg = self.head( + fpn_outs, targets, x + ) + outputs = { + "total_loss": loss, + "iou_loss": iou_loss, + "l1_loss": l1_loss, + "conf_loss": conf_loss, + "cls_loss": cls_loss, + "num_fg": num_fg, + } + else: + outputs = self.head(fpn_outs) + + return outputs diff --git a/asone/detectors/yolox/yolox/utils/__init__.py b/asone/detectors/yolox/yolox/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..15426396e53575ed7038f792c50dd254984d060f --- /dev/null +++ b/asone/detectors/yolox/yolox/utils/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +from .allreduce_norm import * +from .boxes import * +from .checkpoint import load_ckpt, save_checkpoint +from .compat import meshgrid +from .demo_utils import * +from .dist import * +from .ema import * +from .logger import WandbLogger, setup_logger +from .lr_scheduler import LRScheduler +from .metric import * +from .model_utils import * +from .setup_env import * +from .visualize import * diff --git a/asone/detectors/yolox/yolox/utils/allreduce_norm.py b/asone/detectors/yolox/yolox/utils/allreduce_norm.py new file mode 100644 index 0000000000000000000000000000000000000000..142c76c78061db6e2c5f4b899bcc5e2f2214f010 --- /dev/null +++ b/asone/detectors/yolox/yolox/utils/allreduce_norm.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +import pickle +from collections import OrderedDict + +import torch +from torch import distributed as dist +from torch import nn + +from .dist import _get_global_gloo_group, get_world_size + +ASYNC_NORM = ( + nn.BatchNorm1d, + nn.BatchNorm2d, + nn.BatchNorm3d, + nn.InstanceNorm1d, + nn.InstanceNorm2d, + nn.InstanceNorm3d, +) + +__all__ = [ + "get_async_norm_states", + "pyobj2tensor", + "tensor2pyobj", + "all_reduce", + "all_reduce_norm", +] + + +def get_async_norm_states(module): + async_norm_states = OrderedDict() + for name, child in module.named_modules(): + if isinstance(child, ASYNC_NORM): + for k, v in child.state_dict().items(): + async_norm_states[".".join([name, k])] = v + return async_norm_states + + +def pyobj2tensor(pyobj, device="cuda"): + """serialize picklable python object to tensor""" + storage = torch.ByteStorage.from_buffer(pickle.dumps(pyobj)) + return torch.ByteTensor(storage).to(device=device) + + +def tensor2pyobj(tensor): + """deserialize tensor to picklable python object""" + return pickle.loads(tensor.cpu().numpy().tobytes()) + + +def _get_reduce_op(op_name): + return { + "sum": dist.ReduceOp.SUM, + "mean": dist.ReduceOp.SUM, + }[op_name.lower()] + + +def all_reduce(py_dict, op="sum", group=None): + """ + Apply all reduce function for python dict object. + NOTE: make sure that every py_dict has the same keys and values are in the same shape. + + Args: + py_dict (dict): dict to apply all reduce op. + op (str): operator, could be "sum" or "mean". 
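Example (editor's sketch): in a single-process run this is effectively a no-op, which
makes it easy to inspect what would be synchronized.

    import torch.nn as nn
    toy = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8))
    states = get_async_norm_states(toy)              # keys like "1.weight", "1.running_mean", ...
    assert all_reduce(states, op="mean") is states   # world_size == 1 -> returned unchanged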
+ """ + world_size = get_world_size() + if world_size == 1: + return py_dict + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group) == 1: + return py_dict + + # all reduce logic across different devices. + py_key = list(py_dict.keys()) + py_key_tensor = pyobj2tensor(py_key) + dist.broadcast(py_key_tensor, src=0) + py_key = tensor2pyobj(py_key_tensor) + + tensor_shapes = [py_dict[k].shape for k in py_key] + tensor_numels = [py_dict[k].numel() for k in py_key] + + flatten_tensor = torch.cat([py_dict[k].flatten() for k in py_key]) + dist.all_reduce(flatten_tensor, op=_get_reduce_op(op)) + if op == "mean": + flatten_tensor /= world_size + + split_tensors = [ + x.reshape(shape) + for x, shape in zip(torch.split(flatten_tensor, tensor_numels), tensor_shapes) + ] + return OrderedDict({k: v for k, v in zip(py_key, split_tensors)}) + + +def all_reduce_norm(module): + """ + All reduce norm statistics in different devices. + """ + states = get_async_norm_states(module) + states = all_reduce(states, op="mean") + module.load_state_dict(states, strict=False) diff --git a/asone/detectors/yolox/yolox/utils/boxes.py b/asone/detectors/yolox/yolox/utils/boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..d2377e6cd9a920ffb38117394d2cdc6fa11b313e --- /dev/null +++ b/asone/detectors/yolox/yolox/utils/boxes.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +import numpy as np + +import torch +import torchvision + +__all__ = [ + "filter_box", + "postprocess", + "bboxes_iou", + "matrix_iou", + "adjust_box_anns", + "xyxy2xywh", + "xyxy2cxcywh", +] + + +def filter_box(output, scale_range): + """ + output: (N, 5+class) shape + """ + min_scale, max_scale = scale_range + w = output[:, 2] - output[:, 0] + h = output[:, 3] - output[:, 1] + keep = (w * h > min_scale * min_scale) & (w * h < max_scale * max_scale) + return output[keep] + + +def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False): + box_corner = prediction.new(prediction.shape) + box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 + box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 + box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 + box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 + prediction[:, :, :4] = box_corner[:, :, :4] + + output = [None for _ in range(len(prediction))] + for i, image_pred in enumerate(prediction): + + # If none are remaining => process next image + if not image_pred.size(0): + continue + # Get score and class with highest confidence + class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True) + + conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze() + # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) + detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1) + detections = detections[conf_mask] + # print(detections.shape) + # exit() + if not detections.size(0): + continue + if class_agnostic: + nms_out_index = torchvision.ops.nms( + detections[:, :4], + detections[:, 4] * detections[:, 5], + nms_thre, + ) + else: + nms_out_index = torchvision.ops.batched_nms( + detections[:, :4], + detections[:, 4] * detections[:, 5], + detections[:, 6], + nms_thre, + ) + + detections = detections[nms_out_index] + if output[i] is None: + output[i] = detections + else: + output[i] = torch.cat((output[i], detections)) + + 
return output + + +def bboxes_iou(bboxes_a, bboxes_b, xyxy=True): + if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4: + raise IndexError + + if xyxy: + tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2]) + br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:]) + area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1) + area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1) + else: + tl = torch.max( + (bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2), + (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2), + ) + br = torch.min( + (bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2), + (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2), + ) + + area_a = torch.prod(bboxes_a[:, 2:], 1) + area_b = torch.prod(bboxes_b[:, 2:], 1) + en = (tl < br).type(tl.type()).prod(dim=2) + area_i = torch.prod(br - tl, 2) * en # * ((tl < br).all()) + return area_i / (area_a[:, None] + area_b - area_i) + + +def matrix_iou(a, b): + """ + return iou of a and b, numpy version for data augenmentation + """ + lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) + rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) + + area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) + area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) + area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) + return area_i / (area_a[:, np.newaxis] + area_b - area_i + 1e-12) + + +def adjust_box_anns(bbox, scale_ratio, padw, padh, w_max, h_max): + bbox[:, 0::2] = np.clip(bbox[:, 0::2] * scale_ratio + padw, 0, w_max) + bbox[:, 1::2] = np.clip(bbox[:, 1::2] * scale_ratio + padh, 0, h_max) + return bbox + + +def xyxy2xywh(bboxes): + bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0] + bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1] + return bboxes + + +def xyxy2cxcywh(bboxes): + bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0] + bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1] + bboxes[:, 0] = bboxes[:, 0] + bboxes[:, 2] * 0.5 + bboxes[:, 1] = bboxes[:, 1] + bboxes[:, 3] * 0.5 + return bboxes diff --git a/asone/detectors/yolox/yolox/utils/checkpoint.py b/asone/detectors/yolox/yolox/utils/checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..a0c200e41da9ad8b720369a2181c9642724622ca --- /dev/null +++ b/asone/detectors/yolox/yolox/utils/checkpoint.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. +import os +import shutil +from loguru import logger + +import torch + + +def load_ckpt(model, ckpt): + model_state_dict = model.state_dict() + load_dict = {} + for key_model, v in model_state_dict.items(): + if key_model not in ckpt: + logger.warning( + "{} is not in the ckpt. 
Please double check and see if this is desired.".format( + key_model + ) + ) + continue + v_ckpt = ckpt[key_model] + if v.shape != v_ckpt.shape: + logger.warning( + "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format( + key_model, v_ckpt.shape, key_model, v.shape + ) + ) + continue + load_dict[key_model] = v_ckpt + + model.load_state_dict(load_dict, strict=False) + return model + + +def save_checkpoint(state, is_best, save_dir, model_name=""): + if not os.path.exists(save_dir): + os.makedirs(save_dir) + filename = os.path.join(save_dir, model_name + "_ckpt.pth") + torch.save(state, filename) + if is_best: + best_filename = os.path.join(save_dir, "best_ckpt.pth") + shutil.copyfile(filename, best_filename) diff --git a/asone/detectors/yolox/yolox/utils/compat.py b/asone/detectors/yolox/yolox/utils/compat.py new file mode 100644 index 0000000000000000000000000000000000000000..1324077e67215451aa8351f47f5112cd0e5e1018 --- /dev/null +++ b/asone/detectors/yolox/yolox/utils/compat.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- + +import torch + +_TORCH_VER = [int(x) for x in torch.__version__.split(".")[:2]] + +__all__ = ["meshgrid"] + + +def meshgrid(*tensors): + if _TORCH_VER >= [1, 10]: + return torch.meshgrid(*tensors, indexing="ij") + else: + return torch.meshgrid(*tensors) diff --git a/asone/detectors/yolox/yolox/utils/demo_utils.py b/asone/detectors/yolox/yolox/utils/demo_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..71222379497bd4a57d464afb63baebe43e9c447b --- /dev/null +++ b/asone/detectors/yolox/yolox/utils/demo_utils.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +import os + +import numpy as np + +__all__ = ["mkdir", "nms", "multiclass_nms", "demo_postprocess"] + + +def mkdir(path): + if not os.path.exists(path): + os.makedirs(path) + + +def nms(boxes, scores, nms_thr): + """Single class NMS implemented in Numpy.""" + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= nms_thr)[0] + order = order[inds + 1] + + return keep + + +def multiclass_nms(boxes, scores, nms_thr, score_thr, class_agnostic=True): + """Multiclass NMS implemented in Numpy""" + if class_agnostic: + nms_method = multiclass_nms_class_agnostic + else: + nms_method = multiclass_nms_class_aware + return nms_method(boxes, scores, nms_thr, score_thr) + + +def multiclass_nms_class_aware(boxes, scores, nms_thr, score_thr): + """Multiclass NMS implemented in Numpy. 
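Editor's aside (standalone sketch for the NumPy NMS helpers above; values are arbitrary):

    import numpy as np
    boxes = np.array([[0., 0., 10., 10.], [1., 1., 11., 11.], [50., 50., 60., 60.]])
    scores = np.array([[0.9, 0.1], [0.8, 0.1], [0.1, 0.6]])
    dets = multiclass_nms(boxes, scores, nms_thr=0.5, score_thr=0.3, class_agnostic=False)
    # rows are [x1, y1, x2, y2, score, class_id]: the overlapping 0.8 box is suppressed
    # by the 0.9 box, and the distant box survives as class 1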
Class-aware version.""" + final_dets = [] + num_classes = scores.shape[1] + for cls_ind in range(num_classes): + cls_scores = scores[:, cls_ind] + valid_score_mask = cls_scores > score_thr + if valid_score_mask.sum() == 0: + continue + else: + valid_scores = cls_scores[valid_score_mask] + valid_boxes = boxes[valid_score_mask] + keep = nms(valid_boxes, valid_scores, nms_thr) + if len(keep) > 0: + cls_inds = np.ones((len(keep), 1)) * cls_ind + dets = np.concatenate( + [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1 + ) + final_dets.append(dets) + if len(final_dets) == 0: + return None + return np.concatenate(final_dets, 0) + + +def multiclass_nms_class_agnostic(boxes, scores, nms_thr, score_thr): + """Multiclass NMS implemented in Numpy. Class-agnostic version.""" + cls_inds = scores.argmax(1) + cls_scores = scores[np.arange(len(cls_inds)), cls_inds] + + valid_score_mask = cls_scores > score_thr + if valid_score_mask.sum() == 0: + return None + valid_scores = cls_scores[valid_score_mask] + valid_boxes = boxes[valid_score_mask] + valid_cls_inds = cls_inds[valid_score_mask] + keep = nms(valid_boxes, valid_scores, nms_thr) + if keep: + dets = np.concatenate( + [valid_boxes[keep], valid_scores[keep, None], valid_cls_inds[keep, None]], 1 + ) + return dets + + +def demo_postprocess(outputs, img_size, p6=False): + + grids = [] + expanded_strides = [] + + if not p6: + strides = [8, 16, 32] + else: + strides = [8, 16, 32, 64] + + hsizes = [img_size[0] // stride for stride in strides] + wsizes = [img_size[1] // stride for stride in strides] + + for hsize, wsize, stride in zip(hsizes, wsizes, strides): + xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize)) + grid = np.stack((xv, yv), 2).reshape(1, -1, 2) + grids.append(grid) + shape = grid.shape[:2] + expanded_strides.append(np.full((*shape, 1), stride)) + + grids = np.concatenate(grids, 1) + expanded_strides = np.concatenate(expanded_strides, 1) + outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides + outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides + + return outputs diff --git a/asone/detectors/yolox/yolox/utils/dist.py b/asone/detectors/yolox/yolox/utils/dist.py new file mode 100644 index 0000000000000000000000000000000000000000..9e8fea93346f2b52270c07ba61f2cc17c3c07047 --- /dev/null +++ b/asone/detectors/yolox/yolox/utils/dist.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# This file mainly comes from +# https://github.com/facebookresearch/detectron2/blob/master/detectron2/utils/comm.py +# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Megvii Inc. All rights reserved. +""" +This file contains primitives for multi-gpu communication. +This is useful when doing distributed training. 
+""" + +import functools +import os +import pickle +import time +from contextlib import contextmanager +from loguru import logger + +import numpy as np + +import torch +from torch import distributed as dist + +__all__ = [ + "get_num_devices", + "wait_for_the_master", + "is_main_process", + "synchronize", + "get_world_size", + "get_rank", + "get_local_rank", + "get_local_size", + "time_synchronized", + "gather", + "all_gather", +] + +_LOCAL_PROCESS_GROUP = None + + +def get_num_devices(): + gpu_list = os.getenv('CUDA_VISIBLE_DEVICES', None) + if gpu_list is not None: + return len(gpu_list.split(',')) + else: + devices_list_info = os.popen("nvidia-smi -L") + devices_list_info = devices_list_info.read().strip().split("\n") + return len(devices_list_info) + + +@contextmanager +def wait_for_the_master(local_rank: int = None): + """ + Make all processes waiting for the master to do some task. + + Args: + local_rank (int): the rank of the current process. Default to None. + If None, it will use the rank of the current process. + """ + if local_rank is None: + local_rank = get_local_rank() + + if local_rank > 0: + dist.barrier() + yield + if local_rank == 0: + if not dist.is_available(): + return + if not dist.is_initialized(): + return + else: + dist.barrier() + + +def synchronize(): + """ + Helper function to synchronize (barrier) among all processes when using distributed training + """ + if not dist.is_available(): + return + if not dist.is_initialized(): + return + world_size = dist.get_world_size() + if world_size == 1: + return + dist.barrier() + + +def get_world_size() -> int: + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank() -> int: + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + return dist.get_rank() + + +def get_local_rank() -> int: + """ + Returns: + The rank of the current process within the local (per-machine) process group. + """ + if _LOCAL_PROCESS_GROUP is None: + return get_rank() + + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + return dist.get_rank(group=_LOCAL_PROCESS_GROUP) + + +def get_local_size() -> int: + """ + Returns: + The size of the per-machine process group, i.e. the number of processes per machine. + """ + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size(group=_LOCAL_PROCESS_GROUP) + + +def is_main_process() -> bool: + return get_rank() == 0 + + +@functools.lru_cache() +def _get_global_gloo_group(): + """ + Return a process group based on gloo backend, containing all the ranks + The result is cached. 
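Editor's aside (usage sketch for `wait_for_the_master` above; `build_dataset` is a
placeholder for any cache-once operation):

    with wait_for_the_master():
        dataset = build_dataset()  # the master runs first; other ranks wait at the barrier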
+ """ + if dist.get_backend() == "nccl": + return dist.new_group(backend="gloo") + else: + return dist.group.WORLD + + +def _serialize_to_tensor(data, group): + backend = dist.get_backend(group) + assert backend in ["gloo", "nccl"] + device = torch.device("cpu" if backend == "gloo" else "cuda") + + buffer = pickle.dumps(data) + if len(buffer) > 1024 ** 3: + logger.warning( + "Rank {} trying to all-gather {:.2f} GB of data on device {}".format( + get_rank(), len(buffer) / (1024 ** 3), device + ) + ) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to(device=device) + return tensor + + +def _pad_to_largest_tensor(tensor, group): + """ + Returns: + list[int]: size of the tensor, on each rank + Tensor: padded tensor that has the max size + """ + world_size = dist.get_world_size(group=group) + assert ( + world_size >= 1 + ), "comm.gather/all_gather must be called from ranks within the given group!" + local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device) + size_list = [ + torch.zeros([1], dtype=torch.int64, device=tensor.device) + for _ in range(world_size) + ] + dist.all_gather(size_list, local_size, group=group) + size_list = [int(size.item()) for size in size_list] + + max_size = max(size_list) + + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + if local_size != max_size: + padding = torch.zeros( + (max_size - local_size,), dtype=torch.uint8, device=tensor.device + ) + tensor = torch.cat((tensor, padding), dim=0) + return size_list, tensor + + +def all_gather(data, group=None): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors). + + Args: + data: any picklable object + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + Returns: + list[data]: list of data gathered from each rank + """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group) == 1: + return [data] + + tensor = _serialize_to_tensor(data, group) + + size_list, tensor = _pad_to_largest_tensor(tensor, group) + max_size = max(size_list) + + # receiving Tensor from all ranks + tensor_list = [ + torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) + for _ in size_list + ] + dist.all_gather(tensor_list, tensor, group=group) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def gather(data, dst=0, group=None): + """ + Run gather on arbitrary picklable data (not necessarily tensors). + + Args: + data: any picklable object + dst (int): destination rank + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + + Returns: + list[data]: on dst, a list of data gathered from each rank. Otherwise, + an empty list. 
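Editor's aside: when torch.distributed is not initialized, `all_gather` above and
`gather` here simply wrap the local value, so evaluation code can call them
unconditionally.

    stats = {"AP50": 0.5}
    assert all_gather(stats) == [stats]
    assert gather(stats, dst=0) == [stats]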
+ """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group=group) == 1: + return [data] + rank = dist.get_rank(group=group) + + tensor = _serialize_to_tensor(data, group) + size_list, tensor = _pad_to_largest_tensor(tensor, group) + + # receiving Tensor from all ranks + if rank == dst: + max_size = max(size_list) + tensor_list = [ + torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) + for _ in size_list + ] + dist.gather(tensor, tensor_list, dst=dst, group=group) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + return data_list + else: + dist.gather(tensor, [], dst=dst, group=group) + return [] + + +def shared_random_seed(): + """ + Returns: + int: a random number that is the same across all workers. + If workers need a shared RNG, they can use this shared seed to + create one. + All workers must call this function, otherwise it will deadlock. + """ + ints = np.random.randint(2 ** 31) + all_ints = all_gather(ints) + return all_ints[0] + + +def time_synchronized(): + """pytorch-accurate time""" + if torch.cuda.is_available(): + torch.cuda.synchronize() + return time.time() diff --git a/asone/detectors/yolox/yolox/utils/ema.py b/asone/detectors/yolox/yolox/utils/ema.py new file mode 100644 index 0000000000000000000000000000000000000000..73acbca6796d3cdd07397e657167acdbd5a57647 --- /dev/null +++ b/asone/detectors/yolox/yolox/utils/ema.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. +import math +from copy import deepcopy + +import torch +import torch.nn as nn + +__all__ = ["ModelEMA", "is_parallel"] + + +def is_parallel(model): + """check if model is in parallel mode.""" + parallel_type = ( + nn.parallel.DataParallel, + nn.parallel.DistributedDataParallel, + ) + return isinstance(model, parallel_type) + + +class ModelEMA: + """ + Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models + Keep a moving average of everything in the model state_dict (parameters and buffers). + This is intended to allow functionality like + https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage + A smoothed version of the weights is necessary for some training schemes to perform well. + This class is sensitive where it is initialized in the sequence of model init, + GPU assignment and distributed training wrappers. + """ + + def __init__(self, model, decay=0.9999, updates=0): + """ + Args: + model (nn.Module): model to apply EMA. + decay (float): ema decay reate. + updates (int): counter of EMA updates. 
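Example (editor's sketch; `model`, `loader` and the optimizer step are placeholders):

    ema = ModelEMA(model)
    for images, targets in loader:
        ...  # forward / backward / optimizer.step()
        ema.update(model)
    # evaluate and checkpoint ema.ema rather than the raw model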
+ """ + # Create EMA(FP32) + self.ema = deepcopy(model.module if is_parallel(model) else model).eval() + self.updates = updates + # decay exponential ramp (to help early epochs) + self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) + for p in self.ema.parameters(): + p.requires_grad_(False) + + def update(self, model): + # Update EMA parameters + with torch.no_grad(): + self.updates += 1 + d = self.decay(self.updates) + + msd = ( + model.module.state_dict() if is_parallel(model) else model.state_dict() + ) # model state_dict + for k, v in self.ema.state_dict().items(): + if v.dtype.is_floating_point: + v *= d + v += (1.0 - d) * msd[k].detach() diff --git a/asone/detectors/yolox/yolox/utils/logger.py b/asone/detectors/yolox/yolox/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..20a395b54661449cafd0147b23d46e22081295f8 --- /dev/null +++ b/asone/detectors/yolox/yolox/utils/logger.py @@ -0,0 +1,384 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +import inspect +import os +import sys +from collections import defaultdict +from loguru import logger + +import cv2 +import numpy as np + +import torch + + +def get_caller_name(depth=0): + """ + Args: + depth (int): Depth of caller conext, use 0 for caller depth. + Default value: 0. + + Returns: + str: module name of the caller + """ + # the following logic is a little bit faster than inspect.stack() logic + frame = inspect.currentframe().f_back + for _ in range(depth): + frame = frame.f_back + + return frame.f_globals["__name__"] + + +class StreamToLoguru: + """ + stream object that redirects writes to a logger instance. + """ + + def __init__(self, level="INFO", caller_names=("apex", "pycocotools")): + """ + Args: + level(str): log level string of loguru. Default value: "INFO". + caller_names(tuple): caller names of redirected module. + Default value: (apex, pycocotools). + """ + self.level = level + self.linebuf = "" + self.caller_names = caller_names + + def write(self, buf): + full_name = get_caller_name(depth=1) + module_name = full_name.rsplit(".", maxsplit=-1)[0] + if module_name in self.caller_names: + for line in buf.rstrip().splitlines(): + # use caller level log + logger.opt(depth=2).log(self.level, line.rstrip()) + else: + sys.__stdout__.write(buf) + + def flush(self): + pass + + def isatty(self): + # when using colab, jax is installed by default and issue like + # https://github.com/Megvii-BaseDetection/YOLOX/issues/1437 might be raised + # due to missing attribute like`isatty`. + # For more details, checked the following link: + # https://github.com/google/jax/blob/10720258ea7fb5bde997dfa2f3f71135ab7a6733/jax/_src/pretty_printer.py#L54 # noqa + return True + + +def redirect_sys_output(log_level="INFO"): + redirect_logger = StreamToLoguru(log_level) + sys.stderr = redirect_logger + sys.stdout = redirect_logger + + +def setup_logger(save_dir, distributed_rank=0, filename="log.txt", mode="a"): + """setup logger for training and testing. + Args: + save_dir(str): location to save log file + distributed_rank(int): device rank when multi-gpu environment + filename (string): log save name. + mode(str): log file write mode, `append` or `override`. default is `a`. + + Return: + logger instance. 
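Example (editor's sketch; the output directory is arbitrary):

    from loguru import logger
    setup_logger("./YOLOX_outputs/example_exp", distributed_rank=0, filename="train_log.txt", mode="a")
    logger.info("written to stderr and to ./YOLOX_outputs/example_exp/train_log.txt")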
+ """ + loguru_format = ( + "{time:YYYY-MM-DD HH:mm:ss} | " + "{level: <8} | " + "{name}:{line} - {message}" + ) + + logger.remove() + save_file = os.path.join(save_dir, filename) + if mode == "o" and os.path.exists(save_file): + os.remove(save_file) + # only keep logger in rank0 process + if distributed_rank == 0: + logger.add( + sys.stderr, + format=loguru_format, + level="INFO", + enqueue=True, + ) + logger.add(save_file) + + # redirect stdout/stderr to loguru + redirect_sys_output("INFO") + + +class WandbLogger(object): + """ + Log training runs, datasets, models, and predictions to Weights & Biases. + This logger sends information to W&B at wandb.ai. + By default, this information includes hyperparameters, + system configuration and metrics, model metrics, + and basic data metrics and analyses. + + For more information, please refer to: + https://docs.wandb.ai/guides/track + https://docs.wandb.ai/guides/integrations/other/yolox + """ + def __init__(self, + project=None, + name=None, + id=None, + entity=None, + save_dir=None, + config=None, + val_dataset=None, + num_eval_images=100, + log_checkpoints=False, + **kwargs): + """ + Args: + project (str): wandb project name. + name (str): wandb run name. + id (str): wandb run id. + entity (str): wandb entity name. + save_dir (str): save directory. + config (dict): config dict. + val_dataset (Dataset): validation dataset. + num_eval_images (int): number of images from the validation set to log. + log_checkpoints (bool): log checkpoints + **kwargs: other kwargs. + + Usage: + Any arguments for wandb.init can be provided on the command line using + the prefix `wandb-`. + Example + ``` + python tools/train.py .... --logger wandb wandb-project \ + wandb-name \ + wandb-id \ + wandb-save_dir \ + wandb-num_eval_imges \ + wandb-log_checkpoints + ``` + The val_dataset argument is not open to the command line. + """ + try: + import wandb + self.wandb = wandb + except ModuleNotFoundError: + raise ModuleNotFoundError( + "wandb is not installed." + "Please install wandb using pip install wandb" + ) + + self.project = project + self.name = name + self.id = id + self.save_dir = save_dir + self.config = config + self.kwargs = kwargs + self.entity = entity + self._run = None + self.val_artifact = None + if num_eval_images == -1: + self.num_log_images = len(val_dataset) + else: + self.num_log_images = min(num_eval_images, len(val_dataset)) + self.log_checkpoints = (log_checkpoints == "True" or log_checkpoints == "true") + self._wandb_init = dict( + project=self.project, + name=self.name, + id=self.id, + entity=self.entity, + dir=self.save_dir, + resume="allow" + ) + self._wandb_init.update(**kwargs) + + _ = self.run + + if self.config: + self.run.config.update(self.config) + self.run.define_metric("train/epoch") + self.run.define_metric("val/*", step_metric="train/epoch") + self.run.define_metric("train/step") + self.run.define_metric("train/*", step_metric="train/step") + + if val_dataset and self.num_log_images != 0: + self.cats = val_dataset.cats + self.id_to_class = { + cls['id']: cls['name'] for cls in self.cats + } + self._log_validation_set(val_dataset) + + @property + def run(self): + if self._run is None: + if self.wandb.run is not None: + logger.info( + "There is a wandb run already in progress " + "and newly created instances of `WandbLogger` will reuse" + " this run. If this is not desired, call `wandb.finish()`" + "before instantiating `WandbLogger`." 
+ ) + self._run = self.wandb.run + else: + self._run = self.wandb.init(**self._wandb_init) + return self._run + + def _log_validation_set(self, val_dataset): + """ + Log validation set to wandb. + + Args: + val_dataset (Dataset): validation dataset. + """ + if self.val_artifact is None: + self.val_artifact = self.wandb.Artifact(name="validation_images", type="dataset") + self.val_table = self.wandb.Table(columns=["id", "input"]) + + for i in range(self.num_log_images): + data_point = val_dataset[i] + img = data_point[0] + id = data_point[3] + img = np.transpose(img, (1, 2, 0)) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + self.val_table.add_data( + id.item(), + self.wandb.Image(img) + ) + + self.val_artifact.add(self.val_table, "validation_images_table") + self.run.use_artifact(self.val_artifact) + self.val_artifact.wait() + + def log_metrics(self, metrics, step=None): + """ + Args: + metrics (dict): metrics dict. + step (int): step number. + """ + + for k, v in metrics.items(): + if isinstance(v, torch.Tensor): + metrics[k] = v.item() + + if step is not None: + metrics.update({"train/step": step}) + self.run.log(metrics) + else: + self.run.log(metrics) + + def log_images(self, predictions): + if len(predictions) == 0 or self.val_artifact is None or self.num_log_images == 0: + return + + table_ref = self.val_artifact.get("validation_images_table") + + columns = ["id", "predicted"] + for cls in self.cats: + columns.append(cls["name"]) + + result_table = self.wandb.Table(columns=columns) + for idx, val in table_ref.iterrows(): + + avg_scores = defaultdict(int) + num_occurrences = defaultdict(int) + + if val[0] in predictions: + prediction = predictions[val[0]] + boxes = [] + + for i in range(len(prediction["bboxes"])): + bbox = prediction["bboxes"][i] + x0 = bbox[0] + y0 = bbox[1] + x1 = bbox[2] + y1 = bbox[3] + box = { + "position": { + "minX": min(x0, x1), + "minY": min(y0, y1), + "maxX": max(x0, x1), + "maxY": max(y0, y1) + }, + "class_id": prediction["categories"][i], + "domain": "pixel" + } + avg_scores[ + self.id_to_class[prediction["categories"][i]] + ] += prediction["scores"][i] + num_occurrences[self.id_to_class[prediction["categories"][i]]] += 1 + boxes.append(box) + else: + boxes = [] + + average_class_score = [] + for cls in self.cats: + if cls["name"] not in num_occurrences: + score = 0 + else: + score = avg_scores[cls["name"]] / num_occurrences[cls["name"]] + average_class_score.append(score) + result_table.add_data( + idx, + self.wandb.Image(val[1], boxes={ + "prediction": { + "box_data": boxes, + "class_labels": self.id_to_class + } + } + ), + *average_class_score + ) + + self.wandb.log({"val_results/result_table": result_table}) + + def save_checkpoint(self, save_dir, model_name, is_best, metadata=None): + """ + Args: + save_dir (str): save directory. + model_name (str): model name. + is_best (bool): whether the model is the best model. + metadata (dict): metadata to save corresponding to the checkpoint. 
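+
+        Example (illustrative sketch; directory, model name and epoch are placeholders):
+            wandb_logger.save_checkpoint("./YOLOX_outputs/yolox_s", "yolox_s", is_best=True, metadata={"epoch": 10})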
+ """ + + if not self.log_checkpoints: + return + + if "epoch" in metadata: + epoch = metadata["epoch"] + else: + epoch = None + + filename = os.path.join(save_dir, model_name + "_ckpt.pth") + artifact = self.wandb.Artifact( + name=f"run_{self.run.id}_model", + type="model", + metadata=metadata + ) + artifact.add_file(filename, name="model_ckpt.pth") + + aliases = ["latest"] + + if is_best: + aliases.append("best") + + if epoch: + aliases.append(f"epoch-{epoch}") + + self.run.log_artifact(artifact, aliases=aliases) + + def finish(self): + self.run.finish() + + @classmethod + def initialize_wandb_logger(cls, args, exp, val_dataset): + wandb_params = dict() + prefix = "wandb-" + for k, v in zip(args.opts[0::2], args.opts[1::2]): + if k.startswith("wandb-"): + try: + wandb_params.update({k[len(prefix):]: int(v)}) + except ValueError: + wandb_params.update({k[len(prefix):]: v}) + + return cls(config=vars(exp), val_dataset=val_dataset, **wandb_params) diff --git a/asone/detectors/yolox/yolox/utils/lr_scheduler.py b/asone/detectors/yolox/yolox/utils/lr_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..2a1513c22abb5bbcb65447f6ee4bbadebfa9d43f --- /dev/null +++ b/asone/detectors/yolox/yolox/utils/lr_scheduler.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +import math +from functools import partial + + +class LRScheduler: + def __init__(self, name, lr, iters_per_epoch, total_epochs, **kwargs): + """ + Supported lr schedulers: [cos, warmcos, multistep] + + Args: + lr (float): learning rate. + iters_per_peoch (int): number of iterations in one epoch. + total_epochs (int): number of epochs in training. + kwargs (dict): + - cos: None + - warmcos: [warmup_epochs, warmup_lr_start (default 1e-6)] + - multistep: [milestones (epochs), gamma (default 0.1)] + """ + + self.lr = lr + self.iters_per_epoch = iters_per_epoch + self.total_epochs = total_epochs + self.total_iters = iters_per_epoch * total_epochs + + self.__dict__.update(kwargs) + + self.lr_func = self._get_lr_func(name) + + def update_lr(self, iters): + return self.lr_func(iters) + + def _get_lr_func(self, name): + if name == "cos": # cosine lr schedule + lr_func = partial(cos_lr, self.lr, self.total_iters) + elif name == "warmcos": + warmup_total_iters = self.iters_per_epoch * self.warmup_epochs + warmup_lr_start = getattr(self, "warmup_lr_start", 1e-6) + lr_func = partial( + warm_cos_lr, + self.lr, + self.total_iters, + warmup_total_iters, + warmup_lr_start, + ) + elif name == "yoloxwarmcos": + warmup_total_iters = self.iters_per_epoch * self.warmup_epochs + no_aug_iters = self.iters_per_epoch * self.no_aug_epochs + warmup_lr_start = getattr(self, "warmup_lr_start", 0) + min_lr_ratio = getattr(self, "min_lr_ratio", 0.2) + lr_func = partial( + yolox_warm_cos_lr, + self.lr, + min_lr_ratio, + self.total_iters, + warmup_total_iters, + warmup_lr_start, + no_aug_iters, + ) + elif name == "yoloxsemiwarmcos": + warmup_lr_start = getattr(self, "warmup_lr_start", 0) + min_lr_ratio = getattr(self, "min_lr_ratio", 0.2) + warmup_total_iters = self.iters_per_epoch * self.warmup_epochs + no_aug_iters = self.iters_per_epoch * self.no_aug_epochs + normal_iters = self.iters_per_epoch * self.semi_epoch + semi_iters = self.iters_per_epoch_semi * ( + self.total_epochs - self.semi_epoch - self.no_aug_epochs + ) + lr_func = partial( + yolox_semi_warm_cos_lr, + self.lr, + min_lr_ratio, + warmup_lr_start, + self.total_iters, + normal_iters, + no_aug_iters, + 
warmup_total_iters, + semi_iters, + self.iters_per_epoch, + self.iters_per_epoch_semi, + ) + elif name == "multistep": # stepwise lr schedule + milestones = [ + int(self.total_iters * milestone / self.total_epochs) + for milestone in self.milestones + ] + gamma = getattr(self, "gamma", 0.1) + lr_func = partial(multistep_lr, self.lr, milestones, gamma) + else: + raise ValueError("Scheduler version {} not supported.".format(name)) + return lr_func + + +def cos_lr(lr, total_iters, iters): + """Cosine learning rate""" + lr *= 0.5 * (1.0 + math.cos(math.pi * iters / total_iters)) + return lr + + +def warm_cos_lr(lr, total_iters, warmup_total_iters, warmup_lr_start, iters): + """Cosine learning rate with warm up.""" + if iters <= warmup_total_iters: + lr = (lr - warmup_lr_start) * iters / float( + warmup_total_iters + ) + warmup_lr_start + else: + lr *= 0.5 * ( + 1.0 + + math.cos( + math.pi + * (iters - warmup_total_iters) + / (total_iters - warmup_total_iters) + ) + ) + return lr + + +def yolox_warm_cos_lr( + lr, + min_lr_ratio, + total_iters, + warmup_total_iters, + warmup_lr_start, + no_aug_iter, + iters, +): + """Cosine learning rate with warm up.""" + min_lr = lr * min_lr_ratio + if iters <= warmup_total_iters: + # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start + lr = (lr - warmup_lr_start) * pow( + iters / float(warmup_total_iters), 2 + ) + warmup_lr_start + elif iters >= total_iters - no_aug_iter: + lr = min_lr + else: + lr = min_lr + 0.5 * (lr - min_lr) * ( + 1.0 + + math.cos( + math.pi + * (iters - warmup_total_iters) + / (total_iters - warmup_total_iters - no_aug_iter) + ) + ) + return lr + + +def yolox_semi_warm_cos_lr( + lr, + min_lr_ratio, + warmup_lr_start, + total_iters, + normal_iters, + no_aug_iters, + warmup_total_iters, + semi_iters, + iters_per_epoch, + iters_per_epoch_semi, + iters, +): + """Cosine learning rate with warm up.""" + min_lr = lr * min_lr_ratio + if iters <= warmup_total_iters: + # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start + lr = (lr - warmup_lr_start) * pow( + iters / float(warmup_total_iters), 2 + ) + warmup_lr_start + elif iters >= normal_iters + semi_iters: + lr = min_lr + elif iters <= normal_iters: + lr = min_lr + 0.5 * (lr - min_lr) * ( + 1.0 + + math.cos( + math.pi + * (iters - warmup_total_iters) + / (total_iters - warmup_total_iters - no_aug_iters) + ) + ) + else: + lr = min_lr + 0.5 * (lr - min_lr) * ( + 1.0 + + math.cos( + math.pi + * ( + normal_iters + - warmup_total_iters + + (iters - normal_iters) + * iters_per_epoch + * 1.0 + / iters_per_epoch_semi + ) + / (total_iters - warmup_total_iters - no_aug_iters) + ) + ) + return lr + + +def multistep_lr(lr, milestones, gamma, iters): + """MultiStep learning rate""" + for milestone in milestones: + lr *= gamma if iters >= milestone else 1.0 + return lr diff --git a/asone/detectors/yolox/yolox/utils/metric.py b/asone/detectors/yolox/yolox/utils/metric.py new file mode 100644 index 0000000000000000000000000000000000000000..10f0e631f9996bb50ac72539b7a6dc91d9560932 --- /dev/null +++ b/asone/detectors/yolox/yolox/utils/metric.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. 
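+
+# Usage sketch (illustrative, not part of the upstream YOLOX sources):
+#
+#   meters = MeterBuffer(window_size=20)
+#   meters.update(iter_time=0.05, data_time=0.01)
+#   print(meters["iter_time"].avg, meters["data_time"].global_avg)
+#   time_meters = meters.get_filtered_meter("time")  # all meters whose key contains "time"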
+import functools +import os +import time +from collections import defaultdict, deque + +import numpy as np + +import torch + +__all__ = [ + "AverageMeter", + "MeterBuffer", + "get_total_and_free_memory_in_Mb", + "occupy_mem", + "gpu_mem_usage", +] + + +def get_total_and_free_memory_in_Mb(cuda_device): + devices_info_str = os.popen( + "nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader" + ) + devices_info = devices_info_str.read().strip().split("\n") + if "CUDA_VISIBLE_DEVICES" in os.environ: + visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(',') + cuda_device = int(visible_devices[cuda_device]) + total, used = devices_info[int(cuda_device)].split(",") + return int(total), int(used) + + +def occupy_mem(cuda_device, mem_ratio=0.9): + """ + pre-allocate gpu memory for training to avoid memory Fragmentation. + """ + total, used = get_total_and_free_memory_in_Mb(cuda_device) + max_mem = int(total * mem_ratio) + block_mem = max_mem - used + x = torch.cuda.FloatTensor(256, 1024, block_mem) + del x + time.sleep(5) + + +def gpu_mem_usage(): + """ + Compute the GPU memory usage for the current device (MB). + """ + mem_usage_bytes = torch.cuda.max_memory_allocated() + return mem_usage_bytes / (1024 * 1024) + + +class AverageMeter: + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=50): + self._deque = deque(maxlen=window_size) + self._total = 0.0 + self._count = 0 + + def update(self, value): + self._deque.append(value) + self._count += 1 + self._total += value + + @property + def median(self): + d = np.array(list(self._deque)) + return np.median(d) + + @property + def avg(self): + # if deque is empty, nan will be returned. + d = np.array(list(self._deque)) + return d.mean() + + @property + def global_avg(self): + return self._total / max(self._count, 1e-5) + + @property + def latest(self): + return self._deque[-1] if len(self._deque) > 0 else None + + @property + def total(self): + return self._total + + def reset(self): + self._deque.clear() + self._total = 0.0 + self._count = 0 + + def clear(self): + self._deque.clear() + + +class MeterBuffer(defaultdict): + """Computes and stores the average and current value""" + + def __init__(self, window_size=20): + factory = functools.partial(AverageMeter, window_size=window_size) + super().__init__(factory) + + def reset(self): + for v in self.values(): + v.reset() + + def get_filtered_meter(self, filter_key="time"): + return {k: v for k, v in self.items() if filter_key in k} + + def update(self, values=None, **kwargs): + if values is None: + values = {} + values.update(kwargs) + for k, v in values.items(): + if isinstance(v, torch.Tensor): + v = v.detach() + self[k].update(v) + + def clear_meters(self): + for v in self.values(): + v.clear() diff --git a/asone/detectors/yolox/yolox/utils/model_utils.py b/asone/detectors/yolox/yolox/utils/model_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..3bc2d1ff7a314e143ec3424a0afefc73b7b5b137 --- /dev/null +++ b/asone/detectors/yolox/yolox/utils/model_utils.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. 
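+
+# Usage sketch (illustrative only; assumes an already constructed YOLOX `model`):
+#
+#   print(get_model_info(model, (640, 640)))     # params (M) and GFLOPs at 640x640
+#   model = fuse_model(model)                    # fold BatchNorm layers into the preceding convs
+#   with adjust_status(model, training=False):   # temporarily force eval mode
+#       outputs = model(images)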
+ +import contextlib +from copy import deepcopy +from typing import Sequence + +import torch +import torch.nn as nn + +__all__ = [ + "fuse_conv_and_bn", + "fuse_model", + "get_model_info", + "replace_module", + "freeze_module", + "adjust_status", +] + + +def get_model_info(model: nn.Module, tsize: Sequence[int]) -> str: + from thop import profile + + stride = 64 + img = torch.zeros((1, 3, stride, stride), device=next(model.parameters()).device) + flops, params = profile(deepcopy(model), inputs=(img,), verbose=False) + params /= 1e6 + flops /= 1e9 + flops *= tsize[0] * tsize[1] / stride / stride * 2 # Gflops + info = "Params: {:.2f}M, Gflops: {:.2f}".format(params, flops) + return info + + +def fuse_conv_and_bn(conv: nn.Conv2d, bn: nn.BatchNorm2d) -> nn.Conv2d: + """ + Fuse convolution and batchnorm layers. + check more info on https://tehnokv.com/posts/fusing-batchnorm-and-conv/ + + Args: + conv (nn.Conv2d): convolution to fuse. + bn (nn.BatchNorm2d): batchnorm to fuse. + + Returns: + nn.Conv2d: fused convolution behaves the same as the input conv and bn. + """ + fusedconv = ( + nn.Conv2d( + conv.in_channels, + conv.out_channels, + kernel_size=conv.kernel_size, + stride=conv.stride, + padding=conv.padding, + groups=conv.groups, + bias=True, + ) + .requires_grad_(False) + .to(conv.weight.device) + ) + + # prepare filters + w_conv = conv.weight.clone().view(conv.out_channels, -1) + w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) + fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape)) + + # prepare spatial bias + b_conv = ( + torch.zeros(conv.weight.size(0), device=conv.weight.device) + if conv.bias is None + else conv.bias + ) + b_bn = bn.bias - bn.weight.mul(bn.running_mean).div( + torch.sqrt(bn.running_var + bn.eps) + ) + fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) + + return fusedconv + + +def fuse_model(model: nn.Module) -> nn.Module: + """fuse conv and bn in model + + Args: + model (nn.Module): model to fuse + + Returns: + nn.Module: fused model + """ + from yolox.models.network_blocks import BaseConv + + for m in model.modules(): + if type(m) is BaseConv and hasattr(m, "bn"): + m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv + delattr(m, "bn") # remove batchnorm + m.forward = m.fuseforward # update forward + return model + + +def replace_module(module, replaced_module_type, new_module_type, replace_func=None) -> nn.Module: + """ + Replace given type in module to a new type. mostly used in deploy. + + Args: + module (nn.Module): model to apply replace operation. + replaced_module_type (Type): module type to be replaced. + new_module_type (Type) + replace_func (function): python function to describe replace logic. Defalut value None. + + Returns: + model (nn.Module): module that already been replaced. + """ + + def default_replace_func(replaced_module_type, new_module_type): + return new_module_type() + + if replace_func is None: + replace_func = default_replace_func + + model = module + if isinstance(module, replaced_module_type): + model = replace_func(replaced_module_type, new_module_type) + else: # recurrsively replace + for name, child in module.named_children(): + new_child = replace_module(child, replaced_module_type, new_module_type) + if new_child is not child: # child is already replaced + model.add_module(name, new_child) + + return model + + +def freeze_module(module: nn.Module, name=None) -> nn.Module: + """freeze module inplace + + Args: + module (nn.Module): module to freeze. 
+ name (str, optional): name to freeze. If not given, freeze the whole module. + Note that fuzzy match is not supported. Defaults to None. + + Examples: + freeze the backbone of model + >>> freeze_moudle(model.backbone) + + or freeze the backbone of model by name + >>> freeze_moudle(model, name="backbone") + """ + for param_name, parameter in module.named_parameters(): + if name is None or name in param_name: + parameter.requires_grad = False + + # ensure module like BN and dropout are freezed + for module_name, sub_module in module.named_modules(): + # actually there are no needs to call eval for every single sub_module + if name is None or name in module_name: + sub_module.eval() + + return module + + +@contextlib.contextmanager +def adjust_status(module: nn.Module, training: bool = False) -> nn.Module: + """Adjust module to training/eval mode temporarily. + + Args: + module (nn.Module): module to adjust status. + training (bool): training mode to set. True for train mode, False fro eval mode. + + Examples: + >>> with adjust_status(model, training=False): + ... model(data) + """ + status = {} + + def backup_status(module): + for m in module.modules(): + # save prev status to dict + status[m] = m.training + m.training = training + + def recover_status(module): + for m in module.modules(): + # recover prev status from dict + m.training = status.pop(m) + + backup_status(module) + yield module + recover_status(module) diff --git a/asone/detectors/yolox/yolox/utils/setup_env.py b/asone/detectors/yolox/yolox/utils/setup_env.py new file mode 100644 index 0000000000000000000000000000000000000000..45289f3245f09e48395ad419d17efffe6846b05c --- /dev/null +++ b/asone/detectors/yolox/yolox/utils/setup_env.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +import os +import subprocess +from loguru import logger + +import cv2 + +from .dist import get_world_size, is_main_process + +__all__ = ["configure_nccl", "configure_module", "configure_omp"] + + +def configure_nccl(): + """Configure multi-machine environment variables of NCCL.""" + os.environ["NCCL_LAUNCH_MODE"] = "PARALLEL" + os.environ["NCCL_IB_HCA"] = subprocess.getoutput( + "pushd /sys/class/infiniband/ > /dev/null; for i in mlx5_*; " + "do cat $i/ports/1/gid_attrs/types/* 2>/dev/null " + "| grep v >/dev/null && echo $i ; done; popd > /dev/null" + ) + os.environ["NCCL_IB_GID_INDEX"] = "3" + os.environ["NCCL_IB_TC"] = "106" + + +def configure_omp(num_threads=1): + """ + If OMP_NUM_THREADS is not configured and world_size is greater than 1, + Configure OMP_NUM_THREADS environment variables of NCCL to `num_thread`. + + Args: + num_threads (int): value of `OMP_NUM_THREADS` to set. + """ + # We set OMP_NUM_THREADS=1 by default, which achieves the best speed on our machines + # feel free to change it for better performance. + if "OMP_NUM_THREADS" not in os.environ and get_world_size() > 1: + os.environ["OMP_NUM_THREADS"] = str(num_threads) + if is_main_process(): + logger.info( + "\n***************************************************************\n" + "We set `OMP_NUM_THREADS` for each process to {} to speed up.\n" + "please further tune the variable for optimal performance.\n" + "***************************************************************".format( + os.environ["OMP_NUM_THREADS"] + ) + ) + + +def configure_module(ulimit_value=8192): + """ + Configure pytorch module environment. setting of ulimit and cv2 will be set. 
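+    In practice this sets the process's open-file soft limit and disables OpenCV's
+    OpenCL path and internal threading, which can otherwise interfere with
+    multi-worker torch dataloaders.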
+ + Args: + ulimit_value(int): default open file number on linux. Default value: 8192. + """ + # system setting + try: + import resource + + rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) + resource.setrlimit(resource.RLIMIT_NOFILE, (ulimit_value, rlimit[1])) + except Exception: + # Exception might be raised in Windows OS or rlimit reaches max limit number. + # However, set rlimit value might not be necessary. + pass + + # cv2 + # multiprocess might be harmful on performance of torch dataloader + os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled" + try: + cv2.setNumThreads(0) + cv2.ocl.setUseOpenCL(False) + except Exception: + # cv2 version mismatch might rasie exceptions. + pass diff --git a/asone/detectors/yolox/yolox/utils/visualize.py b/asone/detectors/yolox/yolox/utils/visualize.py new file mode 100644 index 0000000000000000000000000000000000000000..e714a3ee73699141fb4cd8d131d541a6e6625ed6 --- /dev/null +++ b/asone/detectors/yolox/yolox/utils/visualize.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii Inc. All rights reserved. + +import cv2 +import numpy as np + +__all__ = ["vis"] + + +def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None): + + for i in range(len(boxes)): + box = boxes[i] + cls_id = int(cls_ids[i]) + score = scores[i] + if score < conf: + continue + x0 = int(box[0]) + y0 = int(box[1]) + x1 = int(box[2]) + y1 = int(box[3]) + + color = (_COLORS[cls_id] * 255).astype(np.uint8).tolist() + text = '{}:{:.1f}%'.format(class_names[cls_id], score * 100) + txt_color = (0, 0, 0) if np.mean(_COLORS[cls_id]) > 0.5 else (255, 255, 255) + font = cv2.FONT_HERSHEY_SIMPLEX + + txt_size = cv2.getTextSize(text, font, 0.4, 1)[0] + cv2.rectangle(img, (x0, y0), (x1, y1), color, 2) + + txt_bk_color = (_COLORS[cls_id] * 255 * 0.7).astype(np.uint8).tolist() + cv2.rectangle( + img, + (x0, y0 + 1), + (x0 + txt_size[0] + 1, y0 + int(1.5*txt_size[1])), + txt_bk_color, + -1 + ) + cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1) + + return img + + +_COLORS = np.array( + [ + 0.000, 0.447, 0.741, + 0.850, 0.325, 0.098, + 0.929, 0.694, 0.125, + 0.494, 0.184, 0.556, + 0.466, 0.674, 0.188, + 0.301, 0.745, 0.933, + 0.635, 0.078, 0.184, + 0.300, 0.300, 0.300, + 0.600, 0.600, 0.600, + 1.000, 0.000, 0.000, + 1.000, 0.500, 0.000, + 0.749, 0.749, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 1.000, + 0.667, 0.000, 1.000, + 0.333, 0.333, 0.000, + 0.333, 0.667, 0.000, + 0.333, 1.000, 0.000, + 0.667, 0.333, 0.000, + 0.667, 0.667, 0.000, + 0.667, 1.000, 0.000, + 1.000, 0.333, 0.000, + 1.000, 0.667, 0.000, + 1.000, 1.000, 0.000, + 0.000, 0.333, 0.500, + 0.000, 0.667, 0.500, + 0.000, 1.000, 0.500, + 0.333, 0.000, 0.500, + 0.333, 0.333, 0.500, + 0.333, 0.667, 0.500, + 0.333, 1.000, 0.500, + 0.667, 0.000, 0.500, + 0.667, 0.333, 0.500, + 0.667, 0.667, 0.500, + 0.667, 1.000, 0.500, + 1.000, 0.000, 0.500, + 1.000, 0.333, 0.500, + 1.000, 0.667, 0.500, + 1.000, 1.000, 0.500, + 0.000, 0.333, 1.000, + 0.000, 0.667, 1.000, + 0.000, 1.000, 1.000, + 0.333, 0.000, 1.000, + 0.333, 0.333, 1.000, + 0.333, 0.667, 1.000, + 0.333, 1.000, 1.000, + 0.667, 0.000, 1.000, + 0.667, 0.333, 1.000, + 0.667, 0.667, 1.000, + 0.667, 1.000, 1.000, + 1.000, 0.000, 1.000, + 1.000, 0.333, 1.000, + 1.000, 0.667, 1.000, + 0.333, 0.000, 0.000, + 0.500, 0.000, 0.000, + 0.667, 0.000, 0.000, + 0.833, 0.000, 0.000, + 1.000, 0.000, 0.000, + 0.000, 0.167, 0.000, + 0.000, 0.333, 0.000, + 0.000, 0.500, 0.000, + 0.000, 0.667, 0.000, + 0.000, 0.833, 0.000, + 0.000, 1.000, 
0.000, + 0.000, 0.000, 0.167, + 0.000, 0.000, 0.333, + 0.000, 0.000, 0.500, + 0.000, 0.000, 0.667, + 0.000, 0.000, 0.833, + 0.000, 0.000, 1.000, + 0.000, 0.000, 0.000, + 0.143, 0.143, 0.143, + 0.286, 0.286, 0.286, + 0.429, 0.429, 0.429, + 0.571, 0.571, 0.571, + 0.714, 0.714, 0.714, + 0.857, 0.857, 0.857, + 0.000, 0.447, 0.741, + 0.314, 0.717, 0.741, + 0.50, 0.5, 0 + ] +).astype(np.float32).reshape(-1, 3) diff --git a/asone/detectors/yolox/yolox_detector.py b/asone/detectors/yolox/yolox_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..56169fa4079924db7211073eafd6d7a833f92256 --- /dev/null +++ b/asone/detectors/yolox/yolox_detector.py @@ -0,0 +1,175 @@ + +import os +from asone.utils import get_names +import numpy as np +import warnings + +import torch +import onnxruntime + +from asone import utils +from asone.detectors.yolox.yolox.utils import fuse_model, postprocess +from asone.detectors.yolox.yolox.exp import get_exp +from asone.detectors.yolox.yolox_utils import preprocess, multiclass_nms, demo_postprocess + + +class YOLOxDetector: + def __init__(self, + model_name=None, + exp_file=None, + weights=None, + use_onnx=False, + use_cuda=False + ): + + self.use_onnx = use_onnx + self.device = 'cuda' if use_cuda else 'cpu' + + if not os.path.exists(weights): + utils.download_weights(weights) + + self.weights_name = os.path.basename(weights) + + if model_name is None: + model_name = 'yolox-s' + + if exp_file is None: + exp_file = os.path.join("exps", "default", "yolox_s.py") + # Load Model + if self.use_onnx: + self.model = self.load_onnx_model(use_cuda, weights) + else: + self.model = self.load_torch_model(weights, exp_file, model_name) + + def load_onnx_model(self, use_cuda, weights): + # Load onnx + if use_cuda: + providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] + else: + providers = ['CPUExecutionProvider'] + model = onnxruntime.InferenceSession(weights, providers=providers) + return model + + def load_torch_model(self, weights, + exp_file, model_name, + fp16=True, fuse=False): + # Device: CUDA and if fp16=True only then half precision floating point works + self.fp16 = bool(fp16) & ( + (not self.use_onnx or self.use_onnx) and self.device != 'cpu') + exp = get_exp(exp_file, model_name) + + ckpt = torch.load(weights, map_location="cpu") + + # get number of classes from weights + # head.cls_preds.0.weight weights contains number of classes so simply extract it and with in exp file. 
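+        # e.g. a COCO-trained checkpoint holds a (80, C, 1, 1) conv weight under this key,
+        # so size()[0] recovers the 80 classes without any extra metadata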
+ exp.num_classes = ckpt['model']['head.cls_preds.0.weight'].size()[0] + self.classes = exp.num_classes + model = exp.get_model() + if self.device == "cuda": + model.cuda() + if self.fp16: # to FP16 + model.half() + model.eval() + + # load the model state dict + model.load_state_dict(ckpt["model"]) + if fuse: + model = fuse_model(model) + return model + + def detect(self, + image: list, + input_shape: tuple = (640, 640), + conf_thres: float = 0.25, + iou_thres: float = 0.45, + max_det: int = 1000, + filter_classes: bool = None, + agnostic_nms: bool = True, + with_p6: bool = False + ) -> list: + + if self.weights_name in ['yolox_tiny.onnx', 'yolox_nano.onnx']: + input_shape = (416, 416) + + self.input_shape = input_shape + + # Image Preprocess for onnx models + if self.use_onnx: + processed_image, ratio = preprocess(image, self.input_shape) + else: + processed_image, ratio = preprocess(image, self.input_shape) + processed_image = torch.from_numpy(processed_image).unsqueeze(0) + processed_image = processed_image.float() + if self.device == "cuda": + processed_image = processed_image.cuda() + if self.fp16: + processed_image = processed_image.half() + + detection = [] + # Inference + if self.use_onnx: # Run ONNX model + # Model Input and Output + model_inputs = {self.model.get_inputs( + )[0].name: processed_image[None, :, :, :]} + detection = self.model.run(None, model_inputs)[0] + # Postprrocessing + detection = demo_postprocess( + detection, self.input_shape, p6=with_p6)[0] + boxes = detection[:, :4] + scores = detection[:, 4:5] * detection[:, 5:] + boxes_xyxy = np.ones_like(boxes) + boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2]/2. + boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3]/2. + boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2]/2. + boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3]/2. 
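+            # the raw ONNX output is (cx, cy, w, h) on the resized, padded input; the lines
+            # above convert it to corner format and the division below undoes the
+            # preprocessing resize ratio to map boxes back to the original image scale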
+ boxes_xyxy /= ratio + detection = multiclass_nms( + boxes_xyxy, scores, nms_thr=iou_thres, score_thr=conf_thres) + + # Run Pytorch model + else: + with torch.no_grad(): + prediction = self.model(processed_image) + prediction = postprocess(prediction, + self.classes, + conf_thres, + iou_thres, + class_agnostic=agnostic_nms + )[0] + if prediction is not None: + prediction = prediction.detach().cpu().numpy() + bboxes = prediction[:, 0:4] + # Postprocessing + bboxes /= ratio + cls = prediction[:, 6] + scores = prediction[:, 4] * prediction[:, 5] + for box in range(len(bboxes)): + pred = np.append(bboxes[box], scores[box]) + pred = np.append(pred, cls[box]) + detection.append(pred) + detection = np.array(detection) + else: + detection = prediction + + if filter_classes: + class_names = get_names() + + filter_class_idx = [] + if filter_classes: + for _class in filter_classes: + if _class.lower() in class_names: + filter_class_idx.append( + class_names.index(_class.lower())) + else: + warnings.warn( + f"class {_class} not found in model classes list.") + + detection = detection[np.in1d( + detection[:, 5].astype(int), filter_class_idx)] + + image_info = { + 'width': image.shape[1], + 'height': image.shape[0], + } + + return detection, image_info diff --git a/asone/detectors/yolox/yolox_utils.py b/asone/detectors/yolox/yolox_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7ea12f581b4f6fca8ab1680cc84d07c87f997f64 --- /dev/null +++ b/asone/detectors/yolox/yolox_utils.py @@ -0,0 +1,129 @@ +import cv2 +import numpy as np + +def preprocess(img, input_size, swap=(2, 0, 1)): + if len(img.shape) == 3: + padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114 + else: + padded_img = np.ones(input_size, dtype=np.uint8) * 114 + + r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + return padded_img, r + +def nms(boxes, scores, nms_thr): + """Single class NMS implemented in Numpy.""" + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= nms_thr)[0] + order = order[inds + 1] + + return keep + + +def multiclass_nms(boxes, scores, nms_thr, score_thr, class_agnostic=True): + """Multiclass NMS implemented in Numpy""" + if class_agnostic: + nms_method = multiclass_nms_class_agnostic + else: + nms_method = multiclass_nms_class_aware + return nms_method(boxes, scores, nms_thr, score_thr) + + +def multiclass_nms_class_aware(boxes, scores, nms_thr, score_thr): + """Multiclass NMS implemented in Numpy. 
Class-aware version.""" + final_dets = [] + num_classes = scores.shape[1] + for cls_ind in range(num_classes): + cls_scores = scores[:, cls_ind] + valid_score_mask = cls_scores > score_thr + if valid_score_mask.sum() == 0: + continue + else: + valid_scores = cls_scores[valid_score_mask] + valid_boxes = boxes[valid_score_mask] + keep = nms(valid_boxes, valid_scores, nms_thr) + if len(keep) > 0: + cls_inds = np.ones((len(keep), 1)) * cls_ind + dets = np.concatenate( + [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1 + ) + final_dets.append(dets) + if len(final_dets) == 0: + return None + return np.concatenate(final_dets, 0) + + +def multiclass_nms_class_agnostic(boxes, scores, nms_thr, score_thr): + """Multiclass NMS implemented in Numpy. Class-agnostic version.""" + cls_inds = scores.argmax(1) + cls_scores = scores[np.arange(len(cls_inds)), cls_inds] + + valid_score_mask = cls_scores > score_thr + if valid_score_mask.sum() == 0: + return None + valid_scores = cls_scores[valid_score_mask] + valid_boxes = boxes[valid_score_mask] + valid_cls_inds = cls_inds[valid_score_mask] + keep = nms(valid_boxes, valid_scores, nms_thr) + if keep: + dets = np.concatenate( + [valid_boxes[keep], valid_scores[keep, None], valid_cls_inds[keep, None]], 1 + ) + return dets + +def demo_postprocess(outputs, img_size, p6=False): + + grids = [] + expanded_strides = [] + + if not p6: + strides = [8, 16, 32] + else: + strides = [8, 16, 32, 64] + + hsizes = [img_size[0] // stride for stride in strides] + wsizes = [img_size[1] // stride for stride in strides] + + for hsize, wsize, stride in zip(hsizes, wsizes, strides): + xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize)) + grid = np.stack((xv, yv), 2).reshape(1, -1, 2) + grids.append(grid) + shape = grid.shape[:2] + expanded_strides.append(np.full((*shape, 1), stride)) + + grids = np.concatenate(grids, 1) + expanded_strides = np.concatenate(expanded_strides, 1) + outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides + outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides + + return outputs + diff --git a/asone/linux/Instructions/Benchmarking.md b/asone/linux/Instructions/Benchmarking.md new file mode 100644 index 0000000000000000000000000000000000000000..7c848f144c265bc82583dbb1353ee27259f6312b --- /dev/null +++ b/asone/linux/Instructions/Benchmarking.md @@ -0,0 +1,61 @@ +# Benchmarking + +## Hardware Used: +- CPU: Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz +- GPU: 8GB (RTX2080) + +## Trackers + +### DeepSort + +| Model | Model Flag | FPS-GPU | FPS-CPU +|---------------- |-----------| -----------| -------- +|DeepSort-ONNX-Yolov5s|DEEPSORT|13|3.2| +|DeepSort-Pytorch-Yolov5s|DEEPSORT|13|3.2| + +### ByteTrack + +| Model | Model Flag | FPS-GPU | FPS-CPU +|---------------- |-----------| -----------| -------- +|ByteTrack-ONNX-YOLOv5s|BYTETRACK|33.7|17.4| +|ByteTrack-Pytorch-Sample-YOLOv5s|BYTETRACK|33.7|17.4| + +### NorFair + +| Model | Model Flag | FPS-GPU | FPS-CPU +|---------------- |-----------| -----------| -------- +|tryolab-ONNX-YOLOv5s|NORFAIR|25.8|12| +|tryolab-Pytorch-YOLOv5s|NORFAIR|25.8|12| + +## Detectors +### YOLOv5 +| Pytorch |ONNX | +|:-------------------------------:|:-----------------------------:| +|
+<table>
+<tr><th>Model Name / Model Flag</th><th>FPS-GPU</th><th>FPS-CPU</th></tr>
+<tr><td>YOLOV5X6_PYTORCH</td><td>20.8</td><td>3.69</td></tr>
+<tr><td>YOLOV5S_PYTORCH</td><td>57.25</td><td>25.4</td></tr>
+<tr><td>YOLOV5N_PYTORCH</td><td>68</td><td>45</td></tr>
+<tr><td>YOLOV5M_PYTORCH</td><td>54</td><td>14</td></tr>
+<tr><td>YOLOV5L_PYTORCH</td><td>40.06</td><td>8.28</td></tr>
+<tr><td>YOLOV5X_PYTORCH</td><td>28.8</td><td>4.32</td></tr>
+<tr><td>YOLOV5N6_PYTORCH</td><td>63.5</td><td>39</td></tr>
+<tr><td>YOLOV5S6_PYTORCH</td><td>58</td><td>23</td></tr>
+<tr><td>YOLOV5M6_PYTORCH</td><td>49</td><td>10</td></tr>
+<tr><td>YOLOV5L6_PYTORCH</td><td>33</td><td>6.5</td></tr>
+</table>
|
+<table>
+<tr><th>Model Name / Model Flag</th><th>FPS-GPU</th><th>FPS-CPU</th></tr>
+<tr><td>YOLOV5X6_ONNX</td><td>2.58</td><td>2.46</td></tr>
+<tr><td>YOLOV5S_ONNX</td><td>17</td><td>16.35</td></tr>
+<tr><td>YOLOV5N_ONNX</td><td>57.25</td><td>35.23</td></tr>
+<tr><td>YOLOV5M_ONNX</td><td>45.8</td><td>11.17</td></tr>
+<tr><td>YOLOV5L_ONNX</td><td>4.07</td><td>4.36</td></tr>
+<tr><td>YOLOV5X_ONNX</td><td>2.32</td><td>2.6</td></tr>
+<tr><td>YOLOV5N6_ONNX</td><td>28.6</td><td>32.7</td></tr>
+<tr><td>YOLOV5S6_ONNX</td><td>17</td><td>16.35</td></tr>
+<tr><td>YOLOV5M6_ONNX</td><td>7.5</td><td>7.6</td></tr>
+<tr><td>YOLOV5L6_ONNX</td><td>3.7</td><td>3.98</td></tr>
+</table>
| + +### YOLOv6 +| Pytorch |ONNX | +|:-------------------------------:|:-----------------------------:| +|
+<table>
+<tr><th>Model Name / Model Flag</th><th>FPS-GPU</th><th>FPS-CPU</th></tr>
+<tr><td>YOLOV6N_PYTORCH</td><td>65.4</td><td>35.32</td></tr>
+<tr><td>YOLOV6T_PYTORCH</td><td>63</td><td>15.21</td></tr>
+<tr><td>YOLOV6S_PYTORCH</td><td>49.24</td><td>20</td></tr>
+<tr><td>YOLOV6M_PYTORCH</td><td>35</td><td>9.96</td></tr>
+<tr><td>YOLOV6L_PYTORCH</td><td>31</td><td>6.2</td></tr>
+<tr><td>YOLOV6L_RELU_PYTORCH</td><td>27</td><td>6.3</td></tr>
+<tr><td>YOLOV6S_REPOPT_PYTORCH</td><td>63.5</td><td>39</td></tr>
+</table>
|
+<table>
+<tr><th>Model Name / Model Flag</th><th>FPS-GPU</th><th>FPS-CPU</th></tr>
+<tr><td>YOLOV6N_ONNX</td><td>50</td><td>30</td></tr>
+<tr><td>YOLOV6T_ONNX</td><td>45.8</td><td>16</td></tr>
+<tr><td>YOLOV6S_ONNX</td><td>41</td><td>13.8</td></tr>
+<tr><td>YOLOV6M_ONNX</td><td>25</td><td>6.07</td></tr>
+<tr><td>YOLOV6L_ONNX</td><td>17.7</td><td>3.32</td></tr>
+<tr><td>YOLOV6L_RELU_ONNX</td><td>19.15</td><td>4.36</td></tr>
+<tr><td>YOLOV6S_REPOPT_ONNX</td><td>63.5</td><td>39</td></tr>
+</table>
| + +### YOLOv7 +| PyTorch |ONNX | +|:-------------------------------:|:-----------------------------:| +|
+<table>
+<tr><th>Model Name / Model Flag</th><th>FPS-GPU</th><th>FPS-CPU</th></tr>
+<tr><td>YOLOV7_TINY_PYTORCH</td><td>53</td><td>19</td></tr>
+<tr><td>YOLOV7_PYTORCH</td><td>38</td><td>6.83</td></tr>
+<tr><td>YOLOV7_X_PYTORCH</td><td>28</td><td>4.36</td></tr>
+<tr><td>YOLOV7_W6_PYTORCH</td><td>32.7</td><td>7.26</td></tr>
+<tr><td>YOLOV7_E6_PYTORCH</td><td>15.26</td><td>3.07</td></tr>
+<tr><td>YOLOV7_D6_PYTORCH</td><td>21</td><td>3.78</td></tr>
+<tr><td>YOLOV7_E6E_PYTORCH</td><td>24</td><td>3.36</td></tr>
+</table>
|
+<table>
+<tr><th>Model Name / Model Flag</th><th>FPS-GPU</th><th>FPS-CPU</th></tr>
+<tr><td>YOLOV7_TINY_ONNX</td><td>41.6</td><td>22</td></tr>
+<tr><td>YOLOV7_ONNX</td><td>26</td><td>3.78</td></tr>
+<tr><td>YOLOV7_X_ONNX</td><td>19.08</td><td>2.35</td></tr>
+<tr><td>YOLOV7_W6_ONNX</td><td>28.6</td><td>5.2</td></tr>
+<tr><td>YOLOV7_E6_ONNX</td><td>14.3</td><td>2.97</td></tr>
+<tr><td>YOLOV7_D6_ONNX</td><td>18.32</td><td>2.58</td></tr>
+<tr><td>YOLOV7_E6E_ONNX</td><td>15.26</td><td>2.09</td></tr>
+</table>
| + +### YOLOR +| Pytorch |ONNX | +|:-------------------------------:|:-----------------------------:| +|
+<table>
+<tr><th>Model Name / Model Flag</th><th>FPS-GPU</th><th>FPS-CPU</th></tr>
+<tr><td>YOLOR_CSP_X_PYTORCH</td><td>28.6</td><td>1.83</td></tr>
+<tr><td>YOLOR_CSP_X_STAR_PYTORCH</td><td>30</td><td>1.76</td></tr>
+<tr><td>YOLOR_CSP_STAR_PYTORCH</td><td>38.1</td><td>2.86</td></tr>
+<tr><td>YOLOR_CSP_PYTORCH</td><td>38</td><td>2.77</td></tr>
+<tr><td>YOLOR_P6_PYTORCH</td><td>20</td><td>1.57</td></tr>
+</table>
|
+<table>
+<tr><th>Model Name / Model Flag</th><th>FPS-GPU</th><th>FPS-CPU</th></tr>
+<tr><td>YOLOR_CSP_X_ONNX</td><td>15.7</td><td>2.53</td></tr>
+<tr><td>YOLOR_CSP_X_STAR_ONNX</td><td>15.79</td><td>2.05</td></tr>
+<tr><td>YOLOR_CSP_STAR_ONNX</td><td>18.32</td><td>3.34</td></tr>
+<tr><td>YOLOR_CSP_ONNX</td><td>15.7</td><td>2.53</td></tr>
+<tr><td>YOLOR_P6_ONNX</td><td>25.4</td><td>5.58</td></tr>
+</table>
| + +### YOLOX +| Pytorch |ONNX | +|:-------------------------------:|:-----------------------------:| +|
+<table>
+<tr><th>Model Name / Model Flag</th><th>FPS-GPU</th><th>FPS-CPU</th></tr>
+<tr><td>YOLOX_L_PYTORCH</td><td>2.58</td><td>2.31</td></tr>
+<tr><td>YOLOX_NANO_PYTORCH</td><td>35</td><td>32</td></tr>
+<tr><td>YOLOX_TINY_PYTORCH</td><td>25.4</td><td>25.4</td></tr>
+<tr><td>YOLOX_DARKNET_PYTORCH</td><td>2</td><td>1.94</td></tr>
+<tr><td>YOLOX_S_PYTORCH</td><td>9.54</td><td>9.7</td></tr>
+<tr><td>YOLOX_M_PYTORCH</td><td>4.4</td><td>4.36</td></tr>
+<tr><td>YOLOX_X_PYTORCH</td><td>15.64</td><td>1.39</td></tr>
+</table>
|
+<table>
+<tr><th>Model Name / Model Flag</th><th>FPS-GPU</th><th>FPS-CPU</th></tr>
+<tr><td>YOLOX_L_ONNX</td><td>22.9</td><td>3.07</td></tr>
+<tr><td>YOLOX_NANO_ONNX</td><td>59</td><td>54</td></tr>
+<tr><td>YOLOX_TINY_ONNX</td><td>60</td><td>35</td></tr>
+<tr><td>YOLOX_DARKNET_ONNX</td><td>24</td><td>3.36</td></tr>
+<tr><td>YOLOX_S_ONNX</td><td>45</td><td>13.8</td></tr>
+<tr><td>YOLOX_M_ONNX</td><td>32</td><td>6.54</td></tr>
+<tr><td>YOLOX_X_ONNX</td><td>15.79</td><td>2.03</td></tr>
+</table>
| + +### YOLOv8 +| Pytorch |ONNX | +|:-------------------------------:|:-----------------------------:| +|
+<table>
+<tr><th>Model Name / Model Flag</th><th>FPS-GPU</th><th>FPS-CPU</th></tr>
+<tr><td>YOLOV8N_PYTORCH</td><td>26.7</td><td>17.0</td></tr>
+<tr><td>YOLOV8S_PYTORCH</td><td>26.4</td><td>12.3</td></tr>
+<tr><td>YOLOV8M_PYTORCH</td><td>25.1</td><td>6.8</td></tr>
+<tr><td>YOLOV8L_PYTORCH</td><td>23.6</td><td>4.0</td></tr>
+<tr><td>YOLOV8X_PYTORCH</td><td>20.7</td><td>2.8</td></tr>
+</table>
|
+<table>
+<tr><th>Model Name / Model Flag</th><th>FPS-GPU</th><th>FPS-CPU</th></tr>
+<tr><td>YOLOV8N_ONNX</td><td>25.1</td><td>10.5</td></tr>
+<tr><td>YOLOV8S_ONNX</td><td>24.5</td><td>7.5</td></tr>
+<tr><td>YOLOV8M_ONNX</td><td>22.9</td><td>4.7</td></tr>
+<tr><td>YOLOV8L_ONNX</td><td>20.4</td><td>2.9</td></tr>
+<tr><td>YOLOV8X_ONNX</td><td>19.0</td><td>2.0</td></tr>
+</table>
| + +Return to [Installation Page](../../../README.md) diff --git a/asone/linux/Instructions/Demo-Detectron2.md b/asone/linux/Instructions/Demo-Detectron2.md new file mode 100644 index 0000000000000000000000000000000000000000..42116a757e617cb64bbb9ad3a4002039fbd56372 --- /dev/null +++ b/asone/linux/Instructions/Demo-Detectron2.md @@ -0,0 +1,90 @@ +# ASOne + +#### Table of Contents + +- [Docker Demo](#docker-demo) + - [Docker Demo](#docker-demo-1) + - [Setup Detectron](#setup-detectron) + - [Demo using docker compose](#demo-using-docker-compose-file) + - [Test Detectron2](#test-detectron2) + +# Docker Installation + +- If you haven't installed docker first install it by following provided instructions [here](../) + +## Docker Demo + +### Setting Up detectron2 + +1. Clone the Repo + +``` +git clone https://github.com/facebookresearch/detectron2.git +``` + +2. Goto the detectron2 directory + +``` +cd detectron2 +``` + +3. Download some sample images in this folder + +### Demo Using Docker Compose File + +1. Run container without gpu + +``` +docker compose run linux +``` + +2. Run container with gpu + +``` +docker compose run linux-gpu +``` + +- To test DISPLAY is shared with docker properly: + +``` +python main.py +``` + +- if an image show up then everything is working properly. + +- if you see an error saying `qt.qpa.xcb: could not connect to display` that means your display is not accessible to docker. + +Try this: +``` +sudo xhost +local:docker +``` + +- To build and run docker container manually follow instructions for [Manual Build](Manual-Build.md) + + +### Test Detectron2 + +1. After docker container starts properly, in docker terminal change directory using. + +``` +cd detectron2 +``` + +2. In Docker terminal run demo.py file + +``` +python demo/demo.py --input [PATH_TO_TEST_IMAGE] --output [PATH_TO_OUTPUT_IMAGE] \ + --opts MODEL.DEVICE [DEVICE] \ + MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl +``` + +- `PATH_TO_TEST_IMAGE` = Path of test image +- `PATH_TO_OUTPUT_IMAGE` = Path of Results +- `DEVICE` = device to use i.e. `cpu` or `gpu` + +e.g. +``` +python demo/demo.py --input ../test.jpeg --output ../result.jpg \ + --opts MODEL.DEVICE gpu \ + MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl +``` diff --git a/asone/linux/Instructions/Docker-Setup.md b/asone/linux/Instructions/Docker-Setup.md new file mode 100644 index 0000000000000000000000000000000000000000..a4b55ec9c48696b541e918ee845ef4ff6e1f2322 --- /dev/null +++ b/asone/linux/Instructions/Docker-Setup.md @@ -0,0 +1,42 @@ +# Setting ASOne on Docker + +1. Clone the repo + +``` +git clone https://github.com/axcelerateai/asone.git +cd asone +``` + +2. If using windows, Run this command in command prompt. +``` +set PWD=%cd% +``` +2. Run docker coompose command. + +``` +# To test on Linux with GPU +docker compose run linux-gpu + +# To test on Windows with GPU +docker compose run windows-gpu +``` + +``` +# To test on Linux with CPU +docker compose run linux + +# To test on Windows with CPU +docker compose run windows +``` + +3. In docker terminal. 
+ +``` +# if using gpu +python main.py [VIDEO_PATH] + +# if using cpu +python main.py [VIDEO_PATH] --cpu +``` + +Return to [main page](../../README.md) diff --git a/asone/linux/Instructions/Driver-Installations.md b/asone/linux/Instructions/Driver-Installations.md new file mode 100644 index 0000000000000000000000000000000000000000..7743d792c6ca46c836d02ad99d4cb4f899642418 --- /dev/null +++ b/asone/linux/Instructions/Driver-Installations.md @@ -0,0 +1,33 @@ +# Driver Installations + + +### Linux + +For systems with `GPU` please verify you have nvidia drivers installed. + +Run + +``` +nvidia-smi +``` +Drivers are installed if you see following. + +![](../imgs/nvidia-drivers.png) + +If drivers are not installed, you can do so using following command: + +``` +sudo apt-get install nvidia-driver-YYY nvidia-dkms-YYY +``` +where, +- `YYY`= Nvidia driver version + +e.g `sudo apt-get install nvidia-driver-510 nvidia-dkms-510` + +- `Reboot` your system after installing nvidia-drivers. +``` +sudo reboot +``` + + +Return to [Installation Page](../../../README.md) diff --git a/asone/linux/Instructions/Manual-Build.md b/asone/linux/Instructions/Manual-Build.md new file mode 100644 index 0000000000000000000000000000000000000000..ba95146a60e12895ee860bc6af3abb57d2a1684c --- /dev/null +++ b/asone/linux/Instructions/Manual-Build.md @@ -0,0 +1,46 @@ +# ASOne + + + +# Docker Manual Build + +## Docker Installation + +- If you haven't installed docker first install it by following provided instructions [here](../) + +## Build Image Manually + +1. Run the follwoing command to build docker image + +``` +docker build -t [IMAGE_NAME]:[TAG] . +``` + - `IMAGE_NAME` = Asign a name to image + - `TAG` = Asign a tag to image + +e.g. `docker build -t asone:latest .` + +## Run Build Image + +1. To run the build image in docker container with `cpu`. + +``` +docker run --env="DISPLAY" --net=host -v [PATH_TO_LOCAL_DIR]:/workspace/ -it [IMAGE_NAME]:[TAG] +``` + - `IMAGE_NAME` = Asign a name to image + - `TAG` = Asign a tag to image + - `PATH_TO_LOCAL_DIR` = Path to detectron2 directory or use `$PWD` if already in that directory + + +e.g `docker run --env="DISPLAY" --net=host -v $PWD:/workspace/ -it asone:latest` + +2. To run th ebuild image in docker container with `gpu` + +``` +docker run --gpus all --env="DISPLAY" --net=host -v [PATH_TO_LOCAL_DIR]:/workspace/ -it [IMAGE_NAME]:[TAG] +``` + - `IMAGE_NAME` = Asign a name to image + - `TAG` = Asign a tag to image + - `PATH_TO_LOCAL_DIR` = Path to detectron2 directory or use `$PWD` if already in that directory + +e.g `docker run --gpus all --env="DISPLAY" --net=host -v $PWD:/workspace/ -it asone:latest` \ No newline at end of file diff --git a/asone/linux/Instructions/Manual-Installation.md b/asone/linux/Instructions/Manual-Installation.md new file mode 100644 index 0000000000000000000000000000000000000000..fdbe961dbc2ce6ff974854f3fdc4d9850d3a1329 --- /dev/null +++ b/asone/linux/Instructions/Manual-Installation.md @@ -0,0 +1,68 @@ +# ASOne + + +# Docker Manual Installation + +## Ubuntu + + +1. Run following command to remove all old versions on docker + +``` +sudo apt-get remove docker docker-engine docker.io containerd runc +``` + +2. 
Set up Repository + +- Update the apt package index and install packages to allow apt to use a repository over HTTPS: + +``` +sudo apt-get update +sudo apt-get install \ + ca-certificates \ + curl \ + gnupg \ + lsb-release +``` + +- Add Docker’s official GPG key: + +``` +sudo mkdir -p /etc/apt/keyrings +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg +``` + +- Use the following command to set up the repository: + +``` +echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ + $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null +``` + +3. Install Docker Engine + +- Update the apt package index, and install the latest version of Docker Engine, containerd, and Docker Compose: + +``` +sudo apt-get update +sudo apt-get install docker-ce docker-ce-cli containerd.io docker-compose-plugin +``` + +4. Install `nvidia-docker` to allow docker interact with GPU. + +``` +sudo apt-get install -y nvidia-docker2 +sudo systemctl restart docker +``` + +5. Give docker access to devices. + +``` +sudo xhost +local:docker + +sudo groupadd docker +sudo gpasswd -a $USER docker +newgrp docker + +``` \ No newline at end of file diff --git a/asone/linux/README.md b/asone/linux/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ddbf92d3681b1fde8be3c62cc566a862627c0b10 --- /dev/null +++ b/asone/linux/README.md @@ -0,0 +1,76 @@ +# ASOne + +#### Table of Contents +- [Docker Intallation](#docker-installation) + - [Ubuntu](#ubuntu) + - [Prerequisite](#prerequisite) + - [Install Using Shell Script](#install-using-shell-script) + +# Docker Installation + +## Ubuntu +#### Prerequisite +1. For systems with `GPU` please verify you have nvidia drivers installed. run + +``` +nvidia-smi +``` +if you see something like the following. Then you can continue running [shell script](#install-using-shell-script) + +![](imgs/nvidia-drivers.png) + + or you can install nvidia drivers using following command: + +``` +sudo apt-get install nvidia-driver-YYY nvidia-dkms-YYY +``` +- `YYY`= Nvidia driver version + +e.g `sudo apt-get install nvidia-driver-510 nvidia-dkms-510` + +- `Reboot` your system after installing nvidia-drivers. +``` +sudo reboot +``` + +#### Install using Shell Script + +``` +cd asone-linux +chmod a+x docker-installation.sh +./docker-installation.sh +``` + - For systems with `GPU` run following commands after installing docker. + + Setup the package repository and the GPG key: + ``` + distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \ + && curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \ + && curl -s -L https://nvidia.github.io/libnvidia-container/experimental/$distribution/libnvidia-container.list | \ + sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \ + sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list + ``` + Install the `nvidia-docker2` package (and dependencies) after updating the package listing: + ``` + sudo apt-get update + sudo apt-get install -y nvidia-docker2 + sudo systemctl restart docker + + sudo xhost +local:docker + ``` + +- [NOTE] If there is an error while installing docker, try removing apt-lists and resinstalling. + +``` +sudo rm -rf /var/lib/apt/lists/* +./docker-installation.sh +``` + +``` +# jump back to main folder +cd .. 
+``` + +If everything is done successfully you can return to [Installation Page](../README.md) + +In case shell script keeps failing or you want to install manually follow steps in [Manual Installation](Instructions/Manual-Installation.md) diff --git a/asone/linux/docker-installation.sh b/asone/linux/docker-installation.sh new file mode 100644 index 0000000000000000000000000000000000000000..a904a85607ca76660b5575769ac939b5e2b5202c --- /dev/null +++ b/asone/linux/docker-installation.sh @@ -0,0 +1,96 @@ +#!/bin/bash + +echo "[INFO]: Removing previous verions of dockers..." +echo "[INFO]: Removing previous verions of dockers..." > logs.txt +if sudo apt-get remove docker docker-engine docker.io containerd runc -y >> logs.txt; then + echo "[INFO]: Previous docker removed successfully!" + echo "[INFO]: Previous docker removed successfully!" >> logs.txt +fi + +echo "[INFO]: Updating apt-package index..." +echo "[INFO]: Updating apt-package index..." >> logs.txt + + +if sudo apt-get update -y >> logs.txt; then + echo "[INFO]: apt-package index updated successfuly!" + echo "[INFO]: apt-package index updated successfuly!" >> logs.txt +else + echo "[ERROR]: Error while updating apt-package index. Check logs.txt file for more info." + echo "[ERROR]: Error while updating apt-package index." >> logs.txt + # exit 1 +fi + +echo "[INFO]: Installing required apt packages..." +echo "[INFO]: Installing required apt packages..." >> logs.txt + +if sudo apt-get install \ + ca-certificates \ + curl \ + gnupg \ + lsb-release -y ; then + echo "[INFO]: Required apt packages installed successfully!" + echo "[INFO]: Required apt packages installed successfully!" >> logs.txt +else + echo "[ERROR]: Error installing required apt packages. Check logs.txt file for more info." + echo "[ERROR]: Error installing required apt packages." >> logs.txt + exit 1 +fi + +echo "[INFO]: Adding docker GPG key..." +echo "[INFO]: Adding docker GPG key..." >> logs.txt + +sudo mkdir -p /etc/apt/keyrings + +if curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg >> logs.txt;then + echo "[INFO]: Docker GPG key added successfully!" + echo "[INFO]: Docker GPG key added successfully!" >> logs.txt +else + echo "[ERROR]: Error adding docker GPG key. Check logs.txt file for more info." + echo "[ERROR]: Error adding docker GPG key." >> logs.txt + exit 1 +fi + + +echo "[INFO]: Setting docker repository..." +echo "[INFO]: Setting docker repository..." >> logs.txt +if echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ + $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null >> logs.txt; then + echo "[INFO]: Docker repository setup done." + echo "[INFO]: Docker repository setup done." >> logs.txt +else + echo "[ERROR]: Error setting up docker repository. Check logs.txt file for more info." + echo "[ERROR]: Error setting up docker repository." >> logs.txt + exit 1 +fi + +echo "[INFO]: Installing Docker Engine..." +echo "[INFO]: Installing Docker Engine..." >> logs.txt + +if sudo apt-get update -y >> logs.txt; then + if sudo apt-get install docker-ce docker-ce-cli containerd.io docker-compose-plugin -y >> logs.txt; then + if sudo docker --version; then + echo "[INFO]: Docker Engine instaleld successfully!" + echo "[INFO]: Docker Engine instaleld successfully!" >> logs.txt + fi + else + echo "[ERROR]: Error installing docker engine. Check logs.txt file for more info." 
+ echo "[ERROR]: Error installing docker engine." >> logs.txt + exit 1 + fi +else + echo "[ERROR]: Error updating apt packages. Check logs.txt file for more info." + echo "[ERROR]: Error updating apt packages." >> logs.txt + # exit 1 +fi + +echo "[INFO]: Adding docker to sudo group..." +echo "[INFO]: Adding docker to sudo group..." >> logs.txt +sudo xhost +local:docker + +sudo groupadd docker +sudo gpasswd -a $USER docker +newgrp docker + +echo "[INFO]: Docker Installation and setup completed successfully!" +echo "[INFO]: Docker Installation and setup completed successfully!" >> logs.txt \ No newline at end of file diff --git a/asone/linux/imgs/nvidia-drivers.png b/asone/linux/imgs/nvidia-drivers.png new file mode 100644 index 0000000000000000000000000000000000000000..c332107e7b1368084319a3965bc495060e0d801a Binary files /dev/null and b/asone/linux/imgs/nvidia-drivers.png differ diff --git a/asone/linux/main.py b/asone/linux/main.py new file mode 100644 index 0000000000000000000000000000000000000000..825df2e5d16d7057851bfdcc8f80b91917e77802 --- /dev/null +++ b/asone/linux/main.py @@ -0,0 +1,7 @@ +import cv2 +import numpy as np + +img = cv2.imread('test-asone.jpeg') + +cv2.imshow('RESULT', img) +cv2.waitKey(0) diff --git a/asone/linux/test-asone.jpeg b/asone/linux/test-asone.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..3b97f83a4d8df84381156ed5c3baa9d27b3c09ba Binary files /dev/null and b/asone/linux/test-asone.jpeg differ diff --git a/asone/linux/test.jpeg b/asone/linux/test.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..8aa39d1dd267c40091b1804fa30cb711492dbb67 Binary files /dev/null and b/asone/linux/test.jpeg differ diff --git a/asone/trackers/__init__.py b/asone/trackers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bb57fcd8934f4248ab096baa5aa93c65d294c8a0 --- /dev/null +++ b/asone/trackers/__init__.py @@ -0,0 +1,7 @@ +from asone.trackers.byte_track.bytetracker import ByteTrack +from asone.trackers.deep_sort.deepsort import DeepSort +from asone.trackers.nor_fair.norfair import NorFair + +from asone.trackers.tracker import Tracker + +__all__ = ['Tracker', 'ByteTrack', 'DeepSort', 'NorFair'] diff --git a/asone/trackers/byte_track/__init__.py b/asone/trackers/byte_track/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/asone/trackers/byte_track/bytetracker.py b/asone/trackers/byte_track/bytetracker.py new file mode 100644 index 0000000000000000000000000000000000000000..b2c675b329a043331ed70f108fdcbc6c917c7897 --- /dev/null +++ b/asone/trackers/byte_track/bytetracker.py @@ -0,0 +1,60 @@ +from .tracker.byte_tracker import BYTETracker +import numpy as np +from asone import utils + + +class ByteTrack(object): + def __init__(self, detector, min_box_area: int = 10, aspect_ratio_thresh:float= 1.6) -> None: + + self.min_box_area = min_box_area + self.aspect_ratio_thresh = aspect_ratio_thresh + self.min_box_area = min_box_area + self.rgb_means = (0.485, 0.456, 0.406) + self.std = (0.229, 0.224, 0.225) + + self.detector = detector + try: + self.input_shape = tuple(detector.model.get_inputs()[0].shape[2:]) + except AttributeError as e: + self.input_shape = (640, 640) + + self.tracker = BYTETracker(frame_rate=30) + + def detect_and_track(self, image: np.ndarray, config: dict) -> tuple: + dets_xyxy, image_info = self.detector.detect(image, **config) + + class_ids = [] + ids = [] + bboxes_xyxy = [] + scores = [] + + if 
isinstance(dets_xyxy, np.ndarray) and len(dets_xyxy) > 0: + class_ids = [int(i) for i in dets_xyxy[:, -1].tolist()] + bboxes_xyxy, ids, scores = self._tracker_update( + dets_xyxy, + image_info, + ) + return bboxes_xyxy, ids, scores, class_ids + + def _tracker_update(self, dets: np.ndarray, image_info: dict): + online_targets = [] + if dets is not None: + online_targets = self.tracker.update( + dets[:, :-1], + [image_info['height'], image_info['width']], + [image_info['height'], image_info['width']], + ) + + online_xyxys = [] + online_ids = [] + online_scores = [] + for online_target in online_targets: + tlwh = online_target.tlwh + track_id = online_target.track_id + vertical = tlwh[2] / tlwh[3] > self.aspect_ratio_thresh + if tlwh[2] * tlwh[3] > self.min_box_area and not vertical: + online_xyxys.append(utils.tlwh_to_xyxy(tlwh)) + online_ids.append(track_id) + online_scores.append(online_target.score) + + return online_xyxys, online_ids, online_scores diff --git a/asone/trackers/byte_track/tracker/__init__.py b/asone/trackers/byte_track/tracker/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/asone/trackers/byte_track/tracker/basetrack.py b/asone/trackers/byte_track/tracker/basetrack.py new file mode 100644 index 0000000000000000000000000000000000000000..a7130b5cc08ac55705c155594d0f2a1d09f96774 --- /dev/null +++ b/asone/trackers/byte_track/tracker/basetrack.py @@ -0,0 +1,52 @@ +import numpy as np +from collections import OrderedDict + + +class TrackState(object): + New = 0 + Tracked = 1 + Lost = 2 + Removed = 3 + + +class BaseTrack(object): + _count = 0 + + track_id = 0 + is_activated = False + state = TrackState.New + + history = OrderedDict() + features = [] + curr_feature = None + score = 0 + start_frame = 0 + frame_id = 0 + time_since_update = 0 + + # multi-camera + location = (np.inf, np.inf) + + @property + def end_frame(self): + return self.frame_id + + @staticmethod + def next_id(): + BaseTrack._count += 1 + return BaseTrack._count + + def activate(self, *args): + raise NotImplementedError + + def predict(self): + raise NotImplementedError + + def update(self, *args, **kwargs): + raise NotImplementedError + + def mark_lost(self): + self.state = TrackState.Lost + + def mark_removed(self): + self.state = TrackState.Removed \ No newline at end of file diff --git a/asone/trackers/byte_track/tracker/byte_tracker.py b/asone/trackers/byte_track/tracker/byte_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..2b3a764fbb0d8319134f8bb9449beee97154f60c --- /dev/null +++ b/asone/trackers/byte_track/tracker/byte_tracker.py @@ -0,0 +1,326 @@ +import numpy as np +from .kalman_filter import KalmanFilter +from asone.trackers.byte_track.tracker import matching +from .basetrack import BaseTrack, TrackState + +class STrack(BaseTrack): + shared_kalman = KalmanFilter() + def __init__(self, tlwh, score): + + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.tracklet_len = 0 + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks): + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = np.asarray([st.covariance for st 
in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 + multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, frame_id): + """Start a new tracklet""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + if frame_id == 1: + self.is_activated = True + # self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) + ) + self.tracklet_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + self.score = new_track.score + + def update(self, new_track, frame_id): + """ + Update a matched track + :type new_track: STrack + :type frame_id: int + :type update_feature: bool + :return: + """ + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + + @property + # @jit(nopython=True) + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + # @jit(nopython=True) + def tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_xyah(tlwh): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. 
+ """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + # @jit(nopython=True) + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + +class BYTETracker(object): + def __init__(self, track_thresh=0.5,match_thresh=0.8, track_buffer=30, mot20=False, frame_rate=30): + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + + self.track_thresh = track_thresh + self.track_buffer = track_buffer + self.mot20 = mot20 + self.match_thresh = match_thresh + + self.frame_id = 0 + self.det_thresh = track_thresh + 0.1 + self.buffer_size = int(frame_rate / 30.0 * self.track_buffer) + self.max_time_lost = self.buffer_size + self.kalman_filter = KalmanFilter() + + def update(self, output_results, img_info, img_size): + self.frame_id += 1 + activated_starcks = [] + refind_stracks = [] + lost_stracks = [] + removed_stracks = [] + + if output_results.shape[1] == 5: + scores = output_results[:, 4] + bboxes = output_results[:, :4] + else: + output_results = output_results.cpu().numpy() + scores = output_results[:, 4] * output_results[:, 5] + bboxes = output_results[:, :4] # x1y1x2y2 + img_h, img_w = img_info[0], img_info[1] + scale = min(img_size[0] / float(img_h), img_size[1] / float(img_w)) + bboxes /= scale + + remain_inds = scores > self.track_thresh + inds_low = scores > 0.1 + inds_high = scores < self.track_thresh + + inds_second = np.logical_and(inds_low, inds_high) + dets_second = bboxes[inds_second] + dets = bboxes[remain_inds] + scores_keep = scores[remain_inds] + scores_second = scores[inds_second] + + if len(dets) > 0: + '''Detections''' + detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for + (tlbr, s) in zip(dets, scores_keep)] + else: + detections = [] + + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + unconfirmed.append(track) + else: + tracked_stracks.append(track) + + ''' Step 2: First association, with high score detection boxes''' + strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + STrack.multi_predict(strack_pool) + dists = matching.iou_distance(strack_pool, detections) + if not self.mot20: + dists = matching.fuse_score(dists, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.match_thresh) + + for itracked, idet in matches: + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(detections[idet], self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + ''' Step 3: Second association, with low score detection boxes''' + # association the untrack to the low score detections + if len(dets_second) > 0: + '''Detections''' + detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for + (tlbr, s) in zip(dets_second, scores_second)] + else: + detections_second = [] + r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state 
== TrackState.Tracked] + dists = matching.iou_distance(r_tracked_stracks, detections_second) + matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5) + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections_second[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + for it in u_track: + track = r_tracked_stracks[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_stracks.append(track) + + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed, detections) + if not self.mot20: + dists = matching.fuse_score(dists, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_starcks.append(unconfirmed[itracked]) + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.det_thresh: + continue + track.activate(self.kalman_filter, self.frame_id) + activated_starcks.append(track) + """ Step 5: Update state""" + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + + # print('Ramained match {} s'.format(t4-t3)) + + self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) + self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) + self.removed_stracks.extend(removed_stracks) + self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + # get scores of lost tracks + output_stracks = [track for track in self.tracked_stracks if track.is_activated] + + return output_stracks + + +def joint_stracks(tlista, tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + + +def remove_duplicate_stracks(stracksa, stracksb): + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist < 0.15) + dupa, dupb = list(), list() + for p, q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i, t in enumerate(stracksa) if not i in dupa] + resb = [t for i, t in enumerate(stracksb) if not i in dupb] + return resa, resb diff --git a/asone/trackers/byte_track/tracker/kalman_filter.py b/asone/trackers/byte_track/tracker/kalman_filter.py new file mode 100644 index 
0000000000000000000000000000000000000000..deda8a26292b81bc6512a8f6145afabde6c16d7a --- /dev/null +++ b/asone/trackers/byte_track/tracker/kalman_filter.py @@ -0,0 +1,270 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np +import scipy.linalg + + +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + + +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + + The 8-dimensional state space + + x, y, a, h, vx, vy, va, vh + + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. + self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. / 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, a, h) with center position (x, y), + aspect ratio a, and height h. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], + 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 1e-5, + 10 * self._std_weight_velocity * measurement[3]] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. 
+ + """ + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3]] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + #mean = np.dot(self._motion_mat, mean) + mean = np.dot(mean, self._motion_mat.T) + covariance = np.linalg.multi_dot(( + self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """Project state distribution to measurement space. + + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state + estimate. + + """ + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3]] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot(( + self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def multi_predict(self, mean, covariance): + """Run Kalman filter prediction step (Vectorized version). + Parameters + ---------- + mean : ndarray + The Nx8 dimensional mean matrix of the object states at the previous + time step. + covariance : ndarray + The Nx8x8 dimensional covariance matrics of the object states at the + previous time step. + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[:, 3], + self._std_weight_position * mean[:, 3], + 1e-2 * np.ones_like(mean[:, 3]), + self._std_weight_position * mean[:, 3]] + std_vel = [ + self._std_weight_velocity * mean[:, 3], + self._std_weight_velocity * mean[:, 3], + 1e-5 * np.ones_like(mean[:, 3]), + self._std_weight_velocity * mean[:, 3]] + sqr = np.square(np.r_[std_pos, std_vel]).T + + motion_cov = [] + for i in range(len(mean)): + motion_cov.append(np.diag(sqr[i])) + motion_cov = np.asarray(motion_cov) + + mean = np.dot(mean, self._motion_mat.T) + left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) + covariance = np.dot(left, self._motion_mat.T) + motion_cov + + return mean, covariance + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) + is the center position, a the aspect ratio, and h the height of the + bounding box. + + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. 
+ + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot(( + kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, + only_position=False, metric='maha'): + """Compute gating distance between state distribution and measurements. + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + Parameters + ---------- + mean : ndarray + Mean vector over the state distribution (8 dimensional). + covariance : ndarray + Covariance of the state distribution (8x8 dimensional). + measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position : Optional[bool] + If True, distance computation is done with respect to the bounding + box center position only. + Returns + ------- + ndarray + Returns an array of length N, where the i-th element contains the + squared Mahalanobis distance between (mean, covariance) and + `measurements[i]`. + """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + d = measurements - mean + if metric == 'gaussian': + return np.sum(d * d, axis=1) + elif metric == 'maha': + cholesky_factor = np.linalg.cholesky(covariance) + z = scipy.linalg.solve_triangular( + cholesky_factor, d.T, lower=True, check_finite=False, + overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha + else: + raise ValueError('invalid distance metric') \ No newline at end of file diff --git a/asone/trackers/byte_track/tracker/matching.py b/asone/trackers/byte_track/tracker/matching.py new file mode 100644 index 0000000000000000000000000000000000000000..19f2d1d34fdc24454887eaff2db1628b615d8467 --- /dev/null +++ b/asone/trackers/byte_track/tracker/matching.py @@ -0,0 +1,178 @@ +import numpy as np +import scipy +import lap +from scipy.spatial.distance import cdist +from cython_bbox import bbox_overlaps as bbox_ious +from asone.trackers.byte_track.tracker import kalman_filter + +def merge_matches(m1, m2, shape): + O,P,Q = shape + m1 = np.asarray(m1) + m2 = np.asarray(m2) + + M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) + M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) + + mask = M1*M2 + match = mask.nonzero() + match = list(zip(match[0], match[1])) + unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) + unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) + + return match, unmatched_O, unmatched_Q + + +def _indices_to_matches(cost_matrix, indices, thresh): + matched_cost = cost_matrix[tuple(zip(*indices))] + matched_mask = (matched_cost <= thresh) + + matches = indices[matched_mask] + unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) + unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) + + return matches, unmatched_a, 
unmatched_b + + +def linear_assignment(cost_matrix, thresh): + if cost_matrix.size == 0: + return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) + matches, unmatched_a, unmatched_b = [], [], [] + cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) + for ix, mx in enumerate(x): + if mx >= 0: + matches.append([ix, mx]) + unmatched_a = np.where(x < 0)[0] + unmatched_b = np.where(y < 0)[0] + matches = np.asarray(matches) + return matches, unmatched_a, unmatched_b + + +def ious(atlbrs, btlbrs): + """ + Compute cost based on IoU + :type atlbrs: list[tlbr] | np.ndarray + :type atlbrs: list[tlbr] | np.ndarray + + :rtype ious np.ndarray + """ + ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) + if ious.size == 0: + return ious + + ious = bbox_ious( + np.ascontiguousarray(atlbrs, dtype=np.float), + np.ascontiguousarray(btlbrs, dtype=np.float) + ) + + return ious + + +def iou_distance(atracks, btracks): + """ + Compute cost based on IoU + :type atracks: list[STrack] + :type btracks: list[STrack] + + :rtype cost_matrix np.ndarray + """ + + if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): + atlbrs = atracks + btlbrs = btracks + else: + atlbrs = [track.tlbr for track in atracks] + btlbrs = [track.tlbr for track in btracks] + _ious = ious(atlbrs, btlbrs) + cost_matrix = 1 - _ious + + return cost_matrix + +def v_iou_distance(atracks, btracks): + """ + Compute cost based on IoU + :type atracks: list[STrack] + :type btracks: list[STrack] + + :rtype cost_matrix np.ndarray + """ + + if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): + atlbrs = atracks + btlbrs = btracks + else: + atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks] + btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks] + _ious = ious(atlbrs, btlbrs) + cost_matrix = 1 - _ious + + return cost_matrix + +def embedding_distance(tracks, detections, metric='cosine'): + """ + :param tracks: list[STrack] + :param detections: list[BaseTrack] + :param metric: + :return: cost_matrix np.ndarray + """ + + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + #for i, track in enumerate(tracks): + #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + return cost_matrix + + +def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = np.inf + return cost_matrix + + +def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + 
measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position, metric='maha') + cost_matrix[row, gating_distance > gating_threshold] = np.inf + cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance + return cost_matrix + + +def fuse_iou(cost_matrix, tracks, detections): + if cost_matrix.size == 0: + return cost_matrix + reid_sim = 1 - cost_matrix + iou_dist = iou_distance(tracks, detections) + iou_sim = 1 - iou_dist + fuse_sim = reid_sim * (1 + iou_sim) / 2 + det_scores = np.array([det.score for det in detections]) + det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) + #fuse_sim = fuse_sim * (1 + det_scores) / 2 + fuse_cost = 1 - fuse_sim + return fuse_cost + + +def fuse_score(cost_matrix, detections): + if cost_matrix.size == 0: + return cost_matrix + iou_sim = 1 - cost_matrix + det_scores = np.array([det.score for det in detections]) + det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) + fuse_sim = iou_sim * det_scores + fuse_cost = 1 - fuse_sim + return fuse_cost \ No newline at end of file diff --git a/asone/trackers/deep_sort/__init__.py b/asone/trackers/deep_sort/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/asone/trackers/deep_sort/deepsort.py b/asone/trackers/deep_sort/deepsort.py new file mode 100644 index 0000000000000000000000000000000000000000..077e3fa15fcfb51cc239f305e6de7860ebb99192 --- /dev/null +++ b/asone/trackers/deep_sort/deepsort.py @@ -0,0 +1,74 @@ +from .tracker import build_tracker +import numpy as np +import os +from asone import utils + + +class DeepSort: + def __init__(self, detector, weights=None, use_cuda=True): + + if weights is None: + weights = os.path.join(os.path.dirname( + os.path.abspath(__file__)), "tracker/deep/checkpoint/ckpt.t7") + + if not os.path.exists(weights): + utils.download_weights(weights) + + cfg = { + 'MAX_DIST': 0.2, + 'MIN_CONFIDENCE': 0.3, + 'NMS_MAX_OVERLAP': 0.5, + 'MAX_IOU_DISTANCE': 0.7, + 'MAX_AGE': 70, + 'N_INIT': 3, + 'NN_BUDGET': 100 + } + + self.tracker = build_tracker(weights, cfg, use_cuda=use_cuda) + self.detector = detector + try: + self.input_shape = tuple(detector.model.get_inputs()[0].shape[2:]) + except AttributeError as e: + self.input_shape = (640, 640) + + def detect_and_track(self, image: np.ndarray, config: dict) -> tuple: + + dets_xyxy, image_info = self.detector.detect( + image, **config + ) + + image_info['im0'] = image + + class_ids = [] + ids = [] + bboxes_xyxy = [] + scores = [] + + if isinstance(dets_xyxy, np.ndarray) and len(dets_xyxy) > 0: + class_ids = dets_xyxy[:, -1].tolist() + bboxes_xyxy, ids, class_ids = self._tracker_update( + dets_xyxy, + image_info, + ) + + return bboxes_xyxy, ids, [], class_ids + + def _tracker_update(self, dets_xyxy: np.ndarray, image_info: dict): + + bbox_xyxy = [] + ids = [] + object_id = [] + + if dets_xyxy is not None: + dets_xywh = np.array([np.array(utils.xyxy_to_xywh(det)) + for det in dets_xyxy[:, :4]]) + + outputs = self.tracker.update( + dets_xywh, dets_xyxy[:, -2].tolist(), dets_xyxy[:, -1].tolist(), image_info['im0']) + + if len(outputs) > 0: + bbox_xyxy = outputs[:, :4] + ids = outputs[:, -2] + object_id = outputs[:, -1] + + return bbox_xyxy, ids, object_id diff --git a/asone/trackers/deep_sort/tracker/.DS_Store 
b/asone/trackers/deep_sort/tracker/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..03ce5b7b8f7e9d04f249929f023b58114b4ef969 Binary files /dev/null and b/asone/trackers/deep_sort/tracker/.DS_Store differ diff --git a/asone/trackers/deep_sort/tracker/.gitignore b/asone/trackers/deep_sort/tracker/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..37ed2f4dc4a1ca945a0d807274bfe2f6cc7e2fec --- /dev/null +++ b/asone/trackers/deep_sort/tracker/.gitignore @@ -0,0 +1,13 @@ +# Folders +__pycache__/ +build/ +*.egg-info + + +# Files +*.weights +*.t7 +*.mp4 +*.avi +*.so +*.txt diff --git a/asone/trackers/deep_sort/tracker/README.md b/asone/trackers/deep_sort/tracker/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6073f8064faeaa5dfe6ec9642830b5506d02276f --- /dev/null +++ b/asone/trackers/deep_sort/tracker/README.md @@ -0,0 +1,137 @@ +# Deep Sort with PyTorch + +![](demo/demo.gif) + +## Update(1-1-2020) +Changes +- fix bugs +- refactor code +- accerate detection by adding nms on gpu + +## Latest Update(07-22) +Changes +- bug fix (Thanks @JieChen91 and @yingsen1 for bug reporting). +- using batch for feature extracting for each frame, which lead to a small speed up. +- code improvement. + +Futher improvement direction +- Train detector on specific dataset rather than the official one. +- Retrain REID model on pedestrain dataset for better performance. +- Replace YOLOv3 detector with advanced ones. + +**Any contributions to this repository is welcome!** + + +## Introduction +This is an implement of MOT tracking algorithm deep sort. Deep sort is basicly the same with sort but added a CNN model to extract features in image of human part bounded by a detector. This CNN model is indeed a RE-ID model and the detector used in [PAPER](https://arxiv.org/abs/1703.07402) is FasterRCNN , and the original source code is [HERE](https://github.com/nwojke/deep_sort). +However in original code, the CNN model is implemented with tensorflow, which I'm not familier with. SO I re-implemented the CNN feature extraction model with PyTorch, and changed the CNN model a little bit. Also, I use **YOLOv3** to generate bboxes instead of FasterRCNN. + +## Dependencies +- python 3 (python2 not sure) +- numpy +- scipy +- opencv-python +- sklearn +- torch >= 0.4 +- torchvision >= 0.1 +- pillow +- vizer +- edict + +## Quick Start +0. Check all dependencies installed +```bash +pip install -r requirements.txt +``` +for user in china, you can specify pypi source to accelerate install like: +```bash +pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple +``` + +1. Clone this repository +``` +git clone git@github.com:ZQPei/deep_sort_pytorch.git +``` + +2. Download YOLOv3 parameters +``` +cd detector/YOLOv3/weight/ +wget https://pjreddie.com/media/files/yolov3.weights +wget https://pjreddie.com/media/files/yolov3-tiny.weights +cd ../../../ +``` + +3. Download deepsort parameters ckpt.t7 +``` +cd deep_sort/deep/checkpoint +# download ckpt.t7 from +https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6 to this folder +cd ../../../ +``` + +4. Compile nms module +```bash +cd detector/YOLOv3/nms +sh build.sh +cd ../../.. +``` + +Notice: +If compiling failed, the simplist way is to **Upgrade your pytorch >= 1.1 and torchvision >= 0.3" and you can avoid the troublesome compiling problems which are most likely caused by either `gcc version too low` or `libraries missing`. + +5. 
Run demo +``` +usage: python yolov3_deepsort.py VIDEO_PATH + [--help] + [--frame_interval FRAME_INTERVAL] + [--config_detection CONFIG_DETECTION] + [--config_deepsort CONFIG_DEEPSORT] + [--display] + [--display_width DISPLAY_WIDTH] + [--display_height DISPLAY_HEIGHT] + [--save_path SAVE_PATH] + [--cpu] + +# yolov3 + deepsort +python yolov3_deepsort.py [VIDEO_PATH] + +# yolov3_tiny + deepsort +python yolov3_deepsort.py [VIDEO_PATH] --config_detection ./configs/yolov3_tiny.yaml + +# yolov3 + deepsort on webcam +python3 yolov3_deepsort.py /dev/video0 --camera 0 + +# yolov3_tiny + deepsort on webcam +python3 yolov3_deepsort.py /dev/video0 --config_detection ./configs/yolov3_tiny.yaml --camera 0 +``` +Use `--display` to enable display. +Results will be saved to `./output/results.avi` and `./output/results.txt`. + +All files above can also be accessed from BaiduDisk! +linker:[BaiduDisk](https://pan.baidu.com/s/1YJ1iPpdFTlUyLFoonYvozg) +passwd:fbuw + +## Training the RE-ID model +The original model used in paper is in original_model.py, and its parameter here [original_ckpt.t7](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6). + +To train the model, first you need download [Market1501](http://www.liangzheng.com.cn/Project/project_reid.html) dataset or [Mars](http://www.liangzheng.com.cn/Project/project_mars.html) dataset. + +Then you can try [train.py](deep_sort/deep/train.py) to train your own parameter and evaluate it using [test.py](deep_sort/deep/test.py) and [evaluate.py](deep_sort/deep/evalute.py). +![train.jpg](deep_sort/deep/train.jpg) + +## Demo videos and images +[demo.avi](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6) +[demo2.avi](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6) + +![1.jpg](demo/1.jpg) +![2.jpg](demo/2.jpg) + + +## References +- paper: [Simple Online and Realtime Tracking with a Deep Association Metric](https://arxiv.org/abs/1703.07402) + +- code: [nwojke/deep_sort](https://github.com/nwojke/deep_sort) + +- paper: [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf) + +- code: [Joseph Redmon/yolov3](https://pjreddie.com/darknet/yolo/) diff --git a/asone/trackers/deep_sort/tracker/__init__.py b/asone/trackers/deep_sort/tracker/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f850ca27472a7178dc6825a0eb15333974119ddd --- /dev/null +++ b/asone/trackers/deep_sort/tracker/__init__.py @@ -0,0 +1,14 @@ +from .deep_sort import DeepSORT +from .parser import get_config + +__all__ = ['DeepSORT', 'build_tracker'] + + +def build_tracker(weights, cfg, use_cuda=True): + # cfg = get_config() + # cfg.merge_from_file(cfg_deep) + + return DeepSORT(weights, + max_dist=cfg['MAX_DIST'], min_confidence=cfg['MIN_CONFIDENCE'], + nms_max_overlap=cfg['NMS_MAX_OVERLAP'], max_iou_distance=cfg['MAX_IOU_DISTANCE'], + max_age=cfg['MAX_AGE'], n_init=cfg['N_INIT'], nn_budget=cfg['NN_BUDGET'], use_cuda=use_cuda) diff --git a/asone/trackers/deep_sort/tracker/deep/__init__.py b/asone/trackers/deep_sort/tracker/deep/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/asone/trackers/deep_sort/tracker/deep/evaluate.py b/asone/trackers/deep_sort/tracker/deep/evaluate.py new file mode 100644 index 0000000000000000000000000000000000000000..a0458ace6993dcae9f820e076f8c5dcc62d592ca --- /dev/null +++ b/asone/trackers/deep_sort/tracker/deep/evaluate.py @@ -0,0 +1,13 @@ +import torch + +features = 
torch.load("features.pth") +qf = features["qf"] +ql = features["ql"] +gf = features["gf"] +gl = features["gl"] + +scores = qf.mm(gf.t()) +res = scores.topk(5, dim=1)[1][:, 0] +top1correct = gl[res].eq(ql).sum().item() + +print("Acc top1:{:.3f}".format(top1correct / ql.size(0))) diff --git a/asone/trackers/deep_sort/tracker/deep/feature_extractor.py b/asone/trackers/deep_sort/tracker/deep/feature_extractor.py new file mode 100644 index 0000000000000000000000000000000000000000..a342cf5b6021dcc009ea7e4d35f6f28e298bda65 --- /dev/null +++ b/asone/trackers/deep_sort/tracker/deep/feature_extractor.py @@ -0,0 +1,54 @@ +import torch +import torchvision.transforms as transforms +import numpy as np +import cv2 +import logging + +from .model import Net + + +class Extractor(object): + def __init__(self, model_path, use_cuda=True): + self.net = Net(reid=True) + self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" + state_dict = torch.load(model_path, map_location=torch.device(self.device))[ + 'net_dict'] + self.net.load_state_dict(state_dict) + logger = logging.getLogger("root.tracker") + logger.info("Loading weights from {}... Done!".format(model_path)) + self.net.to(self.device) + self.size = (64, 128) + self.norm = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + ]) + + def _preprocess(self, im_crops): + """ + TODO: + 1. to float with scale from 0 to 1 + 2. resize to (64, 128) as Market1501 dataset did + 3. concatenate to a numpy array + 3. to torch Tensor + 4. normalize + """ + def _resize(im, size): + return cv2.resize(im.astype(np.float32)/255., size) + + im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze( + 0) for im in im_crops], dim=0).float() + return im_batch + + def __call__(self, im_crops): + im_batch = self._preprocess(im_crops) + with torch.no_grad(): + im_batch = im_batch.to(self.device) + features = self.net(im_batch) + return features.cpu().numpy() + + +if __name__ == '__main__': + img = cv2.imread("demo.jpg")[:, :, (2, 1, 0)] + extr = Extractor("checkpoint/ckpt.t7") + feature = extr(img) + print(feature.shape) diff --git a/asone/trackers/deep_sort/tracker/deep/model.py b/asone/trackers/deep_sort/tracker/deep/model.py new file mode 100644 index 0000000000000000000000000000000000000000..b99247489627df09276b52f6d47ef866e0e5bd4a --- /dev/null +++ b/asone/trackers/deep_sort/tracker/deep/model.py @@ -0,0 +1,109 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class BasicBlock(nn.Module): + def __init__(self, c_in, c_out, is_downsample=False): + super(BasicBlock, self).__init__() + self.is_downsample = is_downsample + if is_downsample: + self.conv1 = nn.Conv2d( + c_in, c_out, 3, stride=2, padding=1, bias=False) + else: + self.conv1 = nn.Conv2d( + c_in, c_out, 3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(c_out) + self.relu = nn.ReLU(True) + self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(c_out) + if is_downsample: + self.downsample = nn.Sequential( + nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), + nn.BatchNorm2d(c_out) + ) + elif c_in != c_out: + self.downsample = nn.Sequential( + nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), + nn.BatchNorm2d(c_out) + ) + self.is_downsample = True + + def forward(self, x): + y = self.conv1(x) + y = self.bn1(y) + y = self.relu(y) + y = self.conv2(y) + y = self.bn2(y) + if self.is_downsample: + x = self.downsample(x) + return F.relu(x.add(y), True) + 
+ +def make_layers(c_in, c_out, repeat_times, is_downsample=False): + blocks = [] + for i in range(repeat_times): + if i == 0: + blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ] + else: + blocks += [BasicBlock(c_out, c_out), ] + return nn.Sequential(*blocks) + + +class Net(nn.Module): + def __init__(self, num_classes=751, reid=False): + super(Net, self).__init__() + # 3 128 64 + self.conv = nn.Sequential( + nn.Conv2d(3, 64, 3, stride=1, padding=1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True), + # nn.Conv2d(32,32,3,stride=1,padding=1), + # nn.BatchNorm2d(32), + # nn.ReLU(inplace=True), + nn.MaxPool2d(3, 2, padding=1), + ) + # 32 64 32 + self.layer1 = make_layers(64, 64, 2, False) + # 32 64 32 + self.layer2 = make_layers(64, 128, 2, True) + # 64 32 16 + self.layer3 = make_layers(128, 256, 2, True) + # 128 16 8 + self.layer4 = make_layers(256, 512, 2, True) + # 256 8 4 + self.avgpool = nn.AvgPool2d((8, 4), 1) + # 256 1 1 + self.reid = reid + self.classifier = nn.Sequential( + nn.Linear(512, 256), + nn.BatchNorm1d(256), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(256, num_classes), + ) + + def forward(self, x): + x = self.conv(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.avgpool(x) + x = x.view(x.size(0), -1) + # B x 128 + if self.reid: + x = x.div(x.norm(p=2, dim=1, keepdim=True)) + return x + # classifier + x = self.classifier(x) + return x + + +if __name__ == '__main__': + net = Net() + x = torch.randn(4, 3, 128, 64) + y = net(x) + import ipdb + ipdb.set_trace() diff --git a/asone/trackers/deep_sort/tracker/deep/original_model.py b/asone/trackers/deep_sort/tracker/deep/original_model.py new file mode 100644 index 0000000000000000000000000000000000000000..27734ad52b3b02d815416d998bae145a93dbf519 --- /dev/null +++ b/asone/trackers/deep_sort/tracker/deep/original_model.py @@ -0,0 +1,111 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class BasicBlock(nn.Module): + def __init__(self, c_in, c_out, is_downsample=False): + super(BasicBlock, self).__init__() + self.is_downsample = is_downsample + if is_downsample: + self.conv1 = nn.Conv2d( + c_in, c_out, 3, stride=2, padding=1, bias=False) + else: + self.conv1 = nn.Conv2d( + c_in, c_out, 3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(c_out) + self.relu = nn.ReLU(True) + self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(c_out) + if is_downsample: + self.downsample = nn.Sequential( + nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), + nn.BatchNorm2d(c_out) + ) + elif c_in != c_out: + self.downsample = nn.Sequential( + nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), + nn.BatchNorm2d(c_out) + ) + self.is_downsample = True + + def forward(self, x): + y = self.conv1(x) + y = self.bn1(y) + y = self.relu(y) + y = self.conv2(y) + y = self.bn2(y) + if self.is_downsample: + x = self.downsample(x) + return F.relu(x.add(y), True) + + +def make_layers(c_in, c_out, repeat_times, is_downsample=False): + blocks = [] + for i in range(repeat_times): + if i == 0: + blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ] + else: + blocks += [BasicBlock(c_out, c_out), ] + return nn.Sequential(*blocks) + + +class Net(nn.Module): + def __init__(self, num_classes=625, reid=False): + super(Net, self).__init__() + # 3 128 64 + self.conv = nn.Sequential( + nn.Conv2d(3, 32, 3, stride=1, padding=1), + nn.BatchNorm2d(32), + nn.ELU(inplace=True), + nn.Conv2d(32, 32, 3, stride=1, padding=1), 
+ nn.BatchNorm2d(32), + nn.ELU(inplace=True), + nn.MaxPool2d(3, 2, padding=1), + ) + # 32 64 32 + self.layer1 = make_layers(32, 32, 2, False) + # 32 64 32 + self.layer2 = make_layers(32, 64, 2, True) + # 64 32 16 + self.layer3 = make_layers(64, 128, 2, True) + # 128 16 8 + self.dense = nn.Sequential( + nn.Dropout(p=0.6), + nn.Linear(128*16*8, 128), + nn.BatchNorm1d(128), + nn.ELU(inplace=True) + ) + # 256 1 1 + self.reid = reid + self.batch_norm = nn.BatchNorm1d(128) + self.classifier = nn.Sequential( + nn.Linear(128, num_classes), + ) + + def forward(self, x): + x = self.conv(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + + x = x.view(x.size(0), -1) + if self.reid: + x = self.dense[0](x) + x = self.dense[1](x) + x = x.div(x.norm(p=2, dim=1, keepdim=True)) + return x + x = self.dense(x) + # B x 128 + # classifier + x = self.classifier(x) + return x + + +if __name__ == '__main__': + net = Net(reid=True) + x = torch.randn(4, 3, 128, 64) + y = net(x) + import ipdb + ipdb.set_trace() diff --git a/asone/trackers/deep_sort/tracker/deep/test.py b/asone/trackers/deep_sort/tracker/deep/test.py new file mode 100644 index 0000000000000000000000000000000000000000..0ba3050cb441e6419112604657797c78b6aa9b74 --- /dev/null +++ b/asone/trackers/deep_sort/tracker/deep/test.py @@ -0,0 +1,80 @@ +import torch +import torch.backends.cudnn as cudnn +import torchvision + +import argparse +import os + +from model import Net + +parser = argparse.ArgumentParser(description="Train on market1501") +parser.add_argument("--data-dir", default='data', type=str) +parser.add_argument("--no-cuda", action="store_true") +parser.add_argument("--gpu-id", default=0, type=int) +args = parser.parse_args() + +# device +device = "cuda:{}".format( + args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" +if torch.cuda.is_available() and not args.no_cuda: + cudnn.benchmark = True + +# data loader +root = args.data_dir +query_dir = os.path.join(root, "query") +gallery_dir = os.path.join(root, "gallery") +transform = torchvision.transforms.Compose([ + torchvision.transforms.Resize((128, 64)), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize( + [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) +]) +queryloader = torch.utils.data.DataLoader( + torchvision.datasets.ImageFolder(query_dir, transform=transform), + batch_size=64, shuffle=False +) +galleryloader = torch.utils.data.DataLoader( + torchvision.datasets.ImageFolder(gallery_dir, transform=transform), + batch_size=64, shuffle=False +) + +# net definition +net = Net(reid=True) +assert os.path.isfile( + "./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 
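The remainder of this script stacks query and gallery features and saves them to features.pth; evaluate.py (shown earlier in this diff) then ranks each query against the gallery with a plain dot product, which equals cosine similarity because the reid features are L2-normalised. Below is a self-contained sketch of that ranking step using random stand-in features; the matrix sizes and label range are arbitrary, so the printed accuracy is only illustrative.

```python
# Illustrative re-implementation of the top-1 retrieval metric from evaluate.py,
# run on random unit-norm features instead of a real features.pth.
import torch
import torch.nn.functional as F

num_query, num_gallery, dim = 50, 200, 512               # arbitrary sizes
qf = F.normalize(torch.randn(num_query, dim), dim=1)     # query features
gf = F.normalize(torch.randn(num_gallery, dim), dim=1)   # gallery features
ql = torch.randint(0, 10, (num_query,))                  # query identity labels
gl = torch.randint(0, 10, (num_gallery,))                # gallery identity labels

scores = qf.mm(gf.t())                      # cosine similarity, (num_query, num_gallery)
best = scores.topk(1, dim=1).indices[:, 0]  # most similar gallery item per query
top1 = gl[best].eq(ql).float().mean().item()
print(f"Acc top1: {top1:.3f}")              # ~0.1 here, since the features are random
```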
+print('Loading from checkpoint/ckpt.t7') +checkpoint = torch.load("./checkpoint/ckpt.t7") +net_dict = checkpoint['net_dict'] +net.load_state_dict(net_dict, strict=False) +net.eval() +net.to(device) + +# compute features +query_features = torch.tensor([]).float() +query_labels = torch.tensor([]).long() +gallery_features = torch.tensor([]).float() +gallery_labels = torch.tensor([]).long() + +with torch.no_grad(): + for idx, (inputs, labels) in enumerate(queryloader): + inputs = inputs.to(device) + features = net(inputs).cpu() + query_features = torch.cat((query_features, features), dim=0) + query_labels = torch.cat((query_labels, labels)) + + for idx, (inputs, labels) in enumerate(galleryloader): + inputs = inputs.to(device) + features = net(inputs).cpu() + gallery_features = torch.cat((gallery_features, features), dim=0) + gallery_labels = torch.cat((gallery_labels, labels)) + +gallery_labels -= 2 + +# save features +features = { + "qf": query_features, + "ql": query_labels, + "gf": gallery_features, + "gl": gallery_labels +} +torch.save(features, "features.pth") diff --git a/asone/trackers/deep_sort/tracker/deep/train.jpg b/asone/trackers/deep_sort/tracker/deep/train.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3635a614738828b880aa862bc52423848ac8e472 Binary files /dev/null and b/asone/trackers/deep_sort/tracker/deep/train.jpg differ diff --git a/asone/trackers/deep_sort/tracker/deep/train.py b/asone/trackers/deep_sort/tracker/deep/train.py new file mode 100644 index 0000000000000000000000000000000000000000..67f475634cea1997212ee37917397134c5c4173b --- /dev/null +++ b/asone/trackers/deep_sort/tracker/deep/train.py @@ -0,0 +1,206 @@ +import argparse +import os +import time + +import numpy as np +import matplotlib.pyplot as plt +import torch +import torch.backends.cudnn as cudnn +import torchvision + +from model import Net + +parser = argparse.ArgumentParser(description="Train on market1501") +parser.add_argument("--data-dir", default='data', type=str) +parser.add_argument("--no-cuda", action="store_true") +parser.add_argument("--gpu-id", default=0, type=int) +parser.add_argument("--lr", default=0.1, type=float) +parser.add_argument("--interval", '-i', default=20, type=int) +parser.add_argument('--resume', '-r', action='store_true') +args = parser.parse_args() + +# device +device = "cuda:{}".format( + args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" +if torch.cuda.is_available() and not args.no_cuda: + cudnn.benchmark = True + +# data loading +root = args.data_dir +train_dir = os.path.join(root, "train") +test_dir = os.path.join(root, "test") +transform_train = torchvision.transforms.Compose([ + torchvision.transforms.RandomCrop((128, 64), padding=4), + torchvision.transforms.RandomHorizontalFlip(), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize( + [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) +]) +transform_test = torchvision.transforms.Compose([ + torchvision.transforms.Resize((128, 64)), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize( + [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) +]) +trainloader = torch.utils.data.DataLoader( + torchvision.datasets.ImageFolder(train_dir, transform=transform_train), + batch_size=64, shuffle=True +) +testloader = torch.utils.data.DataLoader( + torchvision.datasets.ImageFolder(test_dir, transform=transform_test), + batch_size=64, shuffle=True +) +num_classes = max(len(trainloader.dataset.classes), + len(testloader.dataset.classes)) + +# net definition 
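The net defined below is trained as an ordinary identity classifier; at inference time feature_extractor.py reloads the saved `net_dict` with `reid=True`, so the same backbone returns an L2-normalised embedding instead of logits. A minimal sketch of that round trip follows; the checkpoint path, batch size and class count are placeholders, not values fixed by this patch.

```python
# Sketch of the two ways Net is used: classifier during training, embedding
# extractor at inference. The checkpoint dict mirrors what this script saves
# ({'net_dict': ..., 'acc': ..., 'epoch': ...}); path and sizes are placeholders.
import torch
from asone.trackers.deep_sort.tracker.deep.model import Net

ckpt_path = "ckpt_demo.t7"                            # placeholder path

# Training phase: num_classes-way classification over identity labels.
clf = Net(num_classes=751)
logits = clf(torch.randn(4, 3, 128, 64))              # (4, 751) class scores
torch.save({"net_dict": clf.state_dict(), "acc": 0.0, "epoch": 0}, ckpt_path)

# Inference phase (what Extractor does): same weights, reid=True yields a
# unit-norm feature vector per 128x64 crop instead of logits.
reid_net = Net(reid=True).eval()
reid_net.load_state_dict(torch.load(ckpt_path, map_location="cpu")["net_dict"])
with torch.no_grad():
    feats = reid_net(torch.randn(4, 3, 128, 64))      # (4, 512), rows have unit L2 norm
print(logits.shape, feats.shape)
```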
+start_epoch = 0 +net = Net(num_classes=num_classes) +if args.resume: + assert os.path.isfile( + "./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" + print('Loading from checkpoint/ckpt.t7') + checkpoint = torch.load("./checkpoint/ckpt.t7") + # import ipdb; ipdb.set_trace() + net_dict = checkpoint['net_dict'] + net.load_state_dict(net_dict) + best_acc = checkpoint['acc'] + start_epoch = checkpoint['epoch'] +net.to(device) + +# loss and optimizer +criterion = torch.nn.CrossEntropyLoss() +optimizer = torch.optim.SGD( + net.parameters(), args.lr, momentum=0.9, weight_decay=5e-4) +best_acc = 0. + +# train function for each epoch + + +def train(epoch): + print("\nEpoch : %d" % (epoch+1)) + net.train() + training_loss = 0. + train_loss = 0. + correct = 0 + total = 0 + interval = args.interval + start = time.time() + for idx, (inputs, labels) in enumerate(trainloader): + # forward + inputs, labels = inputs.to(device), labels.to(device) + outputs = net(inputs) + loss = criterion(outputs, labels) + + # backward + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # accumurating + training_loss += loss.item() + train_loss += loss.item() + correct += outputs.max(dim=1)[1].eq(labels).sum().item() + total += labels.size(0) + + # print + if (idx+1) % interval == 0: + end = time.time() + print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( + 100.*(idx+1)/len(trainloader), end-start, training_loss / + interval, correct, total, 100.*correct/total + )) + training_loss = 0. + start = time.time() + + return train_loss/len(trainloader), 1. - correct/total + + +def test(epoch): + global best_acc + net.eval() + test_loss = 0. + correct = 0 + total = 0 + start = time.time() + with torch.no_grad(): + for idx, (inputs, labels) in enumerate(testloader): + inputs, labels = inputs.to(device), labels.to(device) + outputs = net(inputs) + loss = criterion(outputs, labels) + + test_loss += loss.item() + correct += outputs.max(dim=1)[1].eq(labels).sum().item() + total += labels.size(0) + + print("Testing ...") + end = time.time() + print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( + 100.*(idx+1)/len(testloader), end-start, test_loss / + len(testloader), correct, total, 100.*correct/total + )) + + # saving checkpoint + acc = 100.*correct/total + if acc > best_acc: + best_acc = acc + print("Saving parameters to checkpoint/ckpt.t7") + checkpoint = { + 'net_dict': net.state_dict(), + 'acc': acc, + 'epoch': epoch, + } + if not os.path.isdir('checkpoint'): + os.mkdir('checkpoint') + torch.save(checkpoint, './checkpoint/ckpt.t7') + + return test_loss/len(testloader), 1. 
- correct/total + + +# plot figure +x_epoch = [] +record = {'train_loss': [], 'train_err': [], 'test_loss': [], 'test_err': []} +fig = plt.figure() +ax0 = fig.add_subplot(121, title="loss") +ax1 = fig.add_subplot(122, title="top1err") + + +def draw_curve(epoch, train_loss, train_err, test_loss, test_err): + global record + record['train_loss'].append(train_loss) + record['train_err'].append(train_err) + record['test_loss'].append(test_loss) + record['test_err'].append(test_err) + + x_epoch.append(epoch) + ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train') + ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val') + ax1.plot(x_epoch, record['train_err'], 'bo-', label='train') + ax1.plot(x_epoch, record['test_err'], 'ro-', label='val') + if epoch == 0: + ax0.legend() + ax1.legend() + fig.savefig("train.jpg") + +# lr decay + + +def lr_decay(): + global optimizer + for params in optimizer.param_groups: + params['lr'] *= 0.1 + lr = params['lr'] + print("Learning rate adjusted to {}".format(lr)) + + +def main(): + for epoch in range(start_epoch, start_epoch+40): + train_loss, train_err = train(epoch) + test_loss, test_err = test(epoch) + draw_curve(epoch, train_loss, train_err, test_loss, test_err) + if (epoch+1) % 20 == 0: + lr_decay() + + +if __name__ == '__main__': + main() diff --git a/asone/trackers/deep_sort/tracker/deep_sort.py b/asone/trackers/deep_sort/tracker/deep_sort.py new file mode 100644 index 0000000000000000000000000000000000000000..5092424cc33f4a8ff3368b4da81afd6d55d9eee0 --- /dev/null +++ b/asone/trackers/deep_sort/tracker/deep_sort.py @@ -0,0 +1,115 @@ +import numpy as np +import torch + +from .deep.feature_extractor import Extractor +from .sort.nn_matching import NearestNeighborDistanceMetric +from .sort.detection import Detection +from .sort.tracker import Tracker + + +__all__ = ['DeepSORT'] + + +class DeepSORT(object): + def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, max_age=70, n_init=3, nn_budget=100, use_cuda=True): + self.min_confidence = min_confidence + self.nms_max_overlap = nms_max_overlap + + self.extractor = Extractor(model_path, use_cuda=use_cuda) + + max_cosine_distance = max_dist + metric = NearestNeighborDistanceMetric( + "cosine", max_cosine_distance, nn_budget) + self.tracker = Tracker( + metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init) + + def update(self, bbox_xywh, confidences, oids, ori_img): + self.height, self.width = ori_img.shape[:2] + # generate detections + features = self._get_features(bbox_xywh, ori_img) + bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) + detections = [Detection(bbox_tlwh[i], conf, features[i], oid) for i, (conf,oid) in enumerate(zip(confidences,oids)) if conf > self.min_confidence] + + # print(detections) + # run on non-maximum supression + boxes = np.array([d.tlwh for d in detections]) + scores = np.array([d.confidence for d in detections]) + + # update tracker + self.tracker.predict() + self.tracker.update(detections) + # print("len(scores):", len(scores)) + # print("self.tracker.tracks",len(self.tracker.tracks)) + # output bbox identities + outputs = [] + for track in self.tracker.tracks: + if not track.is_confirmed() or track.time_since_update > 1: + continue + box = track.to_tlwh() + x1, y1, x2, y2 = self._tlwh_to_xyxy(box) + track_id = track.track_id + track_oid = track.oid + outputs.append(np.array([x1, y1, x2, y2, track_id, track_oid], dtype=np.int)) + if len(outputs) > 0: + outputs = np.stack(outputs, axis=0) + return outputs 
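`update()` is the whole per-frame API of this class: it expects centre-based (x, y, w, h) boxes plus confidences, class ids and the original frame, and returns rows of [x1, y1, x2, y2, track_id, class_id] for confirmed tracks only. A minimal usage sketch follows; the weights path and detections are placeholders, `use_cuda=False` keeps it CPU-only, and note that this module still uses the `np.float`/`np.int` aliases, so it assumes a NumPy version older than 1.24.

```python
# Per-frame usage sketch for DeepSORT.update (placeholder detections and weights path).
import numpy as np
from asone.trackers.deep_sort.tracker import DeepSORT

tracker = DeepSORT("checkpoint/ckpt.t7", use_cuda=False)   # path to the Re-ID checkpoint

frame = np.zeros((720, 1280, 3), dtype=np.uint8)           # stand-in for a video frame
bbox_xywh = np.array([[400.0, 300.0, 80.0, 160.0],         # (cx, cy, w, h) per detection
                      [900.0, 350.0, 60.0, 120.0]])
confidences = [0.9, 0.8]
class_ids = [0, 0]

outputs = tracker.update(bbox_xywh, confidences, class_ids, frame)
# Rows are [x1, y1, x2, y2, track_id, class_id]; a track is only reported once it is
# confirmed (n_init consecutive matches), so the first couple of frames may be empty.
for x1, y1, x2, y2, track_id, class_id in outputs:
    print(track_id, (x1, y1, x2, y2), class_id)
```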
+ + """ + TODO: + Convert bbox from xc_yc_w_h to xtl_ytl_w_h + Thanks JieChen91@github.com for reporting this bug! + """ + @staticmethod + def _xywh_to_tlwh(bbox_xywh): + if isinstance(bbox_xywh, np.ndarray): + bbox_tlwh = bbox_xywh.copy() + elif isinstance(bbox_xywh, torch.Tensor): + bbox_tlwh = bbox_xywh.clone() + bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2. + bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2. + return bbox_tlwh + + def _xywh_to_xyxy(self, bbox_xywh): + x, y, w, h = bbox_xywh + x1 = max(int(x - w / 2), 0) + x2 = min(int(x + w / 2), self.width - 1) + y1 = max(int(y - h / 2), 0) + y2 = min(int(y + h / 2), self.height - 1) + return x1, y1, x2, y2 + + def _tlwh_to_xyxy(self, bbox_tlwh): + """ + TODO: + Convert bbox from xtl_ytl_w_h to xc_yc_w_h + Thanks JieChen91@github.com for reporting this bug! + """ + x, y, w, h = bbox_tlwh + x1 = max(int(x), 0) + x2 = min(int(x+w), self.width - 1) + y1 = max(int(y), 0) + y2 = min(int(y+h), self.height - 1) + return x1, y1, x2, y2 + + def increment_ages(self): + self.tracker.increment_ages() + + def _xyxy_to_tlwh(self, bbox_xyxy): + x1, y1, x2, y2 = bbox_xyxy + + t = x1 + l = y1 + w = int(x2 - x1) + h = int(y2 - y1) + return t, l, w, h + + def _get_features(self, bbox_xywh, ori_img): + im_crops = [] + for box in bbox_xywh: + x1, y1, x2, y2 = self._xywh_to_xyxy(box) + im = ori_img[y1:y2, x1:x2] + im_crops.append(im) + if im_crops: + features = self.extractor(im_crops) + else: + features = np.array([]) + return features diff --git a/asone/trackers/deep_sort/tracker/parser.py b/asone/trackers/deep_sort/tracker/parser.py new file mode 100644 index 0000000000000000000000000000000000000000..449d1aaac85c917e223f61535e0f24bd9e197489 --- /dev/null +++ b/asone/trackers/deep_sort/tracker/parser.py @@ -0,0 +1,41 @@ +import os +import yaml +from easydict import EasyDict as edict + + +class YamlParser(edict): + """ + This is yaml parser based on EasyDict. + """ + + def __init__(self, cfg_dict=None, config_file=None): + if cfg_dict is None: + cfg_dict = {} + + if config_file is not None: + assert(os.path.isfile(config_file)) + with open(config_file, 'r') as fo: + yaml_ = yaml.load(fo.read(), Loader=yaml.FullLoader) + cfg_dict.update(yaml_) + + super(YamlParser, self).__init__(cfg_dict) + + def merge_from_file(self, config_file): + with open(config_file, 'r') as fo: + yaml_ = yaml.load(fo.read(), Loader=yaml.FullLoader) + self.update(yaml_) + + def merge_from_dict(self, config_dict): + self.update(config_dict) + + +def get_config(config_file=None): + return YamlParser(config_file=config_file) + + +if __name__ == "__main__": + cfg = YamlParser(config_file="../configs/yolov3.yaml") + cfg.merge_from_file("../configs/deep_sort.yaml") + + import ipdb + ipdb.set_trace() diff --git a/asone/trackers/deep_sort/tracker/sort/__init__.py b/asone/trackers/deep_sort/tracker/sort/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/asone/trackers/deep_sort/tracker/sort/detection.py b/asone/trackers/deep_sort/tracker/sort/detection.py new file mode 100644 index 0000000000000000000000000000000000000000..de29a7ac5d2d66aae328f2a5a0cd7ef822cc0403 --- /dev/null +++ b/asone/trackers/deep_sort/tracker/sort/detection.py @@ -0,0 +1,50 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np + + +class Detection(object): + """ + This class represents a bounding box detection in a single image. + + Parameters + ---------- + tlwh : array_like + Bounding box in format `(x, y, w, h)`. 
+ confidence : float + Detector confidence score. + feature : array_like + A feature vector that describes the object contained in this image. + + Attributes + ---------- + tlwh : ndarray + Bounding box in format `(top left x, top left y, width, height)`. + confidence : ndarray + Detector confidence score. + feature : ndarray | NoneType + A feature vector that describes the object contained in this image. + + """ + + def __init__(self, tlwh, confidence, feature, oid): + self.tlwh = np.asarray(tlwh, dtype=np.float) + self.confidence = float(confidence) + self.feature = np.asarray(feature, dtype=np.float32) + self.oid = oid + + def to_tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + def to_xyah(self): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = self.tlwh.copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret diff --git a/asone/trackers/deep_sort/tracker/sort/iou_matching.py b/asone/trackers/deep_sort/tracker/sort/iou_matching.py new file mode 100644 index 0000000000000000000000000000000000000000..62d5a3f63b70db5e322b6f8766444dd824c010ae --- /dev/null +++ b/asone/trackers/deep_sort/tracker/sort/iou_matching.py @@ -0,0 +1,82 @@ +# vim: expandtab:ts=4:sw=4 +from __future__ import absolute_import +import numpy as np +from . import linear_assignment + + +def iou(bbox, candidates): + """Computer intersection over union. + + Parameters + ---------- + bbox : ndarray + A bounding box in format `(top left x, top left y, width, height)`. + candidates : ndarray + A matrix of candidate bounding boxes (one per row) in the same format + as `bbox`. + + Returns + ------- + ndarray + The intersection over union in [0, 1] between the `bbox` and each + candidate. A higher score means a larger fraction of the `bbox` is + occluded by the candidate. + + """ + bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] + candidates_tl = candidates[:, :2] + candidates_br = candidates[:, :2] + candidates[:, 2:] + + tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], + np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] + br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], + np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] + wh = np.maximum(0., br - tl) + + area_intersection = wh.prod(axis=1) + area_bbox = bbox[2:].prod() + area_candidates = candidates[:, 2:].prod(axis=1) + return area_intersection / (area_bbox + area_candidates - area_intersection) + + +def iou_cost(tracks, detections, track_indices=None, + detection_indices=None): + """An intersection over union distance metric. + + Parameters + ---------- + tracks : List[deep_sort.track.Track] + A list of tracks. + detections : List[deep_sort.detection.Detection] + A list of detections. + track_indices : Optional[List[int]] + A list of indices to tracks that should be matched. Defaults to + all `tracks`. + detection_indices : Optional[List[int]] + A list of indices to detections that should be matched. Defaults + to all `detections`. + + Returns + ------- + ndarray + Returns a cost matrix of shape + len(track_indices), len(detection_indices) where entry (i, j) is + `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 
+ + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + cost_matrix = np.zeros((len(track_indices), len(detection_indices))) + for row, track_idx in enumerate(track_indices): + if tracks[track_idx].time_since_update > 1: + cost_matrix[row, :] = linear_assignment.INFTY_COST + continue + + bbox = tracks[track_idx].to_tlwh() + candidates = np.asarray( + [detections[i].tlwh for i in detection_indices]) + cost_matrix[row, :] = 1. - iou(bbox, candidates) + return cost_matrix diff --git a/asone/trackers/deep_sort/tracker/sort/kalman_filter.py b/asone/trackers/deep_sort/tracker/sort/kalman_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..787a76e6a43870a9538647b51fda6a5254ce2d43 --- /dev/null +++ b/asone/trackers/deep_sort/tracker/sort/kalman_filter.py @@ -0,0 +1,229 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np +import scipy.linalg + + +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + + +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + + The 8-dimensional state space + + x, y, a, h, vx, vy, va, vh + + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. + self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. / 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, a, h) with center position (x, y), + aspect ratio a, and height h. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], + 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 1e-5, + 10 * self._std_weight_velocity * measurement[3]] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. 
+ covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + + """ + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3]] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + mean = np.dot(self._motion_mat, mean) + covariance = np.linalg.multi_dot(( + self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """Project state distribution to measurement space. + + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state + estimate. + + """ + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3]] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot(( + self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) + is the center position, a the aspect ratio, and h the height of the + bounding box. + + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. + + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot(( + kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, + only_position=False): + """Compute gating distance between state distribution and measurements. + + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + + Parameters + ---------- + mean : ndarray + Mean vector over the state distribution (8 dimensional). + covariance : ndarray + Covariance of the state distribution (8x8 dimensional). + measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position : Optional[bool] + If True, distance computation is done with respect to the bounding + box center position only. 
+ + Returns + ------- + ndarray + Returns an array of length N, where the i-th element contains the + squared Mahalanobis distance between (mean, covariance) and + `measurements[i]`. + + """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + cholesky_factor = np.linalg.cholesky(covariance) + d = measurements - mean + z = scipy.linalg.solve_triangular( + cholesky_factor, d.T, lower=True, check_finite=False, + overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha diff --git a/asone/trackers/deep_sort/tracker/sort/linear_assignment.py b/asone/trackers/deep_sort/tracker/sort/linear_assignment.py new file mode 100644 index 0000000000000000000000000000000000000000..858b71a4ae32ca39f03ff5d0ca0fdcc5963171b0 --- /dev/null +++ b/asone/trackers/deep_sort/tracker/sort/linear_assignment.py @@ -0,0 +1,192 @@ +# vim: expandtab:ts=4:sw=4 +from __future__ import absolute_import +import numpy as np +# from sklearn.utils.linear_assignment_ import linear_assignment +from scipy.optimize import linear_sum_assignment as linear_assignment +from . import kalman_filter + + +INFTY_COST = 1e+5 + + +def min_cost_matching( + distance_metric, max_distance, tracks, detections, track_indices=None, + detection_indices=None): + """Solve linear assignment problem. + + Parameters + ---------- + distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as well as + a list of N track indices and M detection indices. The metric should + return the NxM dimensional cost matrix, where element (i, j) is the + association cost between the i-th track in the given track indices and + the j-th detection in the given detection_indices. + max_distance : float + Gating threshold. Associations with cost larger than this value are + disregarded. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. + track_indices : List[int] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). + detection_indices : List[int] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). + + Returns + ------- + (List[(int, int)], List[int], List[int]) + Returns a tuple with the following three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. + + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + if len(detection_indices) == 0 or len(track_indices) == 0: + return [], track_indices, detection_indices # Nothing to match. 
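+
+    # Build the track-by-detection cost matrix, push any entry above the
+    # gating threshold just past `max_distance`, and solve the assignment
+    # with scipy's `linear_sum_assignment`; gated pairs are filtered back
+    # out of `matches` below.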
+ + cost_matrix = distance_metric( + tracks, detections, track_indices, detection_indices) + cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 + + row_indices, col_indices = linear_assignment(cost_matrix) + + matches, unmatched_tracks, unmatched_detections = [], [], [] + for col, detection_idx in enumerate(detection_indices): + if col not in col_indices: + unmatched_detections.append(detection_idx) + for row, track_idx in enumerate(track_indices): + if row not in row_indices: + unmatched_tracks.append(track_idx) + for row, col in zip(row_indices, col_indices): + track_idx = track_indices[row] + detection_idx = detection_indices[col] + if cost_matrix[row, col] > max_distance: + unmatched_tracks.append(track_idx) + unmatched_detections.append(detection_idx) + else: + matches.append((track_idx, detection_idx)) + return matches, unmatched_tracks, unmatched_detections + + +def matching_cascade( + distance_metric, max_distance, cascade_depth, tracks, detections, + track_indices=None, detection_indices=None): + """Run matching cascade. + + Parameters + ---------- + distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as well as + a list of N track indices and M detection indices. The metric should + return the NxM dimensional cost matrix, where element (i, j) is the + association cost between the i-th track in the given track indices and + the j-th detection in the given detection indices. + max_distance : float + Gating threshold. Associations with cost larger than this value are + disregarded. + cascade_depth: int + The cascade depth, should be se to the maximum track age. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. + track_indices : Optional[List[int]] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). Defaults to all tracks. + detection_indices : Optional[List[int]] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). Defaults to all + detections. + + Returns + ------- + (List[(int, int)], List[int], List[int]) + Returns a tuple with the following three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. 
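+
+    Notes
+    -----
+    Matching is attempted level by level: tracks with the smallest
+    `time_since_update` are matched first, so recently updated tracks get
+    priority over long-lost ones when competing for the same detection.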
+ + """ + if track_indices is None: + track_indices = list(range(len(tracks))) + if detection_indices is None: + detection_indices = list(range(len(detections))) + + unmatched_detections = detection_indices + matches = [] + for level in range(cascade_depth): + if len(unmatched_detections) == 0: # No detections left + break + + track_indices_l = [ + k for k in track_indices + if tracks[k].time_since_update == 1 + level + ] + if len(track_indices_l) == 0: # Nothing to match at this level + continue + + matches_l, _, unmatched_detections = \ + min_cost_matching( + distance_metric, max_distance, tracks, detections, + track_indices_l, unmatched_detections) + matches += matches_l + unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) + return matches, unmatched_tracks, unmatched_detections + + +def gate_cost_matrix( + kf, cost_matrix, tracks, detections, track_indices, detection_indices, + gated_cost=INFTY_COST, only_position=False): + """Invalidate infeasible entries in cost matrix based on the state + distributions obtained by Kalman filtering. + + Parameters + ---------- + kf : The Kalman filter. + cost_matrix : ndarray + The NxM dimensional cost matrix, where N is the number of track indices + and M is the number of detection indices, such that entry (i, j) is the + association cost between `tracks[track_indices[i]]` and + `detections[detection_indices[j]]`. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. + track_indices : List[int] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). + detection_indices : List[int] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). + gated_cost : Optional[float] + Entries in the cost matrix corresponding to infeasible associations are + set this value. Defaults to a very large value. + only_position : Optional[bool] + If True, only the x, y position of the state distribution is considered + during gating. Defaults to False. + + Returns + ------- + ndarray + Returns the modified cost matrix. + + """ + gating_dim = 2 if only_position else 4 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + measurements = np.asarray( + [detections[i].to_xyah() for i in detection_indices]) + for row, track_idx in enumerate(track_indices): + track = tracks[track_idx] + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = gated_cost + return cost_matrix diff --git a/asone/trackers/deep_sort/tracker/sort/nn_matching.py b/asone/trackers/deep_sort/tracker/sort/nn_matching.py new file mode 100644 index 0000000000000000000000000000000000000000..21e5b4f478fead21d38227ce2eac34556bd1179e --- /dev/null +++ b/asone/trackers/deep_sort/tracker/sort/nn_matching.py @@ -0,0 +1,176 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np + + +def _pdist(a, b): + """Compute pair-wise squared distance between points in `a` and `b`. + + Parameters + ---------- + a : array_like + An NxM matrix of N samples of dimensionality M. + b : array_like + An LxM matrix of L samples of dimensionality M. + + Returns + ------- + ndarray + Returns a matrix of size len(a), len(b) such that eleement (i, j) + contains the squared distance between `a[i]` and `b[j]`. 
+ + """ + a, b = np.asarray(a), np.asarray(b) + if len(a) == 0 or len(b) == 0: + return np.zeros((len(a), len(b))) + a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) + r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] + r2 = np.clip(r2, 0., float(np.inf)) + return r2 + + +def _cosine_distance(a, b, data_is_normalized=False): + """Compute pair-wise cosine distance between points in `a` and `b`. + + Parameters + ---------- + a : array_like + An NxM matrix of N samples of dimensionality M. + b : array_like + An LxM matrix of L samples of dimensionality M. + data_is_normalized : Optional[bool] + If True, assumes rows in a and b are unit length vectors. + Otherwise, a and b are explicitly normalized to lenght 1. + + Returns + ------- + ndarray + Returns a matrix of size len(a), len(b) such that eleement (i, j) + contains the squared distance between `a[i]` and `b[j]`. + + """ + if not data_is_normalized: + a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) + b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) + return 1. - np.dot(a, b.T) + + +def _nn_euclidean_distance(x, y): + """ Helper function for nearest neighbor distance metric (Euclidean). + + Parameters + ---------- + x : ndarray + A matrix of N row-vectors (sample points). + y : ndarray + A matrix of M row-vectors (query points). + + Returns + ------- + ndarray + A vector of length M that contains for each entry in `y` the + smallest Euclidean distance to a sample in `x`. + + """ + distances = _pdist(x, y) + return np.maximum(0.0, distances.min(axis=0)) + + +def _nn_cosine_distance(x, y): + """ Helper function for nearest neighbor distance metric (cosine). + + Parameters + ---------- + x : ndarray + A matrix of N row-vectors (sample points). + y : ndarray + A matrix of M row-vectors (query points). + + Returns + ------- + ndarray + A vector of length M that contains for each entry in `y` the + smallest cosine distance to a sample in `x`. + + """ + distances = _cosine_distance(x, y) + return distances.min(axis=0) + + +class NearestNeighborDistanceMetric(object): + """ + A nearest neighbor distance metric that, for each target, returns + the closest distance to any sample that has been observed so far. + + Parameters + ---------- + metric : str + Either "euclidean" or "cosine". + matching_threshold: float + The matching threshold. Samples with larger distance are considered an + invalid match. + budget : Optional[int] + If not None, fix samples per class to at most this number. Removes + the oldest samples when the budget is reached. + + Attributes + ---------- + samples : Dict[int -> List[ndarray]] + A dictionary that maps from target identities to the list of samples + that have been observed so far. + + """ + + def __init__(self, metric, matching_threshold, budget=None): + + if metric == "euclidean": + self._metric = _nn_euclidean_distance + elif metric == "cosine": + self._metric = _nn_cosine_distance + else: + raise ValueError( + "Invalid metric; must be either 'euclidean' or 'cosine'") + self.matching_threshold = matching_threshold + self.budget = budget + self.samples = {} + + def partial_fit(self, features, targets, active_targets): + """Update the distance metric with new data. + + Parameters + ---------- + features : ndarray + An NxM matrix of N features of dimensionality M. + targets : ndarray + An integer array of associated target identities. + active_targets : List[int] + A list of targets that are currently present in the scene. 
+ + """ + for feature, target in zip(features, targets): + self.samples.setdefault(target, []).append(feature) + if self.budget is not None: + self.samples[target] = self.samples[target][-self.budget:] + self.samples = {k: self.samples[k] for k in active_targets} + + def distance(self, features, targets): + """Compute distance between features and targets. + + Parameters + ---------- + features : ndarray + An NxM matrix of N features of dimensionality M. + targets : List[int] + A list of targets to match the given `features` against. + + Returns + ------- + ndarray + Returns a cost matrix of shape len(targets), len(features), where + element (i, j) contains the closest squared distance between + `targets[i]` and `features[j]`. + + """ + cost_matrix = np.zeros((len(targets), len(features))) + for i, target in enumerate(targets): + cost_matrix[i, :] = self._metric(self.samples[target], features) + return cost_matrix diff --git a/asone/trackers/deep_sort/tracker/sort/preprocessing.py b/asone/trackers/deep_sort/tracker/sort/preprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..5493b127f602dec398efac4269c00d31a3650ce9 --- /dev/null +++ b/asone/trackers/deep_sort/tracker/sort/preprocessing.py @@ -0,0 +1,73 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np +import cv2 + + +def non_max_suppression(boxes, max_bbox_overlap, scores=None): + """Suppress overlapping detections. + + Original code from [1]_ has been adapted to include confidence score. + + .. [1] http://www.pyimagesearch.com/2015/02/16/ + faster-non-maximum-suppression-python/ + + Examples + -------- + + >>> boxes = [d.roi for d in detections] + >>> scores = [d.confidence for d in detections] + >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) + >>> detections = [detections[i] for i in indices] + + Parameters + ---------- + boxes : ndarray + Array of ROIs (x, y, width, height). + max_bbox_overlap : float + ROIs that overlap more than this values are suppressed. + scores : Optional[array_like] + Detector confidence score. + + Returns + ------- + List[int] + Returns indices of detections that have survived non-maxima suppression. + + """ + if len(boxes) == 0: + return [] + + boxes = boxes.astype(np.float) + pick = [] + + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + boxes[:, 0] + y2 = boxes[:, 3] + boxes[:, 1] + + area = (x2 - x1 + 1) * (y2 - y1 + 1) + if scores is not None: + idxs = np.argsort(scores) + else: + idxs = np.argsort(y2) + + while len(idxs) > 0: + last = len(idxs) - 1 + i = idxs[last] + pick.append(i) + + xx1 = np.maximum(x1[i], x1[idxs[:last]]) + yy1 = np.maximum(y1[i], y1[idxs[:last]]) + xx2 = np.minimum(x2[i], x2[idxs[:last]]) + yy2 = np.minimum(y2[i], y2[idxs[:last]]) + + w = np.maximum(0, xx2 - xx1 + 1) + h = np.maximum(0, yy2 - yy1 + 1) + + overlap = (w * h) / area[idxs[:last]] + + idxs = np.delete( + idxs, np.concatenate( + ([last], np.where(overlap > max_bbox_overlap)[0]))) + + return pick diff --git a/asone/trackers/deep_sort/tracker/sort/track.py b/asone/trackers/deep_sort/tracker/sort/track.py new file mode 100644 index 0000000000000000000000000000000000000000..7ac5eb7c40f5ea4ef445e96bd1074f6b9ab345f5 --- /dev/null +++ b/asone/trackers/deep_sort/tracker/sort/track.py @@ -0,0 +1,170 @@ +# vim: expandtab:ts=4:sw=4 + + +class TrackState: + """ + Enumeration type for the single target track state. Newly created tracks are + classified as `tentative` until enough evidence has been collected. Then, + the track state is changed to `confirmed`. 
Tracks that are no longer alive + are classified as `deleted` to mark them for removal from the set of active + tracks. + + """ + + Tentative = 1 + Confirmed = 2 + Deleted = 3 + + +class Track: + """ + A single target track with state space `(x, y, a, h)` and associated + velocities, where `(x, y)` is the center of the bounding box, `a` is the + aspect ratio and `h` is the height. + + Parameters + ---------- + mean : ndarray + Mean vector of the initial state distribution. + covariance : ndarray + Covariance matrix of the initial state distribution. + track_id : int + A unique track identifier. + n_init : int + Number of consecutive detections before the track is confirmed. The + track state is set to `Deleted` if a miss occurs within the first + `n_init` frames. + max_age : int + The maximum number of consecutive misses before the track state is + set to `Deleted`. + feature : Optional[ndarray] + Feature vector of the detection this track originates from. If not None, + this feature is added to the `features` cache. + + Attributes + ---------- + mean : ndarray + Mean vector of the initial state distribution. + covariance : ndarray + Covariance matrix of the initial state distribution. + track_id : int + A unique track identifier. + hits : int + Total number of measurement updates. + age : int + Total number of frames since first occurance. + time_since_update : int + Total number of frames since last measurement update. + state : TrackState + The current track state. + features : List[ndarray] + A cache of features. On each measurement update, the associated feature + vector is added to this list. + + """ + + def __init__(self, mean, covariance, track_id, n_init, max_age, oid, + feature=None): + self.mean = mean + self.covariance = covariance + self.track_id = track_id + self.oid = oid + self.hits = 1 + self.age = 1 + self.time_since_update = 0 + + self.state = TrackState.Tentative + self.features = [] + if feature is not None: + self.features.append(feature) + + self._n_init = n_init + self._max_age = max_age + + def to_tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + + Returns + ------- + ndarray + The bounding box. + + """ + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + def to_tlbr(self): + """Get current position in bounding box format `(min x, miny, max x, + max y)`. + + Returns + ------- + ndarray + The bounding box. + + """ + ret = self.to_tlwh() + ret[2:] = ret[:2] + ret[2:] + return ret + + def increment_age(self): + self.age += 1 + self.time_since_update += 1 + + def predict(self, kf): + """Propagate the state distribution to the current time step using a + Kalman filter prediction step. + + Parameters + ---------- + kf : kalman_filter.KalmanFilter + The Kalman filter. + + """ + self.mean, self.covariance = kf.predict(self.mean, self.covariance) + self.increment_age() + + def update(self, kf, detection): + """Perform Kalman filter measurement update step and update the feature + cache. + + Parameters + ---------- + kf : kalman_filter.KalmanFilter + The Kalman filter. + detection : Detection + The associated detection. 
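+
+        Notes
+        -----
+        A tentative track is promoted to `Confirmed` after `n_init`
+        measurement updates; a missed frame while still tentative deletes
+        the track instead (see `mark_missed`).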
+ + """ + self.mean, self.covariance = kf.update( + self.mean, self.covariance, detection.to_xyah()) + self.features.append(detection.feature) + + self.hits += 1 + self.time_since_update = 0 + if self.state == TrackState.Tentative and self.hits >= self._n_init: + self.state = TrackState.Confirmed + + def mark_missed(self): + """Mark this track as missed (no association at the current time step). + """ + if self.state == TrackState.Tentative: + self.state = TrackState.Deleted + elif self.time_since_update > self._max_age: + self.state = TrackState.Deleted + + def is_tentative(self): + """Returns True if this track is tentative (unconfirmed). + """ + return self.state == TrackState.Tentative + + def is_confirmed(self): + """Returns True if this track is confirmed.""" + return self.state == TrackState.Confirmed + + def is_deleted(self): + """Returns True if this track is dead and should be deleted.""" + return self.state == TrackState.Deleted diff --git a/asone/trackers/deep_sort/tracker/sort/tracker.py b/asone/trackers/deep_sort/tracker/sort/tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..f47759280a32fe5a1d4f375f4b1b7c921fbb9afe --- /dev/null +++ b/asone/trackers/deep_sort/tracker/sort/tracker.py @@ -0,0 +1,145 @@ +# vim: expandtab:ts=4:sw=4 +from __future__ import absolute_import +import numpy as np +from . import kalman_filter +from . import linear_assignment +from . import iou_matching +from .track import Track + + +class Tracker: + """ + This is the multi-target tracker. + + Parameters + ---------- + metric : nn_matching.NearestNeighborDistanceMetric + A distance metric for measurement-to-track association. + max_age : int + Maximum number of missed misses before a track is deleted. + n_init : int + Number of consecutive detections before the track is confirmed. The + track state is set to `Deleted` if a miss occurs within the first + `n_init` frames. + + Attributes + ---------- + metric : nn_matching.NearestNeighborDistanceMetric + The distance metric used for measurement to track association. + max_age : int + Maximum number of missed misses before a track is deleted. + n_init : int + Number of frames that a track remains in initialization phase. + kf : kalman_filter.KalmanFilter + A Kalman filter to filter target trajectories in image space. + tracks : List[Track] + The list of active tracks at the current time step. + + """ + + def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3): + self.metric = metric + self.max_iou_distance = max_iou_distance + self.max_age = max_age + self.n_init = n_init + + self.kf = kalman_filter.KalmanFilter() + self.tracks = [] + self._next_id = 1 + + def predict(self): + """Propagate track state distributions one time step forward. + + This function should be called once every time step, before `update`. + """ + for track in self.tracks: + track.predict(self.kf) + + def increment_ages(self): + for track in self.tracks: + track.increment_age() + track.mark_missed() + + def update(self, detections): + """Perform measurement update and track management. + + Parameters + ---------- + detections : List[deep_sort.detection.Detection] + A list of detections at the current time step. + + """ + # Run matching cascade. + matches, unmatched_tracks, unmatched_detections = \ + self._match(detections) + + # Update track set. 
+ for track_idx, detection_idx in matches: + self.tracks[track_idx].update(self.kf, detections[detection_idx]) + for track_idx in unmatched_tracks: + self.tracks[track_idx].mark_missed() + for detection_idx in unmatched_detections: + self._initiate_track(detections[detection_idx]) + self.tracks = [t for t in self.tracks if not t.is_deleted()] + # print("LEN self.tracks", len(self.tracks)) + # for t in self.tracks: + # if not t.is_deleted(): + # print(t.__dict__) + + # Update distance metric. + active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] + # print(active_targets) + features, targets, oids = [], [], [] + for track in self.tracks: + if not track.is_confirmed(): + continue + features += track.features + targets += [track.track_id for _ in track.features] + # oids += track.oid + track.features = [] + self.metric.partial_fit( + np.asarray(features), np.asarray(targets), active_targets) + + def _match(self, detections): + + def gated_metric(tracks, dets, track_indices, detection_indices): + features = np.array([dets[i].feature for i in detection_indices]) + targets = np.array([tracks[i].track_id for i in track_indices]) + cost_matrix = self.metric.distance(features, targets) + cost_matrix = linear_assignment.gate_cost_matrix( + self.kf, cost_matrix, tracks, dets, track_indices, + detection_indices) + + return cost_matrix + + # Split track set into confirmed and unconfirmed tracks. + confirmed_tracks = [ + i for i, t in enumerate(self.tracks) if t.is_confirmed()] + unconfirmed_tracks = [ + i for i, t in enumerate(self.tracks) if not t.is_confirmed()] + + # Associate confirmed tracks using appearance features. + matches_a, unmatched_tracks_a, unmatched_detections = linear_assignment.matching_cascade(gated_metric, self.metric.matching_threshold, self.max_age, self.tracks, detections, confirmed_tracks) + + # Associate remaining tracks together with unconfirmed tracks using IOU. 
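+        # Unconfirmed tracks, plus confirmed tracks missed for exactly one
+        # frame, get this second chance; older unmatched tracks are carried
+        # over to the final result as-is.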
+ iou_track_candidates = unconfirmed_tracks + [ + k for k in unmatched_tracks_a if + self.tracks[k].time_since_update == 1] + unmatched_tracks_a = [ + k for k in unmatched_tracks_a if + self.tracks[k].time_since_update != 1] + matches_b, unmatched_tracks_b, unmatched_detections = \ + linear_assignment.min_cost_matching( + iou_matching.iou_cost, self.max_iou_distance, self.tracks, + detections, iou_track_candidates, unmatched_detections) + + matches = matches_a + matches_b + unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) + return matches, unmatched_tracks, unmatched_detections + + def _initiate_track(self, detection): + mean, covariance = self.kf.initiate(detection.to_xyah()) + self.tracks.append(Track( + mean, covariance, self._next_id, self.n_init, self.max_age, detection.oid, + detection.feature)) + self._next_id += 1 diff --git a/asone/trackers/nor_fair/__init__.py b/asone/trackers/nor_fair/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/asone/trackers/nor_fair/norfair.py b/asone/trackers/nor_fair/norfair.py new file mode 100644 index 0000000000000000000000000000000000000000..771812852f18dbfef844b40e3e17f4b6fc4ae83b --- /dev/null +++ b/asone/trackers/nor_fair/norfair.py @@ -0,0 +1,61 @@ +from norfair import Detection, Tracker +import numpy as np + + +class NorFair: + def __init__(self, detector, max_distance_between_points=30) -> None: + + self.tracker = Tracker( + distance_function=self._euclidean_distance, + distance_threshold=max_distance_between_points, + ) + self.detector = detector + try: + self.input_shape = tuple(detector.model.get_inputs()[0].shape[2:]) + except AttributeError as e: + self.input_shape = (640, 640) + + def _euclidean_distance(self, detection, tracked_object): + return np.linalg.norm(detection.points - tracked_object.estimate) + + def detect_and_track(self, image: np.ndarray, config: dict) -> tuple: + + _dets_xyxy, image_info = self.detector.detect( + image, **config + ) + + class_ids = [] + ids = [] + bboxes_xyxy = [] + scores = [] + + if isinstance(_dets_xyxy, np.ndarray) and len(_dets_xyxy) > 0: + + dets_xyxy = [ + Detection( + np.array([(box[2] + box[0])/2, (box[3] + box[1])/2]), data=box) + for box in _dets_xyxy + # if box[-1] == 2 + ] + + bboxes_xyxy, ids, scores, class_ids = self._tracker_update( + dets_xyxy, image_info) + + return bboxes_xyxy, ids, scores, class_ids + + def _tracker_update(self, dets_xyxy: list, image_info: dict): + + bboxes_xyxy = [] + class_ids = [] + scores = [] + ids = [] + + tracked_objects = self.tracker.update(detections=dets_xyxy) + + for obj in tracked_objects: + det = obj.last_detection.data + bboxes_xyxy.append(det[:4]) + class_ids.append(int(det[-1])) + scores.append(int(det[-2])) + ids.append(obj.id) + return np.array(bboxes_xyxy), ids, scores, class_ids diff --git a/asone/trackers/tracker.py b/asone/trackers/tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..15292d86cf83da509eaa17e29a55d6db55ec66db --- /dev/null +++ b/asone/trackers/tracker.py @@ -0,0 +1,32 @@ +from asone.trackers import ByteTrack +from asone.trackers import NorFair +from asone.trackers import DeepSort + +class Tracker: + def __init__(self, tracker: int, detector: object, use_cuda=True) -> None: + + self.trackers = { + '0': ByteTrack, + '1': DeepSort, + '2': NorFair + } + + self.tracker = self._select_tracker(tracker, detector, use_cuda=use_cuda) + + def _select_tracker(self, tracker, detector, use_cuda): + _tracker = 
self.trackers.get(str(tracker), None) + + if _tracker is not None: + if _tracker is DeepSort: + return _tracker(detector, use_cuda=use_cuda) + else: + return _tracker(detector) + else: + raise ValueError(f'Invalid tracker: {tracker}') + + def detect_and_track(self, image, config: dict): + + return self.tracker.detect_and_track(image, config) + + def get_tracker(self): + return self.tracker \ No newline at end of file diff --git a/asone/utils/__init__.py b/asone/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fc009db0b8def159d3fa094008eea087c13272dc --- /dev/null +++ b/asone/utils/__init__.py @@ -0,0 +1,8 @@ +from asone.utils.classes import get_names +from asone.utils.download import download_weights +from asone.utils.colors import compute_color_for_labels +from asone.utils.counting import estimateSpeed, intersect +from asone.utils.ponits_conversion import xyxy_to_tlwh, xyxy_to_xywh, tlwh_to_xyxy +from asone.utils.temp_loader import get_detector, get_tracker + +from asone.utils.draw import draw_boxes \ No newline at end of file diff --git a/asone/utils/classes.py b/asone/utils/classes.py new file mode 100644 index 0000000000000000000000000000000000000000..e585120917e9531eeab1f3b03cbf53ab876d1a55 --- /dev/null +++ b/asone/utils/classes.py @@ -0,0 +1,18 @@ + +# names = [] +# with open('classes.txt') as f: +# names.append(f.read()) + +global names +names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', + 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', + 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', + 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', + 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', + 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', + 'hair drier', 'toothbrush'] + +def get_names(): + return names \ No newline at end of file diff --git a/asone/utils/colors.py b/asone/utils/colors.py new file mode 100644 index 0000000000000000000000000000000000000000..8b01c388c491a37ceef34a037462f201ecdd2715 --- /dev/null +++ b/asone/utils/colors.py @@ -0,0 +1,24 @@ +from numpy import random +from asone.utils import get_names +names = get_names() + +colors = [[random.randint(0, 255) for _ in range(3)] + for _ in range(len(names))] +palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1) + + +def compute_color_for_labels(label): + """ + Simple function that adds fixed color depending on the class + """ + if label == 0: # person #BGR + color = (85, 45, 255) + elif label == 2: # Car + color = (222, 82, 175) + elif label == 3: # Motobike + color = (0, 204, 255) + elif label == 5: # Bus + color = (0, 149, 255) + else: + color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette] + return tuple(color) diff --git a/asone/utils/counting.py b/asone/utils/counting.py new file mode 100644 index 0000000000000000000000000000000000000000..31c0d979edfb6cdb1c79cdadef5481a890ebdeb9 --- /dev/null +++ b/asone/utils/counting.py @@ -0,0 +1,21 @@ +import math + +def estimateSpeed(location1, location2): + + d_pixels = math.sqrt(math.pow( 
+ location2[0] - location1[0], 2) + math.pow(location2[1] - location1[1], 2)) + ppm = 8 # Pixels per Meter + d_meters = d_pixels / ppm + time_constant = 15 * 3.6 + speed = d_meters * time_constant + return speed + +# Return true if line segments AB and CD intersect + + +def intersect(A, B, C, D): + return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D) + + +def ccw(A, B, C): + return (C[1]-A[1]) * (B[0]-A[0]) > (B[1]-A[1]) * (C[0]-A[0]) diff --git a/asone/utils/default_cfg.py b/asone/utils/default_cfg.py new file mode 100644 index 0000000000000000000000000000000000000000..5e5462d28441b6df72f8c21b921cc86c324ae03b --- /dev/null +++ b/asone/utils/default_cfg.py @@ -0,0 +1,16 @@ +config = { + "output_dir": "results", + "filename": None, + "fps": None, + "save_result": True, + "display": True, + "draw_trails": False, + "filter_classes": None, + "class_names": None, + "input_shape" : (640, 640), + "conf_thres": 0.25, + "iou_thres" : 0.45, + "max_det" : 1000, + "agnostic_nms" : False, + "with_p6" : False +} diff --git a/asone/utils/download.py b/asone/utils/download.py new file mode 100644 index 0000000000000000000000000000000000000000..4d0265b416b75b5116b05335fd01b203e5134ea9 --- /dev/null +++ b/asone/utils/download.py @@ -0,0 +1,113 @@ +import gdown +import os +import zipfile + + +def exractfile(file, dest): + with zipfile.ZipFile(file, 'r') as zip_ref: + zip_ref.extractall(dest) + + +def download_weights(weights): + + outputpath = os.path.dirname(weights) + model = os.path.splitext(os.path.basename(weights))[0] + filename = f'{model}.zip' + + if model == 'yolov5s': + model_key = '1H7G8ryDXs6bKlK2Qot7-2uIkjEYoYook' + elif model == 'yolov5x6': + model_key = '161bThpOB4HDqrh2FXvbFZJmiSKFwS_Wb' + elif model == 'yolov5n': + model_key = '1zI4f0AUHAz-fTE_fP7UyiFSRGBYYXd7x' + elif model == 'yolov5m': + model_key = '1vy8S68wbUzKSHMhsTuLN-VA7lMzKchAa' + elif model == 'yolov5l': + model_key = '1pQL9s0o3v6CycAgAX8SkxCfordUl5IxZ' + elif model == 'yolov5x': + model_key = '1iB7MQ1IP3MVKLMF8TIJ44vtv9cjWC2qH' + elif model == 'yolov5n6': + model_key = '1YxnRYlPcCqXGbX20kPlfSimNfROKwoJH' + elif model == 'yolov5s6': + model_key = '1mm5zY6IpPtM7IZh_X5x0kAxuO7INKyte' + elif model == 'yolov5m6': + model_key = '1qv_uan5oNq9skcg1UThfaFs0xMs2mSE2' + elif model == 'yolov5l6': + model_key = '1eaM51cIh8i_EXmg6Nf0Sx2uW53pT7wZR' + elif model == 'yolov6n': + model_key = '1NA_u4BkPE_N8HcPmZrd7HyLmvFHOk8qd' + elif model == 'yolov6t': + model_key = '16OWncBp-vh-sLDMOR58th3WOGv4envQ1' + elif model == 'yolov6s': + model_key = '14BE0j654ClLxMq2ySWZNhTCmf48mLyXi' + elif model == 'yolov6l_relu': + model_key = '14UfY057QUQoAj6q39PX_qE7U1bBIrIGi' + elif model == 'yolov6l': + model_key = '1HdRIs0uMPbqs5E2aEX8O3d3dJTh-KBTf' + elif model == 'yolov6m': + model_key = '1t_w9SCwbZAW7icwX_z97-SQz-plXzBgM' + elif model == 'yolov6s_repopt': + model_key = '1L_1Crxx-4059xDDUZEf_asWRBVd3PF05' + elif model == 'yolov7-e6e': + model_key = '1rQR5KiSJiWtpHEniAyeBQdpXFb7Wv1UT' + elif model == 'yolov7-d6': + model_key = '1idAyjdq9pVsgkDCCfADbGOjxGq4TPulB' + elif model == 'yolov7': + model_key = '10XNOpBAmMrYqmXOsJLl79MGtuGWY2zAl' + elif model == 'yolov7-tiny': + model_key = '1ut2doFvtQSKGjiHGPBsEItZlTTj-7_rF' + elif model == 'yolov7-e6': + model_key = '1E9pow2PFcvil0iqRx2tRCI4HLduh9gp0' + elif model == 'yolov7-w6': + model_key = '1B8j9XMZxGxz8kpsqJhKXuk1TE_244n6t' + elif model == 'yolov7x': + model_key = '1FiGLXG6_3He21ean4bFET471Wrj-3oc3' + elif model == 'yolor_csp': + model_key = '1G3FBZKrznW_64mGfs6b3nAJiJv6GmmV0' + elif model == 
'yolor_csp_star': + model_key = '15WDl46ZthFGZfpOyI3qXx6gC9FQLH_wH' + elif model == 'yolor_csp_x': + model_key = '1LU2ckh7eSpVD0nyPSdq1n34lKmNAX39T' + elif model == 'yolor_csp_x_star': + model_key = '1jheqFDm7BpHQpR60wuWSBpbuyK5SoKdV' + elif model == 'yolor_p6': + model_key = '1XKREKdxQCO8OXiW2IWGFhczolIaIr9sm' + elif model == 'yolox_l': + model_key = '1jX1KHerOdZ5-dmXh6cWcRAn80aKD-7sP' + elif model == 'yolox_nano': + model_key = '1783Os6uTpYjunL-MfK0WE1Wcwk58fIUi' + elif model == 'yolox_tiny': + model_key = '1Lcv1ITvfPdWsu6Kb8Hq6cOSfJE7lbbf2' + elif model == 'yolox_darknet': + model_key = '17f4UI06TWJ25Oqo2OoQGu8AoGVX1lPta' + elif model == 'yolox_s': + model_key = '1IUAOv62XuwkwwTCVD2y3xJg7KUA3M0-M' + elif model == 'yolox_m': + model_key = '1ktHj8UEwl0V8Qz9G74E-yj-o13FLeD0-' + elif model == 'yolox_x': + model_key = '13HNnlILCx_XamNJWwJ1MG5x0XfP6HL1U' + elif model == 'ckpt': + model_key = '1VZ05gzg249Q1m8BJVQxl3iHoNIbjzJf8' + + elif model == 'yolov8s': + model_key = '1rokjGeiLlLSNugd6LuGQj6Yr_i5_XH3Y' + elif model == 'yolov8n': + model_key = '1JslnzKzY7bHRQWiLfvcINteIOgyOv_oU' + elif model == 'yolov8l': + model_key = '1Zlp3e9gBQtgt76SHWpRNdEw4rXeT4GxE' + elif model == 'yolov8m': + model_key = '1ijE_fou-U-UJb4xRspFK0OnPfsLcuw3U' + elif model == 'yolov8x': + model_key = '1vtkXtgSLG49l-mh8zzgF9xZdM1ZuRldI' + + else: + raise ValueError(f'No model named {model} found.') + + url = f'https://drive.google.com/uc?id={model_key}&confirm=t' + gdown.download(url, output=filename, quiet=False) + + if not os.path.exists(outputpath): + os.makedirs(outputpath) + + exractfile(filename, outputpath) + os.remove(filename) diff --git a/asone/utils/draw.py b/asone/utils/draw.py new file mode 100644 index 0000000000000000000000000000000000000000..51d067c0ab942b151820f1c28f21f7fee215f10f --- /dev/null +++ b/asone/utils/draw.py @@ -0,0 +1,121 @@ +import cv2 +from numpy import random +import numpy as np +from asone.utils import compute_color_for_labels +from asone.utils import get_names +from collections import deque + +names = get_names() +data_deque = {} + + +def draw_ui_box(x, img, label=None, color=None, line_thickness=None): + # Plots one bounding box on image img + tl = line_thickness or round( + 0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness + color = color or [random.randint(0, 255) for _ in range(3)] + c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) + cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) + + if label: + tf = max(tl - 1, 1) # font thickness + t_size = cv2.getTextSize(str(label), 0, fontScale=tl / 3, thickness=tf)[0] + # c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 + img = draw_border(img, (c1[0], c1[1] - t_size[1] - 3), + (c1[0] + t_size[0], c1[1]+3), color, 1, 8, 2) + + # cv2.line(img, c1, c2, color, 30) + # cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled + cv2.putText(img, str(label), (c1[0], c1[1] - 2), 0, tl / 3, + [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) + + +def draw_border(img, pt1, pt2, color, thickness, r, d): + x1, y1 = pt1 + x2, y2 = pt2 + # Top leftfrom collections import deque (x1, y1 + r + d), color, thickness) + cv2.ellipse(img, (x1 + r, y1 + r), (r, r), 180, 0, 90, color, thickness) + + # Top right + cv2.line(img, (x2 - r, y1), (x2 - r - d, y1), color, thickness) + cv2.line(img, (x2, y1 + r), (x2, y1 + r + d), color, thickness) + cv2.ellipse(img, (x2 - r, y1 + r), (r, r), 270, 0, 90, color, thickness) + # Bottom left + cv2.line(img, (x1 + r, y2), (x1 + r + d, y2), color, thickness) + cv2.line(img, (x1, y2 
- r), (x1, y2 - r - d), color, thickness) + cv2.ellipse(img, (x1 + r, y2 - r), (r, r), 90, 0, 90, color, thickness) + # Bottom right + cv2.line(img, (x2 - r, y2), (x2 - r - d, y2), color, thickness) + cv2.line(img, (x2, y2 - r), (x2, y2 - r - d), color, thickness) + cv2.ellipse(img, (x2 - r, y2 - r), (r, r), 0, 0, 90, color, thickness) + + cv2.rectangle(img, (x1 + r, y1), (x2 - r, y2), color, -1, cv2.LINE_AA) + cv2.rectangle(img, (x1, y1 + r), (x2, y2 - r - d), color, -1, cv2.LINE_AA) + + cv2.circle(img, (x1 + r, y1+r), 2, color, 12) + cv2.circle(img, (x2 - r, y1+r), 2, color, 12) + cv2.circle(img, (x1 + r, y2-r), 2, color, 12) + cv2.circle(img, (x2 - r, y2-r), 2, color, 12) + + return img + + +def draw_boxes(img, bbox_xyxy, class_ids, identities=None, draw_trails=False, offset=(0, 0), class_names=None): + # cv2.line(img, line2[0], line2[1], (0,200,0), 3) + height, width, _ = img.shape + + # remove tracked point from buffer if object is lost + if draw_trails: + for key in list(data_deque): + if key not in identities: + data_deque.pop(key) + + for i, box in enumerate(bbox_xyxy): + x1, y1, x2, y2 = [int(i) for i in box] + x1 += offset[0] + x2 += offset[0] + y1 += offset[1] + y2 += offset[1] + + # get ID of object + id = int(identities[i]) if identities is not None else None + + # if class_ids is not None: + color = compute_color_for_labels(int(class_ids[i])) + + if class_names: + obj_name = class_names[int(class_ids[i])] + else: + obj_name = names[int(class_ids[i])] + + label = f'{obj_name}' if id is None else f'{id}' + + draw_ui_box(box, img, label=label, color=color, line_thickness=2) + + # Draw trails + # code to find center of bottom edge + center = (int((x2+x1) / 2), int((y2+y2)/2)) + + if draw_trails: + # create new buffer for new object + if id not in data_deque: + data_deque[id] = deque(maxlen= 64) + + data_deque[id].appendleft(center) + drawtrails(data_deque, id, color, img) + + return img + +def drawtrails(data_deque, id, color, img): + # draw trail + for i in range(1, len(data_deque[id])): + # check if on buffer value is none + if data_deque[id][i - 1] is None or data_deque[id][i] is None: + continue + + # generate dynamic thickness of trails + thickness = int(np.sqrt(64 / float(i + i)) * 1.5) + + # draw trails + cv2.line(img, data_deque[id][i - 1], data_deque[id][i], color, thickness) + diff --git a/asone/utils/ponits_conversion.py b/asone/utils/ponits_conversion.py new file mode 100644 index 0000000000000000000000000000000000000000..681c4f717bbca366aba32d1a406efca6ed87998b --- /dev/null +++ b/asone/utils/ponits_conversion.py @@ -0,0 +1,33 @@ +def xyxy_to_xywh(xyxy): + """" Calculates the relative bounding box from absolute pixel values. 
""" + bbox_left = min([xyxy[0], xyxy[2]]) + bbox_top = min([xyxy[1], xyxy[3]]) + bbox_w = abs(xyxy[0] - xyxy[2]) + bbox_h = abs(xyxy[1] - xyxy[3]) + x_c = (bbox_left + bbox_w / 2) + y_c = (bbox_top + bbox_h / 2) + w = bbox_w + h = bbox_h + return [x_c, y_c, w, h] + + +def tlwh_to_xyxy(tlwh): + """" Convert tlwh to xyxy """ + x1 = tlwh[0] + y1 = tlwh[1] + x2 = tlwh[2] + x1 + y2 = tlwh[3] + y1 + return [x1, y1, x2, y2] + + +def xyxy_to_tlwh(bbox_xyxy): + tlwh_bboxs = [] + for i, box in enumerate(bbox_xyxy): + x1, y1, x2, y2 = [int(i) for i in box] + top = x1 + left = y1 + w = int(x2 - x1) + h = int(y2 - y1) + tlwh_obj = [top, left, w, h] + tlwh_bboxs.append(tlwh_obj) + return tlwh_bboxs diff --git a/asone/utils/temp_loader.py b/asone/utils/temp_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..b50c2fa37ca3720164aca87ac20d67dc69ba7615 --- /dev/null +++ b/asone/utils/temp_loader.py @@ -0,0 +1,34 @@ +from asone.detectors import YOLOv5Detector +from asone.detectors import YOLOv7Detector +from asone.trackers import ByteTrack +from asone.trackers import NorFair +from asone.trackers import DeepSort + +detectors = { + 'yolov5s': YOLOv5Detector, + 'yolov7': YOLOv7Detector +} + +trackers = { + 'byte_track': ByteTrack, + 'norfair': NorFair, + 'deepsort': DeepSort +} + + +def get_detector(detector, use_cuda=True, use_onnx=False): + detector = detectors.get(detector, None) + + if detector is not None: + return detector(use_cuda=use_cuda, use_onnx=use_onnx) + else: + return None + + +def get_tracker(tracker, detector, use_cuda=True, use_onnx=False): + tracker = trackers.get(tracker, None) + + if tracker is not None: + return tracker(detector) + else: + return None diff --git a/asone/windows/README.md b/asone/windows/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ad0c40e80167a34e822be65bac2af19d2383644c --- /dev/null +++ b/asone/windows/README.md @@ -0,0 +1,139 @@ +# ASOne +## Docker Installation Instructions For Windows +#### Table of Contents +- [System Requirements](#system-requirements) +- [Installation with Batch and Configuring Devices](#installation-with-batch-and-configuring-devices) + + + + + + + +### System Requirements +Windows machine must meet the following requirements to successfully install the docker: + +**With WSL 2 backend**
+Type **winver** in RUN to check the version of the installed Windows.
+
+- Windows 11 64-bit: Home or Pro version 21H2 or higher,\
+  or Enterprise or Education version 21H2 or higher
+- Windows 10 64-bit: Home or Pro 21H1 (build 19043) or higher,\
+  or Enterprise or Education 20H2 (build 19042) or higher
+- Enable the WSL 2 feature on Windows. For detailed instructions,\
+  refer to the [wsl installation](https://docs.microsoft.com/en-us/windows/wsl/install)
+
+ **Hardware Requirements to run WSL 2 on Windows 10 or Windows 11**
+ +- 64-bit processor with Second Level Address Translation (SLAT) + +- 4GB system RAM + +- BIOS-level hardware virtualization support must be enabled in the \ + BIOS settings. For more information, see [Virtualization](https://docs.docker.com/desktop/troubleshoot/topics/) + +## Installation with Batch and Configuring Devices +1. Download the [enable_feature.bat](enable_feature.bat) and run it as administrator. +- Reboot your system. +2. Download the [installation.bat](installation.bat) and run it as administrator. +- Again Reboot your system. +3. Open XLaunch and select Multiple windows +- Select the option Start no client +- In Extra Settings, select the option + 1. Clipboard + 2. Primary Selection + 3. Native opengl + 4. Disable access control +- Save configuration file for later use +4. Open [cam2ip.exe](cam2ip-1.6-64bit-cv/cam2ip.exe) see "Listening on: 56000" + - IP stream will be on `http://localhost:56000/mjpeg` + + You can now go back to [Installation Page](../README.md). + + + + + + + + diff --git a/asone/windows/enable_feature.bat b/asone/windows/enable_feature.bat new file mode 100644 index 0000000000000000000000000000000000000000..a6f937818909f43aeff68cda749188701cd8b718 --- /dev/null +++ b/asone/windows/enable_feature.bat @@ -0,0 +1,8 @@ +@echo off +@"%SystemRoot%\System32\WindowsPowerShell\v1.0\powershell.exe" -NoProfile -InputFormat None -ExecutionPolicy Bypass -Command "[System.Net.ServicePointManager]::SecurityProtocol = 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))" && SET "PATH=%PATH%;%ALLUSERSPROFILE%\chocolatey\bin" +powershell.exe dism.exe /online /enable-feature /featurename:VirtualMachinePlatform /all /norestart +powershell.exe dism.exe /online /enable-feature /featurename:Microsoft-Windows-Subsystem-Linux /all /norestart +choco feature enable -name=exitOnRebootDetected +choco feature enable -n allowGlobalConfirmation +echo "REBOOT YOUR SYSTEM" +pause \ No newline at end of file diff --git a/asone/windows/installation.bat b/asone/windows/installation.bat new file mode 100644 index 0000000000000000000000000000000000000000..4ef20998d0d78de52dcde8a69bae201a441a4be0 --- /dev/null +++ b/asone/windows/installation.bat @@ -0,0 +1,8 @@ +@echo on +choco install wsl2 --params "/Version:2 /Retry:true" +wsl --set-default-version 2 +choco install docker-desktop +choco install vcxsrv +for /F "tokens=14" %i in ('"ipconfig | findstr IPv4 | findstr /i "192""') do setx DISPLAY %i:0.0 +echo "REBOOT YOUR SYSTEM" +Pause diff --git a/asone/windows/sample/Inside Ultralytics.mp4 b/asone/windows/sample/Inside Ultralytics.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..25fb12f5aef01b544dbaff8f12f105ca12051968 --- /dev/null +++ b/asone/windows/sample/Inside Ultralytics.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbf50725012c206a6a2f3b9608ae46aed9c61c01631faf5d62bfb59b77f60914 +size 39493992 diff --git a/asone/windows/test-display.py b/asone/windows/test-display.py new file mode 100644 index 0000000000000000000000000000000000000000..3a2dc3973489e4e9a2300faf036ca2de23a63b93 --- /dev/null +++ b/asone/windows/test-display.py @@ -0,0 +1,6 @@ +import cv2 + +img = cv2.imread('test.jpg') +cv2.imshow("TEST DISPLAY", cv2.resize(img, (600, 600)) +) +cv2.waitKey(0) \ No newline at end of file diff --git a/asone/windows/test-webcam.py b/asone/windows/test-webcam.py new file mode 100644 index 0000000000000000000000000000000000000000..4655458cb95825c719d73507129c3b845f4f3ad5 --- /dev/null 
+++ b/asone/windows/test-webcam.py @@ -0,0 +1,34 @@ +import cv2 +import numpy as np + +img = np.zeros((400, 600, 3)) + +##Fetch IP Address +# importing socket module +import socket +# getting the hostname by socket.gethostname() method +hostname = socket.gethostname() +# getting the IP address using socket.gethostbyname() method +ip_address = socket.gethostbyname(hostname) +# capturing the video from ip stream +cap = cv2.VideoCapture(f'http://{ip_address}:56000/mjpeg') +# cap.open("") + +while True: + if not cap.isOpened(): + print('Unable to load camera. Use the command "xhost +"') + pass + + # Capture frame-by-frame + ret, frame = cap.read() + print(frame) + # Display the resulting frame + cv2.imshow('Video', frame) + + + if cv2.waitKey(1) & 0xFF == ord('q'): + break + +# When everything is done, release the capture +cap.release() +cv2.destroyAllWindows() diff --git a/asone/windows/test.jpg b/asone/windows/test.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0315b5976ff4ea8cdc3ef9ce54f3264b9794525b Binary files /dev/null and b/asone/windows/test.jpg differ diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000000000000000000000000000000000000..393be28e4f618ea77228da6b2983507f7e37a852 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,63 @@ +version: "3.9" +services: + linux: + build: . + image: "asone:latest" + volumes: + - ${PWD}:/workspace + - $HOME/.Xauthority:/root/.Xauthority:rw + - /dev/video0:/dev/video0 + network_mode: host + restart: always + privileged: true + environment: + DISPLAY: $DISPLAY + + linux-gpu: + build: . + image: "asone:latest" + volumes: + - ${PWD}:/workspace + - $HOME/.Xauthority:/root/.Xauthority:rw + - /dev/video0:/dev/video0 + network_mode: host + restart: always + privileged: true + environment: + DISPLAY: $DISPLAY + deploy: + resources: + reservations: + devices: + - capabilities: [gpu] + + windows: + build: . + image: "asone:latest" + volumes: + - ${PWD}:/workspace + # - $HOME/.Xauthority:/root/.Xauthority:rw + # - /dev/video0:/dev/video0 + network_mode: host + restart: always + privileged: true + environment: + DISPLAY: $DISPLAY + + windows-gpu: + build: . 
+ image: "asone:latest" + volumes: + - ${PWD}:/workspace + # - $HOME/.Xauthority:/root/.Xauthority:rw + # - /dev/video0:/dev/video0 + network_mode: host + restart: always + privileged: true + environment: + DISPLAY: $DISPLAY + deploy: + resources: + reservations: + devices: + - capabilities: [gpu] diff --git a/main.py b/main.py new file mode 100644 index 0000000000000000000000000000000000000000..b6761da3d713cf8d3ca69bbcafa12a4832e15ffb --- /dev/null +++ b/main.py @@ -0,0 +1,54 @@ +import argparse +import asone +from asone import ASOne + +def main(args): + filter_classes = args.filter_classes + + if filter_classes: + filter_classes = [filter_classes] + + dt_obj = ASOne( + tracker=asone.BYTETRACK, + detector=asone.YOLOV7_PYTORCH, + weights=args.weights, + use_cuda=args.use_cuda + ) + # Get tracking function + track_fn = dt_obj.track_video(args.video_path, + output_dir=args.output_dir, + conf_thres=args.conf_thres, + iou_thres=args.iou_thres, + display=args.display, + draw_trails=args.draw_trails, + filter_classes=filter_classes, + class_names=None) # class_names=['License Plate'] for custom weights + + # Loop over track_fn to retrieve outputs of each frame + for bbox_details, frame_details in track_fn: + bbox_xyxy, ids, scores, class_ids = bbox_details + frame, frame_num, fps = frame_details + print(frame_num) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + parser.add_argument('video_path', help='Path to input video') + parser.add_argument('--cpu', default=True, action='store_false', dest='use_cuda', + help='run on cpu if not provided the program will run on gpu.') + parser.add_argument('--no_save', default=True, action='store_false', + dest='save_result', help='whether or not save results') + parser.add_argument('--no_display', default=True, action='store_false', + dest='display', help='whether or not display results on screen') + parser.add_argument('--output_dir', default='data/results', help='Path to output directory') + parser.add_argument('--draw_trails', action='store_true', default=False, + help='if provided object motion trails will be drawn.') + parser.add_argument('--filter_classes', default=None, help='Filter class name') + parser.add_argument('-w', '--weights', default=None, help='Path of trained weights') + parser.add_argument('-ct', '--conf_thres', default=0.25, type=float, help='confidence score threshold') + parser.add_argument('-it', '--iou_thres', default=0.45, type=float, help='iou score threshold') + + args = parser.parse_args() + + main(args) diff --git a/requirements.txt b/requirements.txt index 766f4eba26ebc2245112255b0ca07f5292662ff3..d8bd40f4725f43077a2c7c2d0cc9bc244096ef23 100755 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,18 @@ seaborn>=0.11.0 scipy>=1.4.1 Pillow>=7.1.2 huggingface-hub >= 0.11.0 -ultralytics >=8.0.34 \ No newline at end of file +ultralytics >=8.0.34 +lap +loguru +norfair +onnxruntime-gpu==1.12.1 +opencv-python +scipy +pyyaml +easydict +gdown +pandas +tabulate +typing-extensions==3.10.0.2 +wheel +Cython diff --git a/setup.py b/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..f8e918fde11597a7885c215a04e1deaec4342109 --- /dev/null +++ b/setup.py @@ -0,0 +1,61 @@ +from setuptools import setup, find_packages +from pkg_resources import parse_requirements +import pathlib + +DISTNAME = 'asone' +DESCRIPTION = '' +MAINTAINER = 'AxcelerateAI' +MAINTAINER_EMAIL = 'umair.imran@axcelerate.ai' +URL = 'https://github.com/axcelerateai/asone' +DOWNLOAD_URL = URL + +VERSION = '0.1.2.dev17' + +with 
open('README.md') as f: + long_description = f.read() + +requirements_txt = pathlib.Path('requirements.txt').open() + + +def setup_package(): + setup( + name=DISTNAME, + version=VERSION, + description=DESCRIPTION, + long_description = long_description, + long_description_content_type='text/markdown', + url=DOWNLOAD_URL, + author=MAINTAINER, + author_email=MAINTAINER_EMAIL, + license='BSD 2-clause', + keywords='asone bytetrack deepsort norfair yolo yolox yolor yolov5 yolov7 installation inferencing', + # package_dir={"":""}, + packages=find_packages(), + + dependency_links=[ + "https://download.pytorch.org/whl/cu113/", + 'https://pypi.python.org/simple/'], + install_requires=[str(requirement) + for requirement in parse_requirements(requirements_txt)], + package_data={ + "": ["detectors/yolor/cfg/*.cfg", "detectors/data/*.yaml", + "detectors/data/*.yml", "detectors/data/*.names"], + }, + + include_package_data=True, + classifiers=[ + 'Development Status :: 1 - Planning', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: MIT License', + 'Operating System :: POSIX :: Linux', + 'Operating System :: Microsoft :: Windows :: Windows 10', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + ], + ) + + +if __name__ == "__main__": + setup_package()