diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..17e87edfda5fc0a61dabcd408d9e26c3ec1f6921 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +deep_sort/deep_sort/deep/checkpoint/ckpt.t7 filter=lfs diff=lfs merge=lfs -text +demo.png filter=lfs diff=lfs merge=lfs -text +test.mp4 filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..eb3c58e3bfa2da63a78bde8b6fcb4baf1928d887 --- /dev/null +++ b/.gitignore @@ -0,0 +1,131 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +openh264-1.8.0-win64.dll diff --git a/README.md b/README.md index a606d61fb0e55f41a08f06fd21f72c4f3e09b472..f8d3ae2f937bdda4de334126e33125d1c1240673 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,67 @@ ---- -title: Yolov8 Deepsort Tracking -emoji: 👀 -colorFrom: red -colorTo: green -sdk: gradio -sdk_version: 3.48.0 -app_file: app.py -pinned: false -license: mit ---- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +
+
+# yolov8-deepsort-tracking
+
+ +![示例图片](https://github.com/KdaiP/yolov8-deepsort-tracking/blob/main/demo.png) + +opencv+yolov8+deepsort的行人检测与跟踪。当然,也可以识别车辆等其他类别。 + +- 2023/10/17更新:简化代码,删除不必要的依赖 + +- 2023/7/4更新:加入了一个基于Gradio的WebUI界面 + +## 安装 +环境:Python>=3.8 + +本项目需要pytorch,建议手动在[pytorch官网](https://pytorch.org/get-started/locally/)根据自己的平台和CUDA环境安装对应的版本。 + +pytorch的详细安装教程可以参照[Conda Quickstart Guide for Ultralytics](https://docs.ultralytics.com/guides/conda-quickstart/) + +安装完pytorch后,需要通过以下命令来安装其他依赖: + +```shell +$ pip install -r requirements.txt +``` + + +## 配置(非WebUI) + +在main.py中修改以下代码,将输入视频路径换成你要处理的视频的路径: + +```python +input_video_path = "test.mp4" +``` + +模型默认使用Ultralytics官方的YOLOv8n模型: + +```python +model = "yolov8n.pt" +``` + +第一次使用会自动从官网下载模型,如果网速过慢,可以在[ultralytics的官方文档](https://docs.ultralytics.com/tasks/detect/)下载模型,然后将模型文件拷贝到程序所在目录下。 + +## 运行(非WebUI) + +运行main.py +运行完成后,终端会显示输出视频所在的路径。 + +## WebUI界面的配置和运行 + +**请先确保已经安装完成上面的依赖** + +安装Gradio库: + +```shell +$ pip install gradio +``` + +运行app.py,如果控制台出现以下消息代表成功运行: +```shell +Running on local URL: http://127.0.0.1:6006 +To create a public link, set `share=True` in `launch()` +``` + +浏览器打开该URL即可使用WebUI界面 + +![WebUI](https://github.com/KdaiP/yolov8-deepsort-tracking/blob/main/webui.png) + diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..1a5043e84ad2de8771cd7a14a8d3bdcd5fb57d91 --- /dev/null +++ b/app.py @@ -0,0 +1,165 @@ +from ultralytics import YOLO +import cv2 +import numpy as np +import tempfile +from pathlib import Path +import deep_sort.deep_sort.deep_sort as ds + +import gradio as gr + +# YoloV8官方模型,从左往右由小到大,第一次使用会自动下载 +model_list = ["yolov8n.pt", "yolov8s.pt", "yolov8m.pt", "yolov8l.pt", "yolov8x.pt"] + +def putTextWithBackground( + img, + text, + origin, + font=cv2.FONT_HERSHEY_SIMPLEX, + font_scale=1, + text_color=(255, 255, 255), + bg_color=(0, 0, 0), + thickness=1, +): + """绘制带有背景的文本。 + + :param img: 输入图像。 + :param text: 要绘制的文本。 + :param origin: 文本的左上角坐标。 + :param font: 字体类型。 + :param font_scale: 字体大小。 + :param text_color: 文本的颜色。 + :param bg_color: 背景的颜色。 + :param thickness: 文本的线条厚度。 + """ + # 计算文本的尺寸 + (text_width, text_height), _ = cv2.getTextSize(text, font, font_scale, thickness) + + # 绘制背景矩形 + bottom_left = origin + top_right = (origin[0] + text_width, origin[1] - text_height - 5) # 减去5以留出一些边距 + cv2.rectangle(img, bottom_left, top_right, bg_color, -1) + + # 在矩形上绘制文本 + text_origin = (origin[0], origin[1] - 5) # 从左上角的位置减去5来留出一些边距 + cv2.putText( + img, + text, + text_origin, + font, + font_scale, + text_color, + thickness, + lineType=cv2.LINE_AA, + ) + + +# 视频处理 +def processVideo(inputPath, model): + """处理视频,检测并跟踪行人。 + + :param inputPath: 视频文件路径 + :return: 输出视频的路径 + """ + tracker = ds.DeepSort( + "deep_sort/deep_sort/deep/checkpoint/ckpt.t7" + ) # 加载deepsort权重文件 + model = YOLO(model) # 加载YOLO模型文件 + + # 读取视频文件 + cap = cv2.VideoCapture(inputPath) + fps = cap.get(cv2.CAP_PROP_FPS) # 获取视频的帧率 + size = ( + int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), + int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), + ) # 获取视频的大小 + output_video = cv2.VideoWriter() # 初始化视频写入 + outputPath = tempfile.mkdtemp() # 创建输出视频的临时文件夹的路径 + + # 输出格式为XVID格式的avi文件 + # 如果需要使用h264编码或者需要保存为其他格式,可能需要下载openh264-1.8.0 + # 下载地址:https://github.com/cisco/openh264/releases/tag/v1.8.0 + # 下载完成后将dll文件放在当前文件夹内 + output_type = "avi" + if output_type == "avi": + fourcc = cv2.VideoWriter_fourcc(*"XVID") + video_save_path = Path(outputPath) / "output.avi" # 创建输出视频路径 + if output_type == "mp4": # 浏览器只支持播放h264编码的mp4视频文件 + fourcc = cv2.VideoWriter_fourcc(*"h264") + 
video_save_path = Path(outputPath) / "output.mp4" + + output_video.open(video_save_path.as_posix(), fourcc, fps, size, True) + # 对每一帧图片进行读取和处理 + while True: + success, frame = cap.read() + if not (success): + break + + # 获取每一帧的目标检测推理结果 + results = model(frame, stream=True) + + detections = [] # 存放bounding box结果 + confarray = [] # 存放每个检测结果的置信度 + + # 读取目标检测推理结果 + # 参考: https://docs.ultralytics.com/modes/predict/#working-with-results + for r in results: + boxes = r.boxes + for box in boxes: + x1, y1, x2, y2 = map(int, box.xywh[0]) # 提取矩形框左上和右下的点,并将tensor类型转为整型 + conf = round(float(box.conf[0]), 2) # 对conf四舍五入到2位小数 + cls = int(box.cls[0]) # 获取物体类别标签 + + if cls == detect_class: + detections.append([x1, y1, x2, y2]) + confarray.append(conf) + + # 使用deepsort进行跟踪 + resultsTracker = tracker.update(np.array(detections), confarray, frame) + for x1, y1, x2, y2, Id in resultsTracker: + x1, y1, x2, y2 = map(int, [x1, y1, x2, y2]) + + # 绘制bounding box + cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 255), 3) + putTextWithBackground( + frame, + str(int(Id)), + (max(-10, x1), max(40, y1)), + font_scale=1.5, + text_color=(255, 255, 255), + bg_color=(255, 0, 255), + ) + + output_video.write(frame) # 将处理后的图像写入视频 + output_video.release() # 释放 + cap.release() # 释放 + print(f"output dir is: {video_save_path.as_posix()}") + return video_save_path.as_posix(), video_save_path.as_posix() # Gradio的视频控件实际读取的是文件路径 + + +if __name__ == "__main__": + # 需要跟踪的物体类别 + detect_class = 0 + + # Gradio参考文档:https://www.gradio.app/guides/blocks-and-event-listeners + with gr.Blocks() as demo: + with gr.Tab("Tracking"): + gr.Markdown( + """ + # YoloV8 + deepsort + 基于opencv + YoloV8 + deepsort + """ + ) + with gr.Row(): + with gr.Column(): + input_video = gr.Video(label="Input video") + model = gr.Dropdown(model_list, value="yolov8n.pt", label="Model") + with gr.Column(): + output = gr.Video() + output_path = gr.Textbox(label="Output path") + button = gr.Button("Process") + + button.click( + processVideo, inputs=[input_video, model], outputs=[output, output_path] + ) + + demo.launch(server_port=6006) diff --git a/deep_sort/configs/deep_sort.yaml b/deep_sort/configs/deep_sort.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6105f46a8d155c2dc7ba9cebb7178b4e0ed389a3 --- /dev/null +++ b/deep_sort/configs/deep_sort.yaml @@ -0,0 +1,10 @@ +DEEPSORT: + REID_CKPT: "deep_sort/deep_sort/deep/checkpoint/ckpt.t7" + MAX_DIST: 0.2 + MIN_CONFIDENCE: 0.3 + NMS_MAX_OVERLAP: 0.5 + MAX_IOU_DISTANCE: 0.7 + MAX_AGE: 70 + N_INIT: 3 + NN_BUDGET: 100 + diff --git a/deep_sort/deep_sort/README.md b/deep_sort/deep_sort/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e89c9b3ea08691210046fbb9184bf8e44e88f29e --- /dev/null +++ b/deep_sort/deep_sort/README.md @@ -0,0 +1,3 @@ +# Deep Sort + +This is the implemention of deep sort with pytorch. 
\ No newline at end of file diff --git a/deep_sort/deep_sort/__init__.py b/deep_sort/deep_sort/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5fe5d0fd796ec4f46dc4141f5e4f9f5092f7d321 --- /dev/null +++ b/deep_sort/deep_sort/__init__.py @@ -0,0 +1,21 @@ +from .deep_sort import DeepSort + + +__all__ = ['DeepSort', 'build_tracker'] + + +def build_tracker(cfg, use_cuda): + return DeepSort(cfg.DEEPSORT.REID_CKPT, + max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, + nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, + max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda) + + + + + + + + + + diff --git a/deep_sort/deep_sort/__pycache__/__init__.cpython-310.pyc b/deep_sort/deep_sort/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..66658c3ed00f28a82340ba7cfecbcc51fba72468 Binary files /dev/null and b/deep_sort/deep_sort/__pycache__/__init__.cpython-310.pyc differ diff --git a/deep_sort/deep_sort/__pycache__/deep_sort.cpython-310.pyc b/deep_sort/deep_sort/__pycache__/deep_sort.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3a3c872ca5444d32e17e36a005ee0ef81bc6fc3a Binary files /dev/null and b/deep_sort/deep_sort/__pycache__/deep_sort.cpython-310.pyc differ diff --git a/deep_sort/deep_sort/deep/__init__.py b/deep_sort/deep_sort/deep/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/deep_sort/deep_sort/deep/__pycache__/__init__.cpython-310.pyc b/deep_sort/deep_sort/deep/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16c50defa0a4b0937d8c149054df18f4e5232b8b Binary files /dev/null and b/deep_sort/deep_sort/deep/__pycache__/__init__.cpython-310.pyc differ diff --git a/deep_sort/deep_sort/deep/__pycache__/feature_extractor.cpython-310.pyc b/deep_sort/deep_sort/deep/__pycache__/feature_extractor.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..70cde2bce296b5fbb96e43f6cfb6fd8565aa9ca9 Binary files /dev/null and b/deep_sort/deep_sort/deep/__pycache__/feature_extractor.cpython-310.pyc differ diff --git a/deep_sort/deep_sort/deep/__pycache__/model.cpython-310.pyc b/deep_sort/deep_sort/deep/__pycache__/model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d9353eb48f2d927f291529317ea8235043905cc3 Binary files /dev/null and b/deep_sort/deep_sort/deep/__pycache__/model.cpython-310.pyc differ diff --git a/deep_sort/deep_sort/deep/checkpoint/ckpt.t7 b/deep_sort/deep_sort/deep/checkpoint/ckpt.t7 new file mode 100644 index 0000000000000000000000000000000000000000..f1e725a5d1e8a4c9fc972d4a1e3445bdd8cbc6db --- /dev/null +++ b/deep_sort/deep_sort/deep/checkpoint/ckpt.t7 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22628596f112dc7eb1fe7adfbfaf95bbc6ce8eb024205beafdc705232a646c29 +size 46061055 diff --git a/deep_sort/deep_sort/deep/evaluate.py b/deep_sort/deep_sort/deep/evaluate.py new file mode 100644 index 0000000000000000000000000000000000000000..31c40a46eaea0ad7b6fc50a15e39329b954561ff --- /dev/null +++ b/deep_sort/deep_sort/deep/evaluate.py @@ -0,0 +1,15 @@ +import torch + +features = torch.load("features.pth") +qf = features["qf"] +ql = features["ql"] +gf = features["gf"] +gl = features["gl"] + +scores = qf.mm(gf.t()) +res = 
scores.topk(5, dim=1)[1][:,0] +top1correct = gl[res].eq(ql).sum().item() + +print("Acc top1:{:.3f}".format(top1correct/ql.size(0))) + + diff --git a/deep_sort/deep_sort/deep/feature_extractor.py b/deep_sort/deep_sort/deep/feature_extractor.py new file mode 100644 index 0000000000000000000000000000000000000000..d524080cec6eaf0fbe6aa4ba280157303a9dd88f --- /dev/null +++ b/deep_sort/deep_sort/deep/feature_extractor.py @@ -0,0 +1,65 @@ +import torch +import torchvision.transforms as transforms +import numpy as np +import cv2 +import logging + +from .model import Net + +''' +特征提取器: +提取对应bounding box中的特征, 得到一个固定维度的embedding作为该bounding box的代表, +供计算相似度时使用。 + +模型训练是按照传统ReID的方法进行,使用Extractor类的时候输入为一个list的图片,得到图片对应的特征。 +''' + +class Extractor(object): + def __init__(self, model_path, use_cuda=True): + self.net = Net(reid=True) + self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" + state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)['net_dict'] + self.net.load_state_dict(state_dict) + logger = logging.getLogger("root.tracker") + logger.info("Loading weights from {}... Done!".format(model_path)) + self.net.to(self.device) + self.size = (64, 128) + self.norm = transforms.Compose([ + # RGB图片数据范围是[0-255],需要先经过ToTensor除以255归一化到[0,1]之后, + # 再通过Normalize计算(x - mean)/std后,将数据归一化到[-1,1]。 + transforms.ToTensor(), + # mean=[0.485, 0.456, 0.406] and std=[0.229, 0.224, 0.225]是从imagenet训练集中算出来的 + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + ]) + + def _preprocess(self, im_crops): + """ + TODO: + 1. to float with scale from 0 to 1 + 2. resize to (64, 128) as Market1501 dataset did + 3. concatenate to a numpy array + 3. to torch Tensor + 4. normalize + """ + def _resize(im, size): + return cv2.resize(im.astype(np.float32)/255., size) + + im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float() + return im_batch + +# __call__()是一个非常特殊的实例方法。该方法的功能类似于在类中重载 () 运算符, +# 使得类实例对象可以像调用普通函数那样,以“对象名()”的形式使用。 + def __call__(self, im_crops): + im_batch = self._preprocess(im_crops) + with torch.no_grad(): + im_batch = im_batch.to(self.device) + features = self.net(im_batch) + return features.cpu().numpy() + + +if __name__ == '__main__': + img = cv2.imread("demo.jpg")[:,:,(2,1,0)] + extr = Extractor("checkpoint/ckpt.t7") + feature = extr(img) + print(feature.shape) + diff --git a/deep_sort/deep_sort/deep/model.py b/deep_sort/deep_sort/deep/model.py new file mode 100644 index 0000000000000000000000000000000000000000..0bfff9f18e02378f85c30f5408a5f2fb6637b052 --- /dev/null +++ b/deep_sort/deep_sort/deep/model.py @@ -0,0 +1,105 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +class BasicBlock(nn.Module): + def __init__(self, c_in, c_out,is_downsample=False): + super(BasicBlock,self).__init__() + self.is_downsample = is_downsample + if is_downsample: + self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) + else: + self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(c_out) + self.relu = nn.ReLU(True) + self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(c_out) + if is_downsample: + self.downsample = nn.Sequential( + nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), + nn.BatchNorm2d(c_out) + ) + elif c_in != c_out: + self.downsample = nn.Sequential( + nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), + nn.BatchNorm2d(c_out) + ) + self.is_downsample = True + + def forward(self,x): + y 
= self.conv1(x) + y = self.bn1(y) + y = self.relu(y) + y = self.conv2(y) + y = self.bn2(y) + if self.is_downsample: + x = self.downsample(x) + return F.relu(x.add(y),True) + +def make_layers(c_in,c_out,repeat_times, is_downsample=False): + blocks = [] + for i in range(repeat_times): + if i ==0: + blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] + else: + blocks += [BasicBlock(c_out,c_out),] + return nn.Sequential(*blocks) + +class Net(nn.Module): + def __init__(self, num_classes=751, reid=False): + super(Net,self).__init__() + # 3 128 64 + self.conv = nn.Sequential( + nn.Conv2d(3,64,3,stride=1,padding=1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True), + # nn.Conv2d(32,32,3,stride=1,padding=1), + # nn.BatchNorm2d(32), + # nn.ReLU(inplace=True), + nn.MaxPool2d(3,2,padding=1), + ) + # 32 64 32 + self.layer1 = make_layers(64,64,2,False) + # 32 64 32 + self.layer2 = make_layers(64,128,2,True) + # 64 32 16 + self.layer3 = make_layers(128,256,2,True) + # 128 16 8 + self.layer4 = make_layers(256,512,2,True) + # 256 8 4 + self.avgpool = nn.AvgPool2d((8,4),1) + # 256 1 1 + self.reid = reid + + self.classifier = nn.Sequential( + nn.Linear(512, 256), + nn.BatchNorm1d(256), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(256, num_classes), + ) + + def forward(self, x): + x = self.conv(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.avgpool(x) + x = x.view(x.size(0),-1) + # B x 128 + if self.reid: + x = x.div(x.norm(p=2,dim=1,keepdim=True)) + return x + # classifier + x = self.classifier(x) + return x + + +if __name__ == '__main__': + net = Net() + x = torch.randn(4,3,128,64) + y = net(x) + import ipdb; ipdb.set_trace() + + diff --git a/deep_sort/deep_sort/deep/original_model.py b/deep_sort/deep_sort/deep/original_model.py new file mode 100644 index 0000000000000000000000000000000000000000..72453a6392b9a360c03034eefee1d6be30f8121b --- /dev/null +++ b/deep_sort/deep_sort/deep/original_model.py @@ -0,0 +1,106 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +class BasicBlock(nn.Module): + def __init__(self, c_in, c_out,is_downsample=False): + super(BasicBlock,self).__init__() + self.is_downsample = is_downsample + if is_downsample: + self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) + else: + self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(c_out) + self.relu = nn.ReLU(True) + self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(c_out) + if is_downsample: + self.downsample = nn.Sequential( + nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), + nn.BatchNorm2d(c_out) + ) + elif c_in != c_out: + self.downsample = nn.Sequential( + nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), + nn.BatchNorm2d(c_out) + ) + self.is_downsample = True + + def forward(self,x): + y = self.conv1(x) + y = self.bn1(y) + y = self.relu(y) + y = self.conv2(y) + y = self.bn2(y) + if self.is_downsample: + x = self.downsample(x) + return F.relu(x.add(y),True) + +def make_layers(c_in,c_out,repeat_times, is_downsample=False): + blocks = [] + for i in range(repeat_times): + if i ==0: + blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] + else: + blocks += [BasicBlock(c_out,c_out),] + return nn.Sequential(*blocks) + +class Net(nn.Module): + def __init__(self, num_classes=625 ,reid=False): + super(Net,self).__init__() + # 3 128 64 + self.conv = nn.Sequential( + nn.Conv2d(3,32,3,stride=1,padding=1), + nn.BatchNorm2d(32), 
+ nn.ELU(inplace=True), + nn.Conv2d(32,32,3,stride=1,padding=1), + nn.BatchNorm2d(32), + nn.ELU(inplace=True), + nn.MaxPool2d(3,2,padding=1), + ) + # 32 64 32 + self.layer1 = make_layers(32,32,2,False) + # 32 64 32 + self.layer2 = make_layers(32,64,2,True) + # 64 32 16 + self.layer3 = make_layers(64,128,2,True) + # 128 16 8 + self.dense = nn.Sequential( + nn.Dropout(p=0.6), + nn.Linear(128*16*8, 128), + nn.BatchNorm1d(128), + nn.ELU(inplace=True) + ) + # 256 1 1 + self.reid = reid + self.batch_norm = nn.BatchNorm1d(128) + self.classifier = nn.Sequential( + nn.Linear(128, num_classes), + ) + + def forward(self, x): + x = self.conv(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + + x = x.view(x.size(0),-1) + if self.reid: + x = self.dense[0](x) + x = self.dense[1](x) + x = x.div(x.norm(p=2,dim=1,keepdim=True)) + return x + x = self.dense(x) + # B x 128 + # classifier + x = self.classifier(x) + return x + + +if __name__ == '__main__': + net = Net(reid=True) + x = torch.randn(4,3,128,64) + y = net(x) + import ipdb; ipdb.set_trace() + + diff --git a/deep_sort/deep_sort/deep/prepare_car.py b/deep_sort/deep_sort/deep/prepare_car.py new file mode 100644 index 0000000000000000000000000000000000000000..fcf3818fedd64db65ed08f114c103824f01b6e20 --- /dev/null +++ b/deep_sort/deep_sort/deep/prepare_car.py @@ -0,0 +1,129 @@ +# -*- coding:utf8 -*- + +import os +from PIL import Image +from shutil import copyfile, copytree, rmtree, move + +PATH_DATASET = './car-dataset' # 需要处理的文件夹 +PATH_NEW_DATASET = './car-reid-dataset' # 处理后的文件夹 +PATH_ALL_IMAGES = PATH_NEW_DATASET + '/all_images' +PATH_TRAIN = PATH_NEW_DATASET + '/train' +PATH_TEST = PATH_NEW_DATASET + '/test' + +# 定义创建目录函数 +def mymkdir(path): + path = path.strip() # 去除首位空格 + path = path.rstrip("\\") # 去除尾部 \ 符号 + isExists = os.path.exists(path) # 判断路径是否存在 + if not isExists: + os.makedirs(path) # 如果不存在则创建目录 + print(path + ' 创建成功') + return True + else: + # 如果目录存在则不创建,并提示目录已存在 + print(path + ' 目录已存在') + return False + +class BatchRename(): + ''' + 批量重命名文件夹中的图片文件 + ''' + + def __init__(self): + self.path = PATH_DATASET # 表示需要命名处理的文件夹 + + # 修改图像尺寸 + def resize(self): + for aroot, dirs, files in os.walk(self.path): + # aroot是self.path目录下的所有子目录(含self.path),dir是self.path下所有的文件夹的列表. + filelist = files # 注意此处仅是该路径下的其中一个列表 + # print('list', list) + + # filelist = os.listdir(self.path) #获取文件路径 + total_num = len(filelist) # 获取文件长度(个数) + + for item in filelist: + if item.endswith('.jpg'): # 初始的图片的格式为jpg格式的(或者源文件是png格式及其他格式,后面的转换格式就可以调整为自己需要的格式即可) + src = os.path.join(os.path.abspath(aroot), item) + + # 修改图片尺寸到128宽*256高 + im = Image.open(src) + out = im.resize((128, 256), Image.ANTIALIAS) # resize image with high-quality + out.save(src) # 原路径保存 + + def rename(self): + + for aroot, dirs, files in os.walk(self.path): + # aroot是self.path目录下的所有子目录(含self.path),dir是self.path下所有的文件夹的列表. 
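+            # Copy every <vehicle_id>_*.jpg into PATH_ALL_IMAGES/<vehicle_id>/ and
+            # rename it to the MARS-style pattern <vehicle_id>C1T0001F<n>.jpg, so the
+            # ReID training script sees one folder per vehicle identity.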
+ filelist = files # 注意此处仅是该路径下的其中一个列表 + # print('list', list) + + # filelist = os.listdir(self.path) #获取文件路径 + total_num = len(filelist) # 获取文件长度(个数) + + i = 1 # 表示文件的命名是从1开始的 + for item in filelist: + if item.endswith('.jpg'): # 初始的图片的格式为jpg格式的(或者源文件是png格式及其他格式,后面的转换格式就可以调整为自己需要的格式即可) + src = os.path.join(os.path.abspath(aroot), item) + + # 根据图片名创建图片目录 + dirname = str(item.split('_')[0]) + # 为相同车辆创建目录 + #new_dir = os.path.join(self.path, '..', 'bbox_all', dirname) + new_dir = os.path.join(PATH_ALL_IMAGES, dirname) + if not os.path.isdir(new_dir): + mymkdir(new_dir) + + # 获得new_dir中的图片数 + num_pic = len(os.listdir(new_dir)) + + dst = os.path.join(os.path.abspath(new_dir), + dirname + 'C1T0001F' + str(num_pic + 1) + '.jpg') + # 处理后的格式也为jpg格式的,当然这里可以改成png格式 C1T0001F见mars.py filenames 相机ID,跟踪指数 + # dst = os.path.join(os.path.abspath(self.path), '0000' + format(str(i), '0>3s') + '.jpg') 这种情况下的命名格式为0000000.jpg形式,可以自主定义想要的格式 + try: + copyfile(src, dst) #os.rename(src, dst) + print ('converting %s to %s ...' % (src, dst)) + i = i + 1 + except: + continue + print ('total %d to rename & converted %d jpgs' % (total_num, i)) + + def split(self): + #--------------------------------------- + #train_test + images_path = PATH_ALL_IMAGES + train_save_path = PATH_TRAIN + test_save_path = PATH_TEST + if not os.path.isdir(train_save_path): + os.mkdir(train_save_path) + os.mkdir(test_save_path) + + for _, dirs, _ in os.walk(images_path, topdown=True): + for i, dir in enumerate(dirs): + for root, _, files in os.walk(images_path + '/' + dir, topdown=True): + for j, file in enumerate(files): + if(j==0): # test dataset;每个车辆的第一幅图片 + print("序号:%s 文件夹: %s 图片:%s 归为测试集" % (i + 1, root, file)) + src_path = root + '/' + file + dst_dir = test_save_path + '/' + dir + if not os.path.isdir(dst_dir): + os.mkdir(dst_dir) + dst_path = dst_dir + '/' + file + move(src_path, dst_path) + else: + src_path = root + '/' + file + dst_dir = train_save_path + '/' + dir + if not os.path.isdir(dst_dir): + os.mkdir(dst_dir) + dst_path = dst_dir + '/' + file + move(src_path, dst_path) + rmtree(PATH_ALL_IMAGES) + +if __name__ == '__main__': + demo = BatchRename() + demo.resize() + demo.rename() + demo.split() + + diff --git a/deep_sort/deep_sort/deep/prepare_person.py b/deep_sort/deep_sort/deep/prepare_person.py new file mode 100644 index 0000000000000000000000000000000000000000..3df771ff14502a680b4f20abd4856ff74a54e058 --- /dev/null +++ b/deep_sort/deep_sort/deep/prepare_person.py @@ -0,0 +1,108 @@ +import os +from shutil import copyfile + +# You only need to change this line to your dataset download path +download_path = './Market-1501-v15.09.15' + +if not os.path.isdir(download_path): + print('please change the download_path') + +save_path = download_path + '/pytorch' +if not os.path.isdir(save_path): + os.mkdir(save_path) +#----------------------------------------- +#query +query_path = download_path + '/query' +query_save_path = download_path + '/pytorch/query' +if not os.path.isdir(query_save_path): + os.mkdir(query_save_path) + +for root, dirs, files in os.walk(query_path, topdown=True): + for name in files: + if not name[-3:]=='jpg': + continue + ID = name.split('_') + src_path = query_path + '/' + name + dst_path = query_save_path + '/' + ID[0] + if not os.path.isdir(dst_path): + os.mkdir(dst_path) + copyfile(src_path, dst_path + '/' + name) + +#----------------------------------------- +#multi-query +query_path = download_path + '/gt_bbox' +# for dukemtmc-reid, we do not need multi-query +if os.path.isdir(query_path): + 
query_save_path = download_path + '/pytorch/multi-query' + if not os.path.isdir(query_save_path): + os.mkdir(query_save_path) + + for root, dirs, files in os.walk(query_path, topdown=True): + for name in files: + if not name[-3:]=='jpg': + continue + ID = name.split('_') + src_path = query_path + '/' + name + dst_path = query_save_path + '/' + ID[0] + if not os.path.isdir(dst_path): + os.mkdir(dst_path) + copyfile(src_path, dst_path + '/' + name) + +#----------------------------------------- +#gallery +gallery_path = download_path + '/bounding_box_test' +gallery_save_path = download_path + '/pytorch/gallery' +if not os.path.isdir(gallery_save_path): + os.mkdir(gallery_save_path) + +for root, dirs, files in os.walk(gallery_path, topdown=True): + for name in files: + if not name[-3:]=='jpg': + continue + ID = name.split('_') + src_path = gallery_path + '/' + name + dst_path = gallery_save_path + '/' + ID[0] + if not os.path.isdir(dst_path): + os.mkdir(dst_path) + copyfile(src_path, dst_path + '/' + name) + +#--------------------------------------- +#train_all +train_path = download_path + '/bounding_box_train' +train_save_path = download_path + '/pytorch/train_all' +if not os.path.isdir(train_save_path): + os.mkdir(train_save_path) + +for root, dirs, files in os.walk(train_path, topdown=True): + for name in files: + if not name[-3:]=='jpg': + continue + ID = name.split('_') + src_path = train_path + '/' + name + dst_path = train_save_path + '/' + ID[0] + if not os.path.isdir(dst_path): + os.mkdir(dst_path) + copyfile(src_path, dst_path + '/' + name) + + +#--------------------------------------- +#train_val +train_path = download_path + '/bounding_box_train' +train_save_path = download_path + '/pytorch/train' +val_save_path = download_path + '/pytorch/test' +if not os.path.isdir(train_save_path): + os.mkdir(train_save_path) + os.mkdir(val_save_path) + +for root, dirs, files in os.walk(train_path, topdown=True): + for name in files: + if not name[-3:]=='jpg': + continue + ID = name.split('_') + src_path = train_path + '/' + name + dst_path = train_save_path + '/' + ID[0] + if not os.path.isdir(dst_path): + os.mkdir(dst_path) + dst_path = val_save_path + '/' + ID[0] #first image is used as val image + os.mkdir(dst_path) + copyfile(src_path, dst_path + '/' + name) \ No newline at end of file diff --git a/deep_sort/deep_sort/deep/test.py b/deep_sort/deep_sort/deep/test.py new file mode 100644 index 0000000000000000000000000000000000000000..ebd590336f7b17c44738c4c15458f02f33f08017 --- /dev/null +++ b/deep_sort/deep_sort/deep/test.py @@ -0,0 +1,77 @@ +import torch +import torch.backends.cudnn as cudnn +import torchvision + +import argparse +import os + +from model import Net + +parser = argparse.ArgumentParser(description="Train on market1501") +parser.add_argument("--data-dir",default='data',type=str) +parser.add_argument("--no-cuda",action="store_true") +parser.add_argument("--gpu-id",default=0,type=int) +args = parser.parse_args() + +# device +device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" +if torch.cuda.is_available() and not args.no_cuda: + cudnn.benchmark = True + +# data loader +root = args.data_dir +query_dir = os.path.join(root,"query") +gallery_dir = os.path.join(root,"gallery") +transform = torchvision.transforms.Compose([ + torchvision.transforms.Resize((128,64)), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) +]) +queryloader = torch.utils.data.DataLoader( + 
torchvision.datasets.ImageFolder(query_dir, transform=transform), + batch_size=64, shuffle=False +) +galleryloader = torch.utils.data.DataLoader( + torchvision.datasets.ImageFolder(gallery_dir, transform=transform), + batch_size=64, shuffle=False +) + +# net definition +net = Net(reid=True) +assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" +print('Loading from checkpoint/ckpt.t7') +checkpoint = torch.load("./checkpoint/ckpt.t7") +net_dict = checkpoint['net_dict'] +net.load_state_dict(net_dict, strict=False) +net.eval() +net.to(device) + +# compute features +query_features = torch.tensor([]).float() +query_labels = torch.tensor([]).long() +gallery_features = torch.tensor([]).float() +gallery_labels = torch.tensor([]).long() + +with torch.no_grad(): + for idx,(inputs,labels) in enumerate(queryloader): + inputs = inputs.to(device) + features = net(inputs).cpu() + query_features = torch.cat((query_features, features), dim=0) + query_labels = torch.cat((query_labels, labels)) + + for idx,(inputs,labels) in enumerate(galleryloader): + inputs = inputs.to(device) + features = net(inputs).cpu() + gallery_features = torch.cat((gallery_features, features), dim=0) + gallery_labels = torch.cat((gallery_labels, labels)) + +gallery_labels -= 2 + +# save features +features = { + "qf": query_features, + "ql": query_labels, + "gf": gallery_features, + "gl": gallery_labels +} +torch.save(features,"features.pth") \ No newline at end of file diff --git a/deep_sort/deep_sort/deep/train.jpg b/deep_sort/deep_sort/deep/train.jpg new file mode 100644 index 0000000000000000000000000000000000000000..323587c1efa1a4a0d3cba3cce143ff0fa737226f Binary files /dev/null and b/deep_sort/deep_sort/deep/train.jpg differ diff --git a/deep_sort/deep_sort/deep/train.py b/deep_sort/deep_sort/deep/train.py new file mode 100644 index 0000000000000000000000000000000000000000..c95b55d7dce1f2f12a6c315bec9101faaeb45d6b --- /dev/null +++ b/deep_sort/deep_sort/deep/train.py @@ -0,0 +1,192 @@ +import argparse +import os +import time + +import numpy as np +import matplotlib.pyplot as plt +import torch +import torch.backends.cudnn as cudnn +import torchvision + +from model import Net + +parser = argparse.ArgumentParser(description="Train on market1501") +parser.add_argument("--data-dir",default='data',type=str) +parser.add_argument("--no-cuda",action="store_true") +parser.add_argument("--gpu-id",default=0,type=int) +parser.add_argument("--lr",default=0.1, type=float) +parser.add_argument("--interval",'-i',default=20,type=int) +parser.add_argument('--resume', '-r',action='store_true') +args = parser.parse_args() + +# device +device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" +if torch.cuda.is_available() and not args.no_cuda: + cudnn.benchmark = True + +# data loading +root = args.data_dir +train_dir = os.path.join(root,"train") +test_dir = os.path.join(root,"test") + +transform_train = torchvision.transforms.Compose([ + torchvision.transforms.RandomCrop((128,64),padding=4), + torchvision.transforms.RandomHorizontalFlip(), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) +]) +transform_test = torchvision.transforms.Compose([ + torchvision.transforms.Resize((128,64)), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) +]) +trainloader = torch.utils.data.DataLoader( + torchvision.datasets.ImageFolder(train_dir, 
transform=transform_train), + batch_size=64,shuffle=True +) +testloader = torch.utils.data.DataLoader( + torchvision.datasets.ImageFolder(test_dir, transform=transform_test), + batch_size=64,shuffle=True +) +num_classes = max(len(trainloader.dataset.classes), len(testloader.dataset.classes)) +print("num_classes = %s" %num_classes) + +# net definition +start_epoch = 0 +net = Net(num_classes=num_classes) +if args.resume: + assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" + print('Loading from checkpoint/ckpt.t7') + checkpoint = torch.load("./checkpoint/ckpt.t7") + # import ipdb; ipdb.set_trace() + net_dict = checkpoint['net_dict'] + net.load_state_dict(net_dict) + best_acc = checkpoint['acc'] + start_epoch = checkpoint['epoch'] +net.to(device) + +# loss and optimizer +criterion = torch.nn.CrossEntropyLoss() +optimizer = torch.optim.SGD(net.parameters(), args.lr, momentum=0.9, weight_decay=5e-4) +best_acc = 0. + +# train function for each epoch +def train(epoch): + print("\nEpoch : %d"%(epoch+1)) + net.train() + training_loss = 0. + train_loss = 0. + correct = 0 + total = 0 + interval = args.interval + start = time.time() + for idx, (inputs, labels) in enumerate(trainloader): + # forward + inputs,labels = inputs.to(device),labels.to(device) + outputs = net(inputs) + loss = criterion(outputs, labels) + + # backward + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # accumurating + training_loss += loss.item() + train_loss += loss.item() + correct += outputs.max(dim=1)[1].eq(labels).sum().item() + total += labels.size(0) + + # print + if (idx+1)%interval == 0: + end = time.time() + print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( + 100.*(idx+1)/len(trainloader), end-start, training_loss/interval, correct, total, 100.*correct/total + )) + training_loss = 0. + start = time.time() + + return train_loss/len(trainloader), 1.- correct/total + +def test(epoch): + global best_acc + net.eval() + test_loss = 0. 
+ correct = 0 + total = 0 + start = time.time() + with torch.no_grad(): + for idx, (inputs, labels) in enumerate(testloader): + inputs, labels = inputs.to(device), labels.to(device) + outputs = net(inputs) + loss = criterion(outputs, labels) + + test_loss += loss.item() + correct += outputs.max(dim=1)[1].eq(labels).sum().item() + total += labels.size(0) + + print("Testing ...") + end = time.time() + print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( + 100.*(idx+1)/len(testloader), end-start, test_loss/len(testloader), correct, total, 100.*correct/total + )) + + # saving checkpoint + acc = 100.*correct/total + if acc > best_acc: + best_acc = acc + print("Saving parameters to checkpoint/ckpt.t7") + checkpoint = { + 'net_dict':net.state_dict(), + 'acc':acc, + 'epoch':epoch, + } + if not os.path.isdir('checkpoint'): + os.mkdir('checkpoint') + torch.save(checkpoint, './checkpoint/ckpt.t7') + + return test_loss/len(testloader), 1.- correct/total + +# plot figure +x_epoch = [] +record = {'train_loss':[], 'train_err':[], 'test_loss':[], 'test_err':[]} +fig = plt.figure() +ax0 = fig.add_subplot(121, title="loss") +ax1 = fig.add_subplot(122, title="top1err") +def draw_curve(epoch, train_loss, train_err, test_loss, test_err): + global record + record['train_loss'].append(train_loss) + record['train_err'].append(train_err) + record['test_loss'].append(test_loss) + record['test_err'].append(test_err) + + x_epoch.append(epoch) + ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train') + ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val') + ax1.plot(x_epoch, record['train_err'], 'bo-', label='train') + ax1.plot(x_epoch, record['test_err'], 'ro-', label='val') + if epoch == 0: + ax0.legend() + ax1.legend() + fig.savefig("train.jpg") + +# lr decay +def lr_decay(): + global optimizer + for params in optimizer.param_groups: + params['lr'] *= 0.1 + lr = params['lr'] + print("Learning rate adjusted to {}".format(lr)) + +def main(): + total_epoches = 40 + for epoch in range(start_epoch, start_epoch+total_epoches): + train_loss, train_err = train(epoch) + test_loss, test_err = test(epoch) + draw_curve(epoch, train_loss, train_err, test_loss, test_err) + if (epoch+1)%(total_epoches//2)==0: + lr_decay() + + +if __name__ == '__main__': + main() diff --git a/deep_sort/deep_sort/deep_sort.py b/deep_sort/deep_sort/deep_sort.py new file mode 100644 index 0000000000000000000000000000000000000000..a94c878aab7e8f881e2ea4c4c0f07e69011991d7 --- /dev/null +++ b/deep_sort/deep_sort/deep_sort.py @@ -0,0 +1,125 @@ +import numpy as np +import torch + +from .deep.feature_extractor import Extractor +from .sort.nn_matching import NearestNeighborDistanceMetric +from .sort.preprocessing import non_max_suppression +from .sort.detection import Detection +from .sort.tracker import Tracker + + +__all__ = ['DeepSort'] # __all__ 提供了暴露接口用的”白名单“ + + +class DeepSort(object): + def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, max_age=70, n_init=3, nn_budget=100, use_cuda=True): + self.min_confidence = min_confidence # 检测结果置信度阈值 + self.nms_max_overlap = nms_max_overlap # 非极大抑制阈值,设置为1代表不进行抑制 + + self.extractor = Extractor(model_path, use_cuda=use_cuda) # 用于提取一个batch图片对应的特征 + + max_cosine_distance = max_dist # 最大余弦距离,用于级联匹配,如果大于该阈值,则忽略 + nn_budget = 100 # 每个类别gallery最多的外观描述子的个数,如果超过,删除旧的 + # NearestNeighborDistanceMetric 最近邻距离度量 + # 对于每个目标,返回到目前为止已观察到的任何样本的最近距离(欧式或余弦)。 + # 由距离度量方法构造一个 Tracker。 + # 第一个参数可选'cosine' or 'euclidean' + self.metric = 
NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) + self.tracker = Tracker(self.metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init) + + def update(self, bbox_xywh, confidences, ori_img): + self.height, self.width = ori_img.shape[:2] + # generate detections + # 从原图中抠取bbox对应图片并计算得到相应的特征 + features = self._get_features(bbox_xywh, ori_img) + bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) + # 筛选掉小于min_confidence的目标,并构造一个Detection对象构成的列表 + detections = [Detection(bbox_tlwh[i], conf, features[i]) for i,conf in enumerate(confidences) if conf>self.min_confidence] + + # run on non-maximum supression + boxes = np.array([d.tlwh for d in detections]) + scores = np.array([d.confidence for d in detections]) + indices = non_max_suppression(boxes, self.nms_max_overlap, scores) + detections = [detections[i] for i in indices] + + # update tracker + self.tracker.predict() # 将跟踪状态分布向前传播一步 + self.tracker.update(detections) # 执行测量更新和跟踪管理 + + # output bbox identities + outputs = [] + for track in self.tracker.tracks: + if not track.is_confirmed() or track.time_since_update > 1: + continue + box = track.to_tlwh() + x1,y1,x2,y2 = self._tlwh_to_xyxy(box) + track_id = track.track_id + outputs.append(np.array([x1,y1,x2,y2,track_id], dtype=np.int16)) + if len(outputs) > 0: + outputs = np.stack(outputs,axis=0) + return outputs + + + """ + TODO: + Convert bbox from xc_yc_w_h to xtl_ytl_w_h + Thanks JieChen91@github.com for reporting this bug! + """ + #将bbox的[x,y,w,h] 转换成[t,l,w,h] + @staticmethod + def _xywh_to_tlwh(bbox_xywh): + if isinstance(bbox_xywh, np.ndarray): + bbox_tlwh = bbox_xywh.copy() + elif isinstance(bbox_xywh, torch.Tensor): + bbox_tlwh = bbox_xywh.clone() + bbox_tlwh[:,0] = bbox_xywh[:,0] - bbox_xywh[:,2]/2. + bbox_tlwh[:,1] = bbox_xywh[:,1] - bbox_xywh[:,3]/2. + return bbox_tlwh + + #将bbox的[x,y,w,h] 转换成[x1,y1,x2,y2] + #某些数据集例如 pascal_voc 的标注方式是采用[x,y,w,h] + """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" + def _xywh_to_xyxy(self, bbox_xywh): + x,y,w,h = bbox_xywh + x1 = max(int(x-w/2),0) + x2 = min(int(x+w/2),self.width-1) + y1 = max(int(y-h/2),0) + y2 = min(int(y+h/2),self.height-1) + return x1,y1,x2,y2 + + def _tlwh_to_xyxy(self, bbox_tlwh): + """ + TODO: + Convert bbox from xtl_ytl_w_h to xc_yc_w_h + Thanks JieChen91@github.com for reporting this bug! 
+ """ + x,y,w,h = bbox_tlwh + x1 = max(int(x),0) + x2 = min(int(x+w),self.width-1) + y1 = max(int(y),0) + y2 = min(int(y+h),self.height-1) + return x1,y1,x2,y2 + + def _xyxy_to_tlwh(self, bbox_xyxy): + x1,y1,x2,y2 = bbox_xyxy + + t = x1 + l = y1 + w = int(x2-x1) + h = int(y2-y1) + return t,l,w,h + + # 获取抠图部分的特征 + def _get_features(self, bbox_xywh, ori_img): + im_crops = [] + for box in bbox_xywh: + x1,y1,x2,y2 = self._xywh_to_xyxy(box) + im = ori_img[y1:y2,x1:x2] # 抠图部分 + im_crops.append(im) + if im_crops: + features = self.extractor(im_crops) # 对抠图部分提取特征 + else: + features = np.array([]) + return features + + diff --git a/deep_sort/deep_sort/sort/__init__.py b/deep_sort/deep_sort/sort/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/deep_sort/deep_sort/sort/__pycache__/__init__.cpython-310.pyc b/deep_sort/deep_sort/sort/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9384c63e3dc4076da421c6c409af55ca0d1ce1a9 Binary files /dev/null and b/deep_sort/deep_sort/sort/__pycache__/__init__.cpython-310.pyc differ diff --git a/deep_sort/deep_sort/sort/__pycache__/detection.cpython-310.pyc b/deep_sort/deep_sort/sort/__pycache__/detection.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..68ecd1294bb0d355d44e1deae777b7dd46503d9d Binary files /dev/null and b/deep_sort/deep_sort/sort/__pycache__/detection.cpython-310.pyc differ diff --git a/deep_sort/deep_sort/sort/__pycache__/iou_matching.cpython-310.pyc b/deep_sort/deep_sort/sort/__pycache__/iou_matching.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5aa0ee8a2f1f7c18e9586b071f3e0feb63a882bd Binary files /dev/null and b/deep_sort/deep_sort/sort/__pycache__/iou_matching.cpython-310.pyc differ diff --git a/deep_sort/deep_sort/sort/__pycache__/kalman_filter.cpython-310.pyc b/deep_sort/deep_sort/sort/__pycache__/kalman_filter.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..353f6b1f15a99a03a91f049f8128075002cee3da Binary files /dev/null and b/deep_sort/deep_sort/sort/__pycache__/kalman_filter.cpython-310.pyc differ diff --git a/deep_sort/deep_sort/sort/__pycache__/linear_assignment.cpython-310.pyc b/deep_sort/deep_sort/sort/__pycache__/linear_assignment.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1c9c2e93b647e8612de032fcbb6a9873b9bc42ad Binary files /dev/null and b/deep_sort/deep_sort/sort/__pycache__/linear_assignment.cpython-310.pyc differ diff --git a/deep_sort/deep_sort/sort/__pycache__/nn_matching.cpython-310.pyc b/deep_sort/deep_sort/sort/__pycache__/nn_matching.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b68aa05ab0a48bc8d0bfc893227e2c4e928c3143 Binary files /dev/null and b/deep_sort/deep_sort/sort/__pycache__/nn_matching.cpython-310.pyc differ diff --git a/deep_sort/deep_sort/sort/__pycache__/preprocessing.cpython-310.pyc b/deep_sort/deep_sort/sort/__pycache__/preprocessing.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e67b1ed28689317686a33bc8057594fe262ad7e5 Binary files /dev/null and b/deep_sort/deep_sort/sort/__pycache__/preprocessing.cpython-310.pyc differ diff --git a/deep_sort/deep_sort/sort/__pycache__/track.cpython-310.pyc b/deep_sort/deep_sort/sort/__pycache__/track.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..a008e008f4fb10b108b4ab9066929ae67252da37 Binary files /dev/null and b/deep_sort/deep_sort/sort/__pycache__/track.cpython-310.pyc differ diff --git a/deep_sort/deep_sort/sort/__pycache__/tracker.cpython-310.pyc b/deep_sort/deep_sort/sort/__pycache__/tracker.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fc26da92bc1505ca3d0af63565d23d868467559f Binary files /dev/null and b/deep_sort/deep_sort/sort/__pycache__/tracker.cpython-310.pyc differ diff --git a/deep_sort/deep_sort/sort/detection.py b/deep_sort/deep_sort/sort/detection.py new file mode 100644 index 0000000000000000000000000000000000000000..dbdbc8b525747ffc2bd494f8ab0e93c035730ce7 --- /dev/null +++ b/deep_sort/deep_sort/sort/detection.py @@ -0,0 +1,49 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np + + +class Detection(object): + """ + This class represents a bounding box detection in a single image. + + Parameters + ---------- + tlwh : array_like + Bounding box in format `(top left x, top left y, width, height)`. + confidence : float + Detector confidence score. + feature : array_like + A feature vector that describes the object contained in this image. + + Attributes + ---------- + tlwh : ndarray + Bounding box in format `(top left x, top left y, width, height)`. + confidence : ndarray + Detector confidence score. + feature : ndarray | NoneType + A feature vector that describes the object contained in this image. + + """ + + def __init__(self, tlwh, confidence, feature): + self.tlwh = np.asarray(tlwh, dtype=np.float32) + self.confidence = float(confidence) + self.feature = np.asarray(feature, dtype=np.float32) + + def to_tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + def to_xyah(self): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = self.tlwh.copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret diff --git a/deep_sort/deep_sort/sort/iou_matching.py b/deep_sort/deep_sort/sort/iou_matching.py new file mode 100644 index 0000000000000000000000000000000000000000..c7e0f7a41c1d95d4bd6ca04245c5abb9b3ed6156 --- /dev/null +++ b/deep_sort/deep_sort/sort/iou_matching.py @@ -0,0 +1,84 @@ +# vim: expandtab:ts=4:sw=4 +from __future__ import absolute_import +import numpy as np +from . import linear_assignment + +#计算两个框的IOU +def iou(bbox, candidates): + """Computer intersection over union. + + Parameters + ---------- + bbox : ndarray + A bounding box in format `(top left x, top left y, width, height)`. + candidates : ndarray + A matrix of candidate bounding boxes (one per row) in the same format + as `bbox`. + + Returns + ------- + ndarray + The intersection over union in [0, 1] between the `bbox` and each + candidate. A higher score means a larger fraction of the `bbox` is + occluded by the candidate. + + """ + bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] + candidates_tl = candidates[:, :2] + candidates_br = candidates[:, :2] + candidates[:, 2:] + + # np.c_ Translates slice objects to concatenation along the second axis. 
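+    # tl and br are the per-candidate top-left and bottom-right corners of the
+    # intersection rectangle; clamping (br - tl) at zero below handles boxes
+    # that do not overlap at all.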
+ tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], + np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] + br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], + np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] + wh = np.maximum(0., br - tl) + + area_intersection = wh.prod(axis=1) + area_bbox = bbox[2:].prod() + area_candidates = candidates[:, 2:].prod(axis=1) + return area_intersection / (area_bbox + area_candidates - area_intersection) + +# 计算tracks和detections之间的IOU距离成本矩阵 +def iou_cost(tracks, detections, track_indices=None, + detection_indices=None): + """An intersection over union distance metric. + + 用于计算tracks和detections之间的iou距离矩阵 + + Parameters + ---------- + tracks : List[deep_sort.track.Track] + A list of tracks. + detections : List[deep_sort.detection.Detection] + A list of detections. + track_indices : Optional[List[int]] + A list of indices to tracks that should be matched. Defaults to + all `tracks`. + detection_indices : Optional[List[int]] + A list of indices to detections that should be matched. Defaults + to all `detections`. + + Returns + ------- + ndarray + Returns a cost matrix of shape + len(track_indices), len(detection_indices) where entry (i, j) is + `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. + + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + cost_matrix = np.zeros((len(track_indices), len(detection_indices))) + for row, track_idx in enumerate(track_indices): + if tracks[track_idx].time_since_update > 1: + cost_matrix[row, :] = linear_assignment.INFTY_COST + continue + + bbox = tracks[track_idx].to_tlwh() + candidates = np.asarray([detections[i].tlwh for i in detection_indices]) + cost_matrix[row, :] = 1. - iou(bbox, candidates) + return cost_matrix diff --git a/deep_sort/deep_sort/sort/kalman_filter.py b/deep_sort/deep_sort/sort/kalman_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..ee15231bf0b49b62cb1f83a243970e477a47990e --- /dev/null +++ b/deep_sort/deep_sort/sort/kalman_filter.py @@ -0,0 +1,286 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np +import scipy.linalg + + +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + +''' +卡尔曼滤波分为两个阶段: +(1) 预测track在下一时刻的位置, +(2) 基于detection来更新预测的位置。 +''' +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + + The 8-dimensional state space + + x, y, a, h, vx, vy, va, vh + + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + + 对于每个轨迹,由一个 KalmanFilter 预测状态分布。每个轨迹记录自己的均值和方差作为滤波器输入。 + + 8维状态空间[x, y, a, h, vx, vy, va, vh]包含边界框中心位置(x, y),纵横比a,高度h和它们各自的速度。 + 物体运动遵循恒速模型。 边界框位置(x, y, a, h)被视为状态空间的直接观察(线性观察模型) + + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. 
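+        # _motion_mat is the 8x8 constant-velocity transition matrix F: identity on
+        # the diagonal, with dt in the upper-right 4x4 block so each of (x, y, a, h)
+        # is advanced by its velocity at every predict step. _update_mat is the 4x8
+        # measurement matrix H that projects the state back onto the observed box.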
+ self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + # 依据当前状态估计(高度)选择运动和观测不确定性。这些权重控制模型中的不确定性。 + self._std_weight_position = 1. / 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, a, h) with center position (x, y), + aspect ratio a, and height h. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + + """ + + + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + # Translates slice objects to concatenation along the first axis + mean = np.r_[mean_pos, mean_vel] + + # 由测量初始化均值向量(8维)和协方差矩阵(8x8维) + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], + 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 1e-5, + 10 * self._std_weight_velocity * measurement[3]] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + + """ + #卡尔曼滤波器由目标上一时刻的均值和协方差进行预测。 + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3]] + + # 初始化噪声矩阵Q;np.r_ 按列连接两个矩阵 + # motion_cov是过程噪声 W_k的 协方差矩阵Qk + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + # Update time state x' = Fx (1) + # x为track在t-1时刻的均值,F称为状态转移矩阵,该公式预测t时刻的x' + # self._motion_mat为F_k是作用在 x_{k-1}上的状态变换模型 + mean = np.dot(self._motion_mat, mean) + # Calculate error covariance P' = FPF^T+Q (2) + # P为track在t-1时刻的协方差,Q为系统的噪声矩阵,代表整个系统的可靠程度,一般初始化为很小的值, + # 该公式预测t时刻的P' + # covariance为P_{k|k} ,后验估计误差协方差矩阵,度量估计值的精确程度 + covariance = np.linalg.multi_dot(( + self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """Project state distribution to measurement space. + 投影状态分布到测量空间 + + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + + mean:ndarray,状态的平均向量(8维数组)。 + covariance:ndarray,状态的协方差矩阵(8x8维)。 + + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state + estimate. 
+ + 返回(ndarray,ndarray),返回给定状态估计的投影平均值和协方差矩阵 + + """ + # 在公式4中,R为检测器的噪声矩阵,它是一个4x4的对角矩阵, + # 对角线上的值分别为中心点两个坐标以及宽高的噪声, + # 以任意值初始化,一般设置宽高的噪声大于中心点的噪声, + # 该公式先将协方差矩阵P'映射到检测空间,然后再加上噪声矩阵R; + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3]] + + # R为测量过程中噪声的协方差;初始化噪声矩阵R + innovation_cov = np.diag(np.square(std)) + + # 将均值向量映射到检测空间,即 Hx' + mean = np.dot(self._update_mat, mean) + # 将协方差矩阵映射到检测空间,即 HP'H^T + covariance = np.linalg.multi_dot(( + self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov # 公式(4) + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + 通过估计值和观测值估计最新结果 + + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) + is the center position, a the aspect ratio, and h the height of the + bounding box. + + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. + + """ + # 将均值和协方差映射到检测空间,得到 Hx'和S + projected_mean, projected_cov = self.project(mean, covariance) + + # 矩阵分解 + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + # 计算卡尔曼增益K;相当于求解公式(5) + # 公式5计算卡尔曼增益K,卡尔曼增益用于估计误差的重要程度 + # 求解卡尔曼滤波增益K 用到了cholesky矩阵分解加快求解; + # 公式5的右边有一个S的逆,如果S矩阵很大,S的逆求解消耗时间太大, + # 所以代码中把公式两边同时乘上S,右边的S*S的逆变成了单位矩阵,转化成AX=B形式求解。 + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + # y = z - Hx' (3) + # 在公式3中,z为detection的均值向量,不包含速度变化值,即z=[cx, cy, r, h], + # H称为测量矩阵,它将track的均值向量x'映射到检测空间,该公式计算detection和track的均值误差 + innovation = measurement - projected_mean + + # 更新后的均值向量 x = x' + Ky (6) + new_mean = mean + np.dot(innovation, kalman_gain.T) + # 更新后的协方差矩阵 P = (I - KH)P' (7) + new_covariance = covariance - np.linalg.multi_dot(( + kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, + only_position=False): + """Compute gating distance between state distribution and measurements. + + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + + Parameters + ---------- + mean : ndarray + Mean vector over the state distribution (8 dimensional). + 状态分布上的平均向量(8维) + covariance : ndarray + Covariance of the state distribution (8x8 dimensional). + 状态分布的协方差(8x8维) + measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + N 个测量的 N×4维矩阵,每个矩阵的格式为(x,y,a,h),其中(x,y)是边界框中心位置,宽高比和h高度。 + only_position : Optional[bool] + If True, distance computation is done with respect to the bounding + box center position only. + 如果为True,则只计算盒子中心位置 + + Returns + ------- + ndarray + Returns an array of length N, where the i-th element contains the + squared Mahalanobis distance between (mean, covariance) and + `measurements[i]`. 
+ 返回一个长度为N的数组,其中第i个元素包含(mean,covariance)和measurements [i]之间的平方Mahalanobis距离 + + """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + cholesky_factor = np.linalg.cholesky(covariance) + d = measurements - mean + z = scipy.linalg.solve_triangular( + cholesky_factor, d.T, lower=True, check_finite=False, + overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha diff --git a/deep_sort/deep_sort/sort/linear_assignment.py b/deep_sort/deep_sort/sort/linear_assignment.py new file mode 100644 index 0000000000000000000000000000000000000000..931a9685a594eab4b553e497bbe1eaca090861e1 --- /dev/null +++ b/deep_sort/deep_sort/sort/linear_assignment.py @@ -0,0 +1,240 @@ +# vim: expandtab:ts=4:sw=4 +from __future__ import absolute_import +import numpy as np +# The linear sum assignment problem is also known as minimum weight matching in bipartite graphs. +from scipy.optimize import linear_sum_assignment as linear_assignment +from . import kalman_filter + + +INFTY_COST = 1e+5 + +# min_cost_matching 使用匈牙利算法解决线性分配问题。 +# 传入 门控余弦距离成本 或 iou cost +def min_cost_matching( + distance_metric, max_distance, tracks, detections, track_indices=None, + detection_indices=None): + """Solve linear assignment problem. + + Parameters + ---------- + distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as well as + a list of N track indices and M detection indices. The metric should + return the NxM dimensional cost matrix, where element (i, j) is the + association cost between the i-th track in the given track indices and + the j-th detection in the given detection_indices. + max_distance : float + Gating threshold. Associations with cost larger than this value are + disregarded. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. + track_indices : List[int] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). + detection_indices : List[int] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). + + Returns + ------- + (List[(int, int)], List[int], List[int]) + Returns a tuple with the following three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. + + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + if len(detection_indices) == 0 or len(track_indices) == 0: + return [], track_indices, detection_indices # Nothing to match. 
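`min_cost_matching` below hands the clipped cost matrix to SciPy's Hungarian solver (`linear_sum_assignment`) and then drops any pair whose cost still exceeds `max_distance`. A toy run of that idea on a hand-written 3x3 cost matrix, with no real tracks or detections involved:

```python
import numpy as np
from scipy.optimize import linear_sum_assignment

max_distance = 0.7
# Rows = tracks, columns = detections; toy association costs
cost = np.array([[0.10, 0.90, 0.80],
                 [0.60, 0.20, 0.95],
                 [0.85, 0.90, 0.30]])
cost[cost > max_distance] = max_distance + 1e-5  # clip, as in min_cost_matching

rows, cols = linear_sum_assignment(cost)         # Hungarian algorithm
matches = [(int(r), int(c)) for r, c in zip(rows, cols) if cost[r, c] <= max_distance]
print(matches)  # [(0, 0), (1, 1), (2, 2)]
```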
+ + # 计算成本矩阵 + cost_matrix = distance_metric( + tracks, detections, track_indices, detection_indices) + cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 + + # 执行匈牙利算法,得到指派成功的索引对,行索引为tracks的索引,列索引为detections的索引 + row_indices, col_indices = linear_assignment(cost_matrix) + + matches, unmatched_tracks, unmatched_detections = [], [], [] + # 找出未匹配的detections + for col, detection_idx in enumerate(detection_indices): + if col not in col_indices: + unmatched_detections.append(detection_idx) + # 找出未匹配的tracks + for row, track_idx in enumerate(track_indices): + if row not in row_indices: + unmatched_tracks.append(track_idx) + # 遍历匹配的(track, detection)索引对 + for row, col in zip(row_indices, col_indices): + track_idx = track_indices[row] + detection_idx = detection_indices[col] + # 如果相应的cost大于阈值max_distance,也视为未匹配成功 + if cost_matrix[row, col] > max_distance: + unmatched_tracks.append(track_idx) + unmatched_detections.append(detection_idx) + else: + matches.append((track_idx, detection_idx)) + return matches, unmatched_tracks, unmatched_detections + + +def matching_cascade( + distance_metric, max_distance, cascade_depth, tracks, detections, + track_indices=None, detection_indices=None): + """Run matching cascade. + + Parameters + ---------- + distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as well as + a list of N track indices and M detection indices. The metric should + return the NxM dimensional cost matrix, where element (i, j) is the + association cost between the i-th track in the given track indices and + the j-th detection in the given detection indices. + 距离度量: + 输入:一个轨迹和检测列表,以及一个N个轨迹索引和M个检测索引的列表。 + 返回:NxM维的代价矩阵,其中元素(i,j)是给定轨迹索引中第i个轨迹与 + 给定检测索引中第j个检测之间的关联成本。 + max_distance : float + Gating threshold. Associations with cost larger than this value are + disregarded. + 门控阈值。成本大于此值的关联将被忽略。 + cascade_depth: int + The cascade depth, should be se to the maximum track age. + 级联深度应设置为最大轨迹寿命。 + tracks : List[track.Track] + A list of predicted tracks at the current time step. + 当前时间步的预测轨迹列表。 + detections : List[detection.Detection] + A list of detections at the current time step. + 当前时间步的检测列表。 + track_indices : Optional[List[int]] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). Defaults to all tracks. + 轨迹索引列表,用于将 cost_matrix中的行映射到tracks的 + 轨迹(请参见上面的说明)。 默认为所有轨迹。 + detection_indices : Optional[List[int]] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). Defaults to all + detections. + 将 cost_matrix中的列映射到的检测索引列表 + detections中的检测(请参见上面的说明)。 默认为全部检测。 + + Returns + ------- + (List[(int, int)], List[int], List[int]) + Returns a tuple with the following three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. 
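The cascade documented here matches tracks in order of how recently they were last updated: level 0 takes tracks with `time_since_update == 1`, level 1 takes those missed one extra frame, and so on, so fresher tracks get first pick of the detections. A standalone toy illustration of that ordering (no real Track objects):

```python
# track index -> frames since the track was last matched (toy values)
time_since_update = {0: 1, 1: 3, 2: 1, 3: 2}
cascade_depth = 3

for level in range(cascade_depth):
    level_tracks = [k for k, age in time_since_update.items() if age == 1 + level]
    print(f"level {level}: tracks {level_tracks} compete for the still-unmatched detections")
# level 0: tracks [0, 2], level 1: track [3], level 2: track [1]
```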
+ + 返回包含以下三个条目的元组: + + 匹配的跟踪和检测的索引列表, + 不匹配的轨迹索引的列表, + 未匹配的检测索引的列表。 + + """ + + # 分配track_indices和detection_indices两个列表 + if track_indices is None: + track_indices = list(range(len(tracks))) + if detection_indices is None: + detection_indices = list(range(len(detections))) + + # 初始化匹配集matches M ← ∅ + # 未匹配检测集unmatched_detections U ← D + unmatched_detections = detection_indices + matches = [] + # 由小到大依次对每个level的tracks做匹配 + for level in range(cascade_depth): + # 如果没有detections,退出循环 + if len(unmatched_detections) == 0: # No detections left + break + + # 当前level的所有tracks索引 + # 步骤6:Select tracks by age + track_indices_l = [ + k for k in track_indices + if tracks[k].time_since_update == 1 + level + ] + # 如果当前level没有track,继续 + if len(track_indices_l) == 0: # Nothing to match at this level + continue + + # 步骤7:调用min_cost_matching函数进行匹配 + matches_l, _, unmatched_detections = \ + min_cost_matching( + distance_metric, max_distance, tracks, detections, + track_indices_l, unmatched_detections) + matches += matches_l # 步骤8 + unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) # 步骤9 + return matches, unmatched_tracks, unmatched_detections + +''' +门控成本矩阵:通过计算卡尔曼滤波的状态分布和测量值之间的距离对成本矩阵进行限制, +成本矩阵中的距离是track和detection之间的外观相似度。 +如果一个轨迹要去匹配两个外观特征非常相似的 detection,很容易出错; +分别让两个detection计算与这个轨迹的马氏距离,并使用一个阈值gating_threshold进行限制, +就可以将马氏距离较远的那个detection区分开,从而减少错误的匹配。 +''' +def gate_cost_matrix( + kf, cost_matrix, tracks, detections, track_indices, detection_indices, + gated_cost=INFTY_COST, only_position=False): + """Invalidate infeasible entries in cost matrix based on the state + distributions obtained by Kalman filtering. + + Parameters + ---------- + kf : The Kalman filter. + cost_matrix : ndarray + The NxM dimensional cost matrix, where N is the number of track indices + and M is the number of detection indices, such that entry (i, j) is the + association cost between `tracks[track_indices[i]]` and + `detections[detection_indices[j]]`. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. + track_indices : List[int] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). + detection_indices : List[int] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). + gated_cost : Optional[float] + Entries in the cost matrix corresponding to infeasible associations are + set this value. Defaults to a very large value. + 代价矩阵中与不可行关联相对应的条目设置此值。 默认为一个很大的值。 + only_position : Optional[bool] + If True, only the x, y position of the state distribution is considered + during gating. Defaults to False. + 如果为True,则在门控期间仅考虑状态分布的x,y位置。默认为False。 + + Returns + ------- + ndarray + Returns the modified cost matrix. 
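The gating threshold mentioned above is the 0.95 quantile of a chi-square distribution: about 5.9915 for 2 degrees of freedom (position only) and 9.4877 for 4 (the full (x, y, a, h) measurement), which is what the `chi2inv95` table in kalman_filter.py stores. A quick sketch of where that number comes from and how `gate_cost_matrix` applies it, using toy matrices:

```python
import numpy as np
from scipy.stats import chi2

INFTY_COST = 1e5
gating_threshold = chi2.ppf(0.95, df=4)   # ~9.4877 for the 4-D measurement space
print(round(gating_threshold, 4))

# Toy appearance cost matrix (2 tracks x 3 detections) and toy Mahalanobis distances
cost_matrix = np.array([[0.10, 0.20, 0.15],
                        [0.30, 0.12, 0.40]])
gating_distance = np.array([[2.0, 15.0, 5.0],
                            [30.0, 3.0, 8.0]])

# Pairs whose Mahalanobis distance exceeds the threshold become impossible matches
cost_matrix[gating_distance > gating_threshold] = INFTY_COST
print(cost_matrix)
```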
+ + """ + # 根据通过卡尔曼滤波获得的状态分布,使成本矩阵中的不可行条目无效。 + gating_dim = 2 if only_position else 4 # 测量空间维度 + # 马氏距离通过测算检测与平均轨迹位置的距离超过多少标准差来考虑状态估计的不确定性。 + # 通过从逆chi^2分布计算95%置信区间的阈值,排除可能性小的关联。 + # 四维测量空间对应的马氏阈值为9.4877 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + measurements = np.asarray( + [detections[i].to_xyah() for i in detection_indices]) + for row, track_idx in enumerate(track_indices): + track = tracks[track_idx] + #KalmanFilter.gating_distance 计算状态分布和测量之间的选通距离 + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = gated_cost + return cost_matrix diff --git a/deep_sort/deep_sort/sort/nn_matching.py b/deep_sort/deep_sort/sort/nn_matching.py new file mode 100644 index 0000000000000000000000000000000000000000..e39b883f62e0d54f098a560d1d8a41c88c53c404 --- /dev/null +++ b/deep_sort/deep_sort/sort/nn_matching.py @@ -0,0 +1,207 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np + + +def _pdist(a, b): + """Compute pair-wise squared distance between points in `a` and `b`. + + Parameters + ---------- + a : array_like + An NxM matrix of N samples of dimensionality M. + b : array_like + An LxM matrix of L samples of dimensionality M. + + Returns + ------- + ndarray + Returns a matrix of size len(a), len(b) such that element (i, j) + contains the squared distance between `a[i]` and `b[j]`. + + + 用于计算成对点之间的平方距离 + a :NxM 矩阵,代表 N 个样本,每个样本 M 个数值 + b :LxM 矩阵,代表 L 个样本,每个样本有 M 个数值 + 返回的是 NxL 的矩阵,比如 dist[i][j] 代表 a[i] 和 b[j] 之间的平方和距离 + 参考:https://blog.csdn.net/frankzd/article/details/80251042 + + """ + a, b = np.asarray(a), np.asarray(b) + if len(a) == 0 or len(b) == 0: + return np.zeros((len(a), len(b))) + a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) + r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] + r2 = np.clip(r2, 0., float(np.inf)) + return r2 + + +def _cosine_distance(a, b, data_is_normalized=False): + """Compute pair-wise cosine distance between points in `a` and `b`. + + Parameters + ---------- + a : array_like + An NxM matrix of N samples of dimensionality M. + b : array_like + An LxM matrix of L samples of dimensionality M. + data_is_normalized : Optional[bool] + If True, assumes rows in a and b are unit length vectors. + Otherwise, a and b are explicitly normalized to lenght 1. + + Returns + ------- + ndarray + Returns a matrix of size len(a), len(b) such that eleement (i, j) + contains the squared distance between `a[i]` and `b[j]`. + + 用于计算成对点之间的余弦距离 + a :NxM 矩阵,代表 N 个样本,每个样本 M 个数值 + b :LxM 矩阵,代表 L 个样本,每个样本有 M 个数值 + 返回的是 NxL 的矩阵,比如 c[i][j] 代表 a[i] 和 b[j] 之间的余弦距离 + 参考: + https://blog.csdn.net/u013749540/article/details/51813922 + + + """ + if not data_is_normalized: + # np.linalg.norm 求向量的范式,默认是 L2 范式 + a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) + b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) + return 1. - np.dot(a, b.T) # 余弦距离 = 1 - 余弦相似度 + + +def _nn_euclidean_distance(x, y): + """ Helper function for nearest neighbor distance metric (Euclidean). + + Parameters + ---------- + x : ndarray + A matrix of N row-vectors (sample points). + y : ndarray + A matrix of M row-vectors (query points). + + Returns + ------- + ndarray + A vector of length M that contains for each entry in `y` the + smallest Euclidean distance to a sample in `x`. + + """ + distances = _pdist(x, y) + return np.maximum(0.0, distances.min(axis=0)) + + +def _nn_cosine_distance(x, y): + """ Helper function for nearest neighbor distance metric (cosine). 
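`_cosine_distance` above normalizes every row and computes 1 minus the cosine similarity in a single matrix product. A small check with made-up 2-D feature vectors:

```python
import numpy as np

a = np.array([[1.0, 0.0],
              [1.0, 1.0]])   # toy "track" features
b = np.array([[0.0, 2.0],
              [3.0, 0.0]])   # toy "detection" features

a_n = a / np.linalg.norm(a, axis=1, keepdims=True)
b_n = b / np.linalg.norm(b, axis=1, keepdims=True)
cos_dist = 1.0 - a_n @ b_n.T

print(cos_dist)
# approximately [[1.0, 0.0], [0.293, 0.293]]:
# same direction -> 0, orthogonal -> 1, 45 degrees -> 1 - sqrt(2)/2
```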
+ + Parameters + ---------- + x : ndarray + A matrix of N row-vectors (sample points). + y : ndarray + A matrix of M row-vectors (query points). + + Returns + ------- + ndarray + A vector of length M that contains for each entry in `y` the + smallest cosine distance to a sample in `x`. + + """ + distances = _cosine_distance(x, y) + return distances.min(axis=0) + + +class NearestNeighborDistanceMetric(object): + """ + A nearest neighbor distance metric that, for each target, returns + the closest distance to any sample that has been observed so far. + + 对于每个目标,返回最近邻居的距离度量, 即与到目前为止已观察到的任何样本的最接近距离。 + + Parameters + ---------- + metric : str + Either "euclidean" or "cosine". + matching_threshold: float + The matching threshold. Samples with larger distance are considered an + invalid match. + 匹配阈值。 距离较大的样本对被认为是无效的匹配。 + budget : Optional[int] + If not None, fix samples per class to at most this number. Removes + the oldest samples when the budget is reached. + 如果不是None,则将每个类别的样本最多固定为该数字。 + 删除达到budget时最古老的样本。 + + Attributes + ---------- + samples : Dict[int -> List[ndarray]] + A dictionary that maps from target identities to the list of samples + that have been observed so far. + 一个从目标ID映射到到目前为止已经观察到的样本列表的字典 + + """ + + def __init__(self, metric, matching_threshold, budget=None): + + + if metric == "euclidean": + self._metric = _nn_euclidean_distance # 欧式距离 + elif metric == "cosine": + self._metric = _nn_cosine_distance # 余弦距离 + else: + raise ValueError( + "Invalid metric; must be either 'euclidean' or 'cosine'") + self.matching_threshold = matching_threshold + self.budget = budget # budge用于控制 feature 的数目 + self.samples = {} + + def partial_fit(self, features, targets, active_targets): + """Update the distance metric with new data. + 用新的数据更新测量距离 + + Parameters + ---------- + features : ndarray + An NxM matrix of N features of dimensionality M. + targets : ndarray + An integer array of associated target identities. + active_targets : List[int] + A list of targets that are currently present in the scene. + 传入特征列表及其对应id,partial_fit构造一个活跃目标的特征字典。 + + """ + for feature, target in zip(features, targets): + # 对应目标下添加新的feature,更新feature集合 + # samples字典 d: feature list} + self.samples.setdefault(target, []).append(feature) + if self.budget is not None: + # 只考虑budget个目标,超过直接忽略 + self.samples[target] = self.samples[target][-self.budget:] + + # 筛选激活的目标;samples是一个字典{id->feature list} + self.samples = {k: self.samples[k] for k in active_targets} + + def distance(self, features, targets): + """Compute distance between features and targets. + + Parameters + ---------- + features : ndarray + An NxM matrix of N features of dimensionality M. + targets : List[int] + A list of targets to match the given `features` against. + + Returns + ------- + ndarray + Returns a cost matrix of shape len(targets), len(features), where + element (i, j) contains the closest squared distance between + `targets[i]` and `features[j]`. 
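Putting the pieces of this class together, here is a minimal usage sketch with random stand-in features. The import path follows the repository layout, and the 128-dimensional feature size is only an assumption; the real dimensionality comes from the re-ID network:

```python
import numpy as np
# Adjust the import path to your layout; this follows deep_sort/deep_sort/sort/nn_matching.py
from deep_sort.deep_sort.sort.nn_matching import NearestNeighborDistanceMetric

rng = np.random.default_rng(0)
metric = NearestNeighborDistanceMetric("cosine", matching_threshold=0.2, budget=100)

# Two tracks (ids 1 and 2) with three appearance samples each
features = rng.normal(size=(6, 128)).astype(np.float32)
targets = np.array([1, 1, 1, 2, 2, 2])
metric.partial_fit(features, targets, active_targets=[1, 2])

# Cost matrix between the stored track samples and two new detection features
det_features = rng.normal(size=(2, 128)).astype(np.float32)
cost = metric.distance(det_features, targets=[1, 2])
print(cost.shape)  # (2, 2): one row per track, one column per detection
```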
+ + 计算features和targets之间的距离,返回一个成本矩阵(代价矩阵) + """ + cost_matrix = np.zeros((len(targets), len(features))) + for i, target in enumerate(targets): + cost_matrix[i, :] = self._metric(self.samples[target], features) + return cost_matrix diff --git a/deep_sort/deep_sort/sort/preprocessing.py b/deep_sort/deep_sort/sort/preprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..3e708ee5fff2474f15c8f6d113e535fa007090e3 --- /dev/null +++ b/deep_sort/deep_sort/sort/preprocessing.py @@ -0,0 +1,73 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np +import cv2 + + +def non_max_suppression(boxes, max_bbox_overlap, scores=None): + """Suppress overlapping detections. + + Original code from [1]_ has been adapted to include confidence score. + + .. [1] http://www.pyimagesearch.com/2015/02/16/ + faster-non-maximum-suppression-python/ + + Examples + -------- + + >>> boxes = [d.roi for d in detections] + >>> scores = [d.confidence for d in detections] + >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) + >>> detections = [detections[i] for i in indices] + + Parameters + ---------- + boxes : ndarray + Array of ROIs (x, y, width, height). + max_bbox_overlap : float + ROIs that overlap more than this values are suppressed. + scores : Optional[array_like] + Detector confidence score. + + Returns + ------- + List[int] + Returns indices of detections that have survived non-maxima suppression. + + """ + if len(boxes) == 0: + return [] + + boxes = boxes.astype(np.float32) + pick = [] + + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + boxes[:, 0] + y2 = boxes[:, 3] + boxes[:, 1] + + area = (x2 - x1 + 1) * (y2 - y1 + 1) + if scores is not None: + idxs = np.argsort(scores) + else: + idxs = np.argsort(y2) + + while len(idxs) > 0: + last = len(idxs) - 1 + i = idxs[last] + pick.append(i) + + xx1 = np.maximum(x1[i], x1[idxs[:last]]) + yy1 = np.maximum(y1[i], y1[idxs[:last]]) + xx2 = np.minimum(x2[i], x2[idxs[:last]]) + yy2 = np.minimum(y2[i], y2[idxs[:last]]) + + w = np.maximum(0, xx2 - xx1 + 1) + h = np.maximum(0, yy2 - yy1 + 1) + + overlap = (w * h) / area[idxs[:last]] # IOU + + idxs = np.delete( + idxs, np.concatenate( + ([last], np.where(overlap > max_bbox_overlap)[0]))) + + return pick diff --git a/deep_sort/deep_sort/sort/track.py b/deep_sort/deep_sort/sort/track.py new file mode 100644 index 0000000000000000000000000000000000000000..b81d7968bdb828ba43fd9a9968d40520f2d818b3 --- /dev/null +++ b/deep_sort/deep_sort/sort/track.py @@ -0,0 +1,199 @@ +# vim: expandtab:ts=4:sw=4 + + +class TrackState: + """ + Enumeration type for the single target track state. Newly created tracks are + classified as `tentative` until enough evidence has been collected. Then, + the track state is changed to `confirmed`. Tracks that are no longer alive + are classified as `deleted` to mark them for removal from the set of active + tracks. + + 单个目标track状态的枚举类型。 + 新创建的track分类为“Tentative”,直到收集到足够的证据为止。 + 然后,跟踪状态更改为“Confirmed”。 + 不再活跃的tracks被归类为“Deleted”,以将其标记为从有效集中删除。 + + """ + + Tentative = 1 + Confirmed = 2 + Deleted = 3 + + +class Track: + """ + A single target track with state space `(x, y, a, h)` and associated + velocities, where `(x, y)` is the center of the bounding box, `a` is the + aspect ratio and `h` is the height. + + 具有状态空间(x,y,a,h)并关联速度的单个目标轨迹(track), + 其中(x,y)是边界框的中心,a是宽高比,h是高度。 + + Parameters + ---------- + mean : ndarray + Mean vector of the initial state distribution. + 初始状态分布的均值向量 + covariance : ndarray + Covariance matrix of the initial state distribution. 
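`non_max_suppression` in preprocessing.py takes ROIs in (x, y, width, height) form and keeps only the highest-scoring box of each overlapping group. A toy call, standalone and not wired to the detector (import path per the repository layout):

```python
import numpy as np
from deep_sort.deep_sort.sort.preprocessing import non_max_suppression

boxes = np.array([[10, 10, 50, 80],     # (x, y, w, h)
                  [12, 12, 50, 80],     # near-duplicate of the first box
                  [200, 50, 40, 90]], dtype=np.float32)
scores = np.array([0.9, 0.6, 0.8])

keep = non_max_suppression(boxes, max_bbox_overlap=0.5, scores=scores)
print(keep)  # [0, 2]: the lower-scored duplicate is suppressed
```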
+ 初始状态分布的协方差矩阵 + track_id : int + A unique track identifier. + 唯一的track标识符 + n_init : int + Number of consecutive detections before the track is confirmed. The + track state is set to `Deleted` if a miss occurs within the first + `n_init` frames. + 确认track之前的连续检测次数。 在第一个n_init帧中 + 第一个未命中的情况下将跟踪状态设置为“Deleted” + max_age : int + The maximum number of consecutive misses before the track state is + set to `Deleted`. + 跟踪状态设置为Deleted之前的最大连续未命中数;代表一个track的存活期限 + + feature : Optional[ndarray] + Feature vector of the detection this track originates from. If not None, + this feature is added to the `features` cache. + 此track所源自的检测的特征向量。 如果不是None,此feature已添加到feature缓存中。 + + Attributes + ---------- + mean : ndarray + Mean vector of the initial state distribution. + 初始状态分布的均值向量 + covariance : ndarray + Covariance matrix of the initial state distribution. + 初始状态分布的协方差矩阵 + track_id : int + A unique track identifier. + hits : int + Total number of measurement updates. + 测量更新总数 + age : int + Total number of frames since first occurence. + 自第一次出现以来的总帧数 + time_since_update : int + Total number of frames since last measurement update. + 自上次测量更新以来的总帧数 + state : TrackState + The current track state. + features : List[ndarray] + A cache of features. On each measurement update, the associated feature + vector is added to this list. + feature缓存。每次测量更新时,相关feature向量添加到此列表中 + + """ + + def __init__(self, mean, covariance, track_id, n_init, max_age, + feature=None): + self.mean = mean + self.covariance = covariance + self.track_id = track_id + # hits代表匹配上了多少次,匹配次数超过n_init,设置Confirmed状态 + # hits每次调用update函数的时候+1 + self.hits = 1 + self.age = 1 # 和time_since_update功能重复 + # 每次调用predict函数的时候就会+1; 每次调用update函数的时候就会设置为0 + self.time_since_update = 0 + + self.state = TrackState.Tentative # 初始化一个Track的时设置Tentative状态 + # 每个track对应多个features, 每次更新都会将最新的feature添加到列表中 + self.features = [] + if feature is not None: + self.features.append(feature) + + self._n_init = n_init + self._max_age = max_age + + def to_tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + + Returns + ------- + ndarray + The bounding box. + + """ + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + def to_tlbr(self): + """Get current position in bounding box format `(min x, miny, max x, + max y)`. + + Returns + ------- + ndarray + The bounding box. + + """ + ret = self.to_tlwh() + ret[2:] = ret[:2] + ret[2:] + return ret + + def predict(self, kf): + """Propagate the state distribution to the current time step using a + Kalman filter prediction step. + 使用卡尔曼滤波器预测步骤将状态分布传播到当前时间步 + + Parameters + ---------- + kf : kalman_filter.KalmanFilter + The Kalman filter. + + """ + self.mean, self.covariance = kf.predict(self.mean, self.covariance) + self.age += 1 + self.time_since_update += 1 + + def update(self, kf, detection): + """Perform Kalman filter measurement update step and update the feature + cache. + 执行卡尔曼滤波器测量更新步骤并更新feature缓存 + + Parameters + ---------- + kf : kalman_filter.KalmanFilter + The Kalman filter. + detection : Detection + The associated detection. 
+ + """ + self.mean, self.covariance = kf.update( + self.mean, self.covariance, detection.to_xyah()) + self.features.append(detection.feature) + + self.hits += 1 + self.time_since_update = 0 + # hits代表匹配上了多少次,匹配次数超过n_init,设置Confirmed状态 + # 连续匹配上n_init帧的时候,转变为确定态 + if self.state == TrackState.Tentative and self.hits >= self._n_init: + self.state = TrackState.Confirmed + + def mark_missed(self): + """Mark this track as missed (no association at the current time step). + """ + # 如果在处于Tentative态的情况下没有匹配上任何detection,转变为删除态。 + if self.state == TrackState.Tentative: + self.state = TrackState.Deleted + elif self.time_since_update > self._max_age: + # 如果time_since_update超过max_age,设置Deleted状态 + # 即失配连续达到max_age次数的时候,转变为删除态 + self.state = TrackState.Deleted + + def is_tentative(self): + """Returns True if this track is tentative (unconfirmed). + """ + return self.state == TrackState.Tentative + + def is_confirmed(self): + """Returns True if this track is confirmed.""" + return self.state == TrackState.Confirmed + + def is_deleted(self): + """Returns True if this track is dead and should be deleted.""" + return self.state == TrackState.Deleted diff --git a/deep_sort/deep_sort/sort/tracker.py b/deep_sort/deep_sort/sort/tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..1896906ab539ab0b888b74a2c5e3ab354a4d3da2 --- /dev/null +++ b/deep_sort/deep_sort/sort/tracker.py @@ -0,0 +1,168 @@ +# vim: expandtab:ts=4:sw=4 +from __future__ import absolute_import +import numpy as np +from . import kalman_filter +from . import linear_assignment +from . import iou_matching +from .track import Track + + +class Tracker: + """ + This is the multi-target tracker. + + Parameters + ---------- + metric : nn_matching.NearestNeighborDistanceMetric + A distance metric for measurement-to-track association. + max_age : int + Maximum number of missed misses before a track is deleted. + n_init : int + Number of consecutive detections before the track is confirmed. The + track state is set to `Deleted` if a miss occurs within the first + `n_init` frames. + + Attributes + ---------- + metric : nn_matching.NearestNeighborDistanceMetric + The distance metric used for measurement to track association. + 测量与轨迹关联的距离度量 + max_age : int + Maximum number of missed misses before a track is deleted. + 删除轨迹前的最大未命中数 + n_init : int + Number of frames that a track remains in initialization phase. + 确认轨迹前的连续检测次数。如果前n_init帧内发生未命中,则将轨迹状态设置为Deleted + kf : kalman_filter.KalmanFilter + A Kalman filter to filter target trajectories in image space. + tracks : List[Track] + The list of active tracks at the current time step. + + """ + + def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3): + self.metric = metric + self.max_iou_distance = max_iou_distance + self.max_age = max_age + self.n_init = n_init + + self.kf = kalman_filter.KalmanFilter() # 实例化卡尔曼滤波器 + self.tracks = [] # 保存一个轨迹列表,用于保存一系列轨迹 + self._next_id = 1 # 下一个分配的轨迹id + + def predict(self): + """Propagate track state distributions one time step forward. + 将跟踪状态分布向前传播一步 + + This function should be called once every time step, before `update`. + """ + for track in self.tracks: + track.predict(self.kf) + + def update(self, detections): + """Perform measurement update and track management. + 执行测量更新和轨迹管理 + + Parameters + ---------- + detections : List[deep_sort.detection.Detection] + A list of detections at the current time step. + + """ + # Run matching cascade. 
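Before the matching logic itself, it helps to see the track life cycle in isolation: a new Track starts Tentative, is promoted to Confirmed after `n_init` consecutive matches, and a Tentative track that misses even once is deleted. A minimal sketch using only the classes defined in this diff (import paths assumed from the repository layout, measurement values are toy numbers):

```python
import numpy as np
from deep_sort.deep_sort.sort.kalman_filter import KalmanFilter
from deep_sort.deep_sort.sort.track import Track

kf = KalmanFilter()
mean, cov = kf.initiate(np.array([320.0, 240.0, 0.5, 80.0]))  # toy (x, y, a, h) measurement

track = Track(mean, cov, track_id=1, n_init=3, max_age=70)
print(track.is_tentative())   # True: new tracks start out Tentative

track.predict(kf)             # one frame passes without a matched detection
track.mark_missed()           # a Tentative track that misses is dropped immediately
print(track.is_deleted())     # True
```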
+ matches, unmatched_tracks, unmatched_detections = \ + self._match(detections) + + # Update track set. + + # 1. 针对匹配上的结果 + for track_idx, detection_idx in matches: + # 更新tracks中相应的detection + self.tracks[track_idx].update( + self.kf, detections[detection_idx]) + + # 2. 针对未匹配的track, 调用mark_missed进行标记 + # track失配时,若Tantative则删除;若update时间很久也删除 + for track_idx in unmatched_tracks: + self.tracks[track_idx].mark_missed() + + # 3. 针对未匹配的detection, detection失配,进行初始化 + for detection_idx in unmatched_detections: + self._initiate_track(detections[detection_idx]) + + # 得到最新的tracks列表,保存的是标记为Confirmed和Tentative的track + self.tracks = [t for t in self.tracks if not t.is_deleted()] + + # Update distance metric. + active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] + features, targets = [], [] + for track in self.tracks: + # 获取所有Confirmed状态的track id + if not track.is_confirmed(): + continue + features += track.features # 将Confirmed状态的track的features添加到features列表 + # 获取每个feature对应的trackid + targets += [track.track_id for _ in track.features] + track.features = [] + # 距离度量中的特征集更新 + self.metric.partial_fit( + np.asarray(features), np.asarray(targets), active_targets) + + def _match(self, detections): + + def gated_metric(tracks, dets, track_indices, detection_indices): + features = np.array([dets[i].feature for i in detection_indices]) + targets = np.array([tracks[i].track_id for i in track_indices]) + + # 通过最近邻(余弦距离)计算出成本矩阵(代价矩阵) + cost_matrix = self.metric.distance(features, targets) + # 计算门控后的成本矩阵(代价矩阵) + cost_matrix = linear_assignment.gate_cost_matrix( + self.kf, cost_matrix, tracks, dets, track_indices, + detection_indices) + + return cost_matrix + + # Split track set into confirmed and unconfirmed tracks. + # 区分开confirmed tracks和unconfirmed tracks + confirmed_tracks = [ + i for i, t in enumerate(self.tracks) if t.is_confirmed()] + unconfirmed_tracks = [ + i for i, t in enumerate(self.tracks) if not t.is_confirmed()] + + # Associate confirmed tracks using appearance features. + # 对确定态的轨迹进行级联匹配,得到匹配的tracks、不匹配的tracks、不匹配的detections + # matching_cascade 根据特征将检测框匹配到确认的轨迹。 + # 传入门控后的成本矩阵 + matches_a, unmatched_tracks_a, unmatched_detections = \ + linear_assignment.matching_cascade( + gated_metric, self.metric.matching_threshold, self.max_age, + self.tracks, detections, confirmed_tracks) + + # Associate remaining tracks together with unconfirmed tracks using IOU. 
+ # 将未确定态的轨迹和刚刚没有匹配上的轨迹组合为 iou_track_candidates + # 并进行基于IoU的匹配 + iou_track_candidates = unconfirmed_tracks + [ + k for k in unmatched_tracks_a if + self.tracks[k].time_since_update == 1] # 刚刚没有匹配上的轨迹 + unmatched_tracks_a = [ + k for k in unmatched_tracks_a if + self.tracks[k].time_since_update != 1] # 并非刚刚没有匹配上的轨迹 + # 对级联匹配中还没有匹配成功的目标再进行IoU匹配 + # min_cost_matching 使用匈牙利算法解决线性分配问题。 + # 传入 iou_cost,尝试关联剩余的轨迹与未确认的轨迹。 + matches_b, unmatched_tracks_b, unmatched_detections = \ + linear_assignment.min_cost_matching( + iou_matching.iou_cost, self.max_iou_distance, self.tracks, + detections, iou_track_candidates, unmatched_detections) + + matches = matches_a + matches_b # 组合两部分匹配 + unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) + return matches, unmatched_tracks, unmatched_detections + + def _initiate_track(self, detection): + mean, covariance = self.kf.initiate(detection.to_xyah()) + self.tracks.append(Track( + mean, covariance, self._next_id, self.n_init, self.max_age, + detection.feature)) + self._next_id += 1 diff --git a/deep_sort/utils/__init__.py b/deep_sort/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/deep_sort/utils/asserts.py b/deep_sort/utils/asserts.py new file mode 100644 index 0000000000000000000000000000000000000000..59a73cc04025762d6490fcd2945a747d963def32 --- /dev/null +++ b/deep_sort/utils/asserts.py @@ -0,0 +1,13 @@ +from os import environ + + +def assert_in(file, files_to_check): + if file not in files_to_check: + raise AssertionError("{} does not exist in the list".format(str(file))) + return True + + +def assert_in_env(check_list: list): + for item in check_list: + assert_in(item, environ.keys()) + return True diff --git a/deep_sort/utils/draw.py b/deep_sort/utils/draw.py new file mode 100644 index 0000000000000000000000000000000000000000..bc7cb537978e86805d5d9789785a8afe67df9030 --- /dev/null +++ b/deep_sort/utils/draw.py @@ -0,0 +1,36 @@ +import numpy as np +import cv2 + +palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1) + + +def compute_color_for_labels(label): + """ + Simple function that adds fixed color depending on the class + """ + color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette] + return tuple(color) + + +def draw_boxes(img, bbox, identities=None, offset=(0,0)): + for i,box in enumerate(bbox): + x1,y1,x2,y2 = [int(i) for i in box] + x1 += offset[0] + x2 += offset[0] + y1 += offset[1] + y2 += offset[1] + # box text and bar + id = int(identities[i]) if identities is not None else 0 + color = compute_color_for_labels(id) + label = '{}{:d}'.format("", id) + t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0] + cv2.rectangle(img,(x1, y1),(x2,y2),color,3) + cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) + cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2) + return img + + + +if __name__ == '__main__': + for i in range(82): + print(compute_color_for_labels(i)) diff --git a/deep_sort/utils/evaluation.py b/deep_sort/utils/evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..100179407181933d59809b25400d115cfa789867 --- /dev/null +++ b/deep_sort/utils/evaluation.py @@ -0,0 +1,103 @@ +import os +import numpy as np +import copy +import motmetrics as mm +mm.lap.default_solver = 'lap' +from utils.io import read_results, unzip_objs + + +class Evaluator(object): + + def __init__(self, data_root, seq_name, data_type): + self.data_root = data_root 
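`draw_boxes` above is the helper that renders tracker output: one colored rectangle and id label per identity, with the color derived deterministically from the id. A small usage sketch on a blank frame (toy boxes and ids, import path per the repository layout):

```python
import numpy as np
from deep_sort.utils.draw import draw_boxes

frame = np.zeros((480, 640, 3), dtype=np.uint8)    # toy black frame
bbox_xyxy = np.array([[50, 60, 200, 300],          # (x1, y1, x2, y2) per tracked box
                      [300, 100, 420, 360]])
identities = np.array([3, 7])

frame = draw_boxes(frame, bbox_xyxy, identities)   # the same id always gets the same color
```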
+ self.seq_name = seq_name + self.data_type = data_type + + self.load_annotations() + self.reset_accumulator() + + def load_annotations(self): + assert self.data_type == 'mot' + + gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') + self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) + self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) + + def reset_accumulator(self): + self.acc = mm.MOTAccumulator(auto_id=True) + + def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): + # results + trk_tlwhs = np.copy(trk_tlwhs) + trk_ids = np.copy(trk_ids) + + # gts + gt_objs = self.gt_frame_dict.get(frame_id, []) + gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] + + # ignore boxes + ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) + ignore_tlwhs = unzip_objs(ignore_objs)[0] + + + # remove ignored results + keep = np.ones(len(trk_tlwhs), dtype=bool) + iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) + if len(iou_distance) > 0: + match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) + match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) + match_ious = iou_distance[match_is, match_js] + + match_js = np.asarray(match_js, dtype=int) + match_js = match_js[np.logical_not(np.isnan(match_ious))] + keep[match_js] = False + trk_tlwhs = trk_tlwhs[keep] + trk_ids = trk_ids[keep] + + # get distance matrix + iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) + + # acc + self.acc.update(gt_ids, trk_ids, iou_distance) + + if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): + events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics + else: + events = None + return events + + def eval_file(self, filename): + self.reset_accumulator() + + result_frame_dict = read_results(filename, self.data_type, is_gt=False) + frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) + for frame_id in frames: + trk_objs = result_frame_dict.get(frame_id, []) + trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] + self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) + + return self.acc + + @staticmethod + def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): + names = copy.deepcopy(names) + if metrics is None: + metrics = mm.metrics.motchallenge_metrics + metrics = copy.deepcopy(metrics) + + mh = mm.metrics.create() + summary = mh.compute_many( + accs, + metrics=metrics, + names=names, + generate_overall=True + ) + + return summary + + @staticmethod + def save_summary(summary, filename): + import pandas as pd + writer = pd.ExcelWriter(filename) + summary.to_excel(writer) + writer.save() diff --git a/deep_sort/utils/io.py b/deep_sort/utils/io.py new file mode 100644 index 0000000000000000000000000000000000000000..2dc9afd24019cd930eef6c21ab9f579313dd3b3a --- /dev/null +++ b/deep_sort/utils/io.py @@ -0,0 +1,133 @@ +import os +from typing import Dict +import numpy as np + +# from utils.log import get_logger + + +def write_results(filename, results, data_type): + if data_type == 'mot': + save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n' + elif data_type == 'kitti': + save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' + else: + raise ValueError(data_type) + + with open(filename, 'w') as f: + for frame_id, tlwhs, track_ids in results: + if 
data_type == 'kitti': + frame_id -= 1 + for tlwh, track_id in zip(tlwhs, track_ids): + if track_id < 0: + continue + x1, y1, w, h = tlwh + x2, y2 = x1 + w, y1 + h + line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h) + f.write(line) + + +# def write_results(filename, results_dict: Dict, data_type: str): +# if not filename: +# return +# path = os.path.dirname(filename) +# if not os.path.exists(path): +# os.makedirs(path) + +# if data_type in ('mot', 'mcmot', 'lab'): +# save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' +# elif data_type == 'kitti': +# save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' +# else: +# raise ValueError(data_type) + +# with open(filename, 'w') as f: +# for frame_id, frame_data in results_dict.items(): +# if data_type == 'kitti': +# frame_id -= 1 +# for tlwh, track_id in frame_data: +# if track_id < 0: +# continue +# x1, y1, w, h = tlwh +# x2, y2 = x1 + w, y1 + h +# line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) +# f.write(line) +# logger.info('Save results to {}'.format(filename)) + + +def read_results(filename, data_type: str, is_gt=False, is_ignore=False): + if data_type in ('mot', 'lab'): + read_fun = read_mot_results + else: + raise ValueError('Unknown data type: {}'.format(data_type)) + + return read_fun(filename, is_gt, is_ignore) + + +""" +labels={'ped', ... % 1 +'person_on_vhcl', ... % 2 +'car', ... % 3 +'bicycle', ... % 4 +'mbike', ... % 5 +'non_mot_vhcl', ... % 6 +'static_person', ... % 7 +'distractor', ... % 8 +'occluder', ... % 9 +'occluder_on_grnd', ... %10 +'occluder_full', ... % 11 +'reflection', ... % 12 +'crowd' ... % 13 +}; +""" + + +def read_mot_results(filename, is_gt, is_ignore): + valid_labels = {1} + ignore_labels = {2, 7, 8, 12} + results_dict = dict() + if os.path.isfile(filename): + with open(filename, 'r') as f: + for line in f.readlines(): + linelist = line.split(',') + if len(linelist) < 7: + continue + fid = int(linelist[0]) + if fid < 1: + continue + results_dict.setdefault(fid, list()) + + if is_gt: + if 'MOT16-' in filename or 'MOT17-' in filename: + label = int(float(linelist[7])) + mark = int(float(linelist[6])) + if mark == 0 or label not in valid_labels: + continue + score = 1 + elif is_ignore: + if 'MOT16-' in filename or 'MOT17-' in filename: + label = int(float(linelist[7])) + vis_ratio = float(linelist[8]) + if label not in ignore_labels and vis_ratio >= 0: + continue + else: + continue + score = 1 + else: + score = float(linelist[6]) + + tlwh = tuple(map(float, linelist[2:6])) + target_id = int(linelist[1]) + + results_dict[fid].append((tlwh, target_id, score)) + + return results_dict + + +def unzip_objs(objs): + if len(objs) > 0: + tlwhs, ids, scores = zip(*objs) + else: + tlwhs, ids, scores = [], [], [] + tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) + + return tlwhs, ids, scores \ No newline at end of file diff --git a/deep_sort/utils/json_logger.py b/deep_sort/utils/json_logger.py new file mode 100644 index 0000000000000000000000000000000000000000..0afd0b45df736866c49473db78286685d77660ac --- /dev/null +++ b/deep_sort/utils/json_logger.py @@ -0,0 +1,383 @@ +""" +References: + https://medium.com/analytics-vidhya/creating-a-custom-logging-mechanism-for-real-time-object-detection-using-tdd-4ca2cfcd0a2f +""" +import json +from os import makedirs +from os.path import exists, join +from datetime import datetime + + +class JsonMeta(object): + HOURS = 3 + 
MINUTES = 59 + SECONDS = 59 + PATH_TO_SAVE = 'LOGS' + DEFAULT_FILE_NAME = 'remaining' + + +class BaseJsonLogger(object): + """ + This is the base class that returns __dict__ of its own + it also returns the dicts of objects in the attributes that are list instances + + """ + + def dic(self): + # returns dicts of objects + out = {} + for k, v in self.__dict__.items(): + if hasattr(v, 'dic'): + out[k] = v.dic() + elif isinstance(v, list): + out[k] = self.list(v) + else: + out[k] = v + return out + + @staticmethod + def list(values): + # applies the dic method on items in the list + return [v.dic() if hasattr(v, 'dic') else v for v in values] + + +class Label(BaseJsonLogger): + """ + For each bounding box there are various categories with confidences. Label class keeps track of that information. + """ + + def __init__(self, category: str, confidence: float): + self.category = category + self.confidence = confidence + + +class Bbox(BaseJsonLogger): + """ + This module stores the information for each frame and use them in JsonParser + Attributes: + labels (list): List of label module. + top (int): + left (int): + width (int): + height (int): + + Args: + bbox_id (float): + top (int): + left (int): + width (int): + height (int): + + References: + Check Label module for better understanding. + + + """ + + def __init__(self, bbox_id, top, left, width, height): + self.labels = [] + self.bbox_id = bbox_id + self.top = top + self.left = left + self.width = width + self.height = height + + def add_label(self, category, confidence): + # adds category and confidence only if top_k is not exceeded. + self.labels.append(Label(category, confidence)) + + def labels_full(self, value): + return len(self.labels) == value + + +class Frame(BaseJsonLogger): + """ + This module stores the information for each frame and use them in JsonParser + Attributes: + timestamp (float): The elapsed time of captured frame + frame_id (int): The frame number of the captured video + bboxes (list of Bbox objects): Stores the list of bbox objects. + + References: + Check Bbox class for better information + + Args: + timestamp (float): + frame_id (int): + + """ + + def __init__(self, frame_id: int, timestamp: float = None): + self.frame_id = frame_id + self.timestamp = timestamp + self.bboxes = [] + + def add_bbox(self, bbox_id: int, top: int, left: int, width: int, height: int): + bboxes_ids = [bbox.bbox_id for bbox in self.bboxes] + if bbox_id not in bboxes_ids: + self.bboxes.append(Bbox(bbox_id, top, left, width, height)) + else: + raise ValueError("Frame with id: {} already has a Bbox with id: {}".format(self.frame_id, bbox_id)) + + def add_label_to_bbox(self, bbox_id: int, category: str, confidence: float): + bboxes = {bbox.id: bbox for bbox in self.bboxes} + if bbox_id in bboxes.keys(): + res = bboxes.get(bbox_id) + res.add_label(category, confidence) + else: + raise ValueError('the bbox with id: {} does not exists!'.format(bbox_id)) + + +class BboxToJsonLogger(BaseJsonLogger): + """ + ُ This module is designed to automate the task of logging jsons. 
An example json is used + to show the contents of json file shortly + Example: + { + "video_details": { + "frame_width": 1920, + "frame_height": 1080, + "frame_rate": 20, + "video_name": "/home/gpu/codes/MSD/pedestrian_2/project/public/camera1.avi" + }, + "frames": [ + { + "frame_id": 329, + "timestamp": 3365.1254 + "bboxes": [ + { + "labels": [ + { + "category": "pedestrian", + "confidence": 0.9 + } + ], + "bbox_id": 0, + "top": 1257, + "left": 138, + "width": 68, + "height": 109 + } + ] + }], + + Attributes: + frames (dict): It's a dictionary that maps each frame_id to json attributes. + video_details (dict): information about video file. + top_k_labels (int): shows the allowed number of labels + start_time (datetime object): we use it to automate the json output by time. + + Args: + top_k_labels (int): shows the allowed number of labels + + """ + + def __init__(self, top_k_labels: int = 1): + self.frames = {} + self.video_details = self.video_details = dict(frame_width=None, frame_height=None, frame_rate=None, + video_name=None) + self.top_k_labels = top_k_labels + self.start_time = datetime.now() + + def set_top_k(self, value): + self.top_k_labels = value + + def frame_exists(self, frame_id: int) -> bool: + """ + Args: + frame_id (int): + + Returns: + bool: true if frame_id is recognized + """ + return frame_id in self.frames.keys() + + def add_frame(self, frame_id: int, timestamp: float = None) -> None: + """ + Args: + frame_id (int): + timestamp (float): opencv captured frame time property + + Raises: + ValueError: if frame_id would not exist in class frames attribute + + Returns: + None + + """ + if not self.frame_exists(frame_id): + self.frames[frame_id] = Frame(frame_id, timestamp) + else: + raise ValueError("Frame id: {} already exists".format(frame_id)) + + def bbox_exists(self, frame_id: int, bbox_id: int) -> bool: + """ + Args: + frame_id: + bbox_id: + + Returns: + bool: if bbox exists in frame bboxes list + """ + bboxes = [] + if self.frame_exists(frame_id=frame_id): + bboxes = [bbox.bbox_id for bbox in self.frames[frame_id].bboxes] + return bbox_id in bboxes + + def find_bbox(self, frame_id: int, bbox_id: int): + """ + + Args: + frame_id: + bbox_id: + + Returns: + bbox_id (int): + + Raises: + ValueError: if bbox_id does not exist in the bbox list of specific frame. 
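Taken together, the logger in this file is driven frame by frame: register the frame, attach bounding boxes, attach labels, then dump everything to JSON. A hypothetical end-to-end sketch (file name and values are made up; import path per the repository layout; the methods used here are defined further down in this file):

```python
from deep_sort.utils.json_logger import BboxToJsonLogger

logger = BboxToJsonLogger(top_k_labels=1)
logger.add_video_details(frame_width=1920, frame_height=1080, frame_rate=20,
                         video_name="camera1.avi")

logger.add_frame(frame_id=0, timestamp=0.0)
logger.add_bbox_to_frame(frame_id=0, bbox_id=1, top=50, left=100, width=68, height=109)
logger.add_label_to_bbox(frame_id=0, bbox_id=1, category="pedestrian", confidence=0.9)

logger.json_output("tracking_log")   # writes tracking_log.json in the working directory
```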
+ """ + if not self.bbox_exists(frame_id, bbox_id): + raise ValueError("frame with id: {} does not contain bbox with id: {}".format(frame_id, bbox_id)) + bboxes = {bbox.bbox_id: bbox for bbox in self.frames[frame_id].bboxes} + return bboxes.get(bbox_id) + + def add_bbox_to_frame(self, frame_id: int, bbox_id: int, top: int, left: int, width: int, height: int) -> None: + """ + + Args: + frame_id (int): + bbox_id (int): + top (int): + left (int): + width (int): + height (int): + + Returns: + None + + Raises: + ValueError: if bbox_id already exist in frame information with frame_id + ValueError: if frame_id does not exist in frames attribute + """ + if self.frame_exists(frame_id): + frame = self.frames[frame_id] + if not self.bbox_exists(frame_id, bbox_id): + frame.add_bbox(bbox_id, top, left, width, height) + else: + raise ValueError( + "frame with frame_id: {} already contains the bbox with id: {} ".format(frame_id, bbox_id)) + else: + raise ValueError("frame with frame_id: {} does not exist".format(frame_id)) + + def add_label_to_bbox(self, frame_id: int, bbox_id: int, category: str, confidence: float): + """ + Args: + frame_id: + bbox_id: + category: + confidence: the confidence value returned from yolo detection + + Returns: + None + + Raises: + ValueError: if labels quota (top_k_labels) exceeds. + """ + bbox = self.find_bbox(frame_id, bbox_id) + if not bbox.labels_full(self.top_k_labels): + bbox.add_label(category, confidence) + else: + raise ValueError("labels in frame_id: {}, bbox_id: {} is fulled".format(frame_id, bbox_id)) + + def add_video_details(self, frame_width: int = None, frame_height: int = None, frame_rate: int = None, + video_name: str = None): + self.video_details['frame_width'] = frame_width + self.video_details['frame_height'] = frame_height + self.video_details['frame_rate'] = frame_rate + self.video_details['video_name'] = video_name + + def output(self): + output = {'video_details': self.video_details} + result = list(self.frames.values()) + output['frames'] = [item.dic() for item in result] + return output + + def json_output(self, output_name): + """ + Args: + output_name: + + Returns: + None + + Notes: + It creates the json output with `output_name` name. + """ + if not output_name.endswith('.json'): + output_name += '.json' + with open(output_name, 'w') as file: + json.dump(self.output(), file) + file.close() + + def set_start(self): + self.start_time = datetime.now() + + def schedule_output_by_time(self, output_dir=JsonMeta.PATH_TO_SAVE, hours: int = 0, minutes: int = 0, + seconds: int = 60) -> None: + """ + Notes: + Creates folder and then periodically stores the jsons on that address. + + Args: + output_dir (str): the directory where output files will be stored + hours (int): + minutes (int): + seconds (int): + + Returns: + None + + """ + end = datetime.now() + interval = 0 + interval += abs(min([hours, JsonMeta.HOURS]) * 3600) + interval += abs(min([minutes, JsonMeta.MINUTES]) * 60) + interval += abs(min([seconds, JsonMeta.SECONDS])) + diff = (end - self.start_time).seconds + + if diff > interval: + output_name = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '.json' + if not exists(output_dir): + makedirs(output_dir) + output = join(output_dir, output_name) + self.json_output(output_name=output) + self.frames = {} + self.start_time = datetime.now() + + def schedule_output_by_frames(self, frames_quota, frame_counter, output_dir=JsonMeta.PATH_TO_SAVE): + """ + saves as the number of frames quota increases higher. 
+ :param frames_quota: + :param frame_counter: + :param output_dir: + :return: + """ + pass + + def flush(self, output_dir): + """ + Notes: + We use this function to output jsons whenever possible. + like the time that we exit the while loop of opencv. + + Args: + output_dir: + + Returns: + None + + """ + filename = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '-remaining.json' + output = join(output_dir, filename) + self.json_output(output_name=output) diff --git a/deep_sort/utils/log.py b/deep_sort/utils/log.py new file mode 100644 index 0000000000000000000000000000000000000000..0d48757dca88f35e9ea2cd1ca16e41bac9976a45 --- /dev/null +++ b/deep_sort/utils/log.py @@ -0,0 +1,17 @@ +import logging + + +def get_logger(name='root'): + formatter = logging.Formatter( + # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') + fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + + handler = logging.StreamHandler() + handler.setFormatter(formatter) + + logger = logging.getLogger(name) + logger.setLevel(logging.INFO) + logger.addHandler(handler) + return logger + + diff --git a/deep_sort/utils/parser.py b/deep_sort/utils/parser.py new file mode 100644 index 0000000000000000000000000000000000000000..0a611bd0f182d16fecb4ab987d07896e4d1c7a48 --- /dev/null +++ b/deep_sort/utils/parser.py @@ -0,0 +1,38 @@ +import os +import yaml +from easydict import EasyDict as edict + +class YamlParser(edict): + """ + This is yaml parser based on EasyDict. + """ + def __init__(self, cfg_dict=None, config_file=None): + if cfg_dict is None: + cfg_dict = {} + + if config_file is not None: + assert(os.path.isfile(config_file)) + with open(config_file, 'r') as fo: + cfg_dict.update(yaml.load(fo.read())) + + super(YamlParser, self).__init__(cfg_dict) + + + def merge_from_file(self, config_file): + with open(config_file, 'r') as fo: + #self.update(yaml.load(fo.read())) + self.update(yaml.load(fo.read(),Loader=yaml.FullLoader)) + + def merge_from_dict(self, config_dict): + self.update(config_dict) + + +def get_config(config_file=None): + return YamlParser(config_file=config_file) + + +if __name__ == "__main__": + cfg = YamlParser(config_file="../configs/yolov3.yaml") + cfg.merge_from_file("../configs/deep_sort.yaml") + + import ipdb; ipdb.set_trace() \ No newline at end of file diff --git a/deep_sort/utils/tools.py b/deep_sort/utils/tools.py new file mode 100644 index 0000000000000000000000000000000000000000..965fb69c2df41510fd740a4ab57d8fc7b81012de --- /dev/null +++ b/deep_sort/utils/tools.py @@ -0,0 +1,39 @@ +from functools import wraps +from time import time + + +def is_video(ext: str): + """ + Returns true if ext exists in + allowed_exts for video files. + + Args: + ext: + + Returns: + + """ + + allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp') + return any((ext.endswith(x) for x in allowed_exts)) + + +def tik_tok(func): + """ + keep track of time for each process. 
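tools.py bundles two small helpers: `is_video` filters file extensions and `tik_tok` is a timing decorator that prints per-call duration and the implied fps. A quick usage sketch (the decorated function below is only a stand-in; import path per the repository layout):

```python
from time import sleep
from deep_sort.utils.tools import is_video, tik_tok

print(is_video("clip.mp4"))   # True: the extension is in the allowed list

@tik_tok
def process_frame():
    sleep(0.05)               # stand-in for real per-frame work

process_frame()               # prints the elapsed time and the equivalent fps
```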
+ Args: + func: + + Returns: + + """ + @wraps(func) + def _time_it(*args, **kwargs): + start = time() + try: + return func(*args, **kwargs) + finally: + end_ = time() + print("time: {:.03f}s, fps: {:.03f}".format(end_ - start, 1 / (end_ - start))) + + return _time_it diff --git a/demo.png b/demo.png new file mode 100644 index 0000000000000000000000000000000000000000..7cd76ebef34492ccdbfd2c4018faed60bd224dc0 --- /dev/null +++ b/demo.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63badaa5a46ac0df31a2f659acae12cf4fdf5fe1389c0047a8a094a303ae0e4e +size 1548344 diff --git a/main.py b/main.py new file mode 100644 index 0000000000000000000000000000000000000000..d8905fe547b5cefa5e3215d89cfd1d4ae2735a93 --- /dev/null +++ b/main.py @@ -0,0 +1,118 @@ +from ultralytics import YOLO +import cv2 +import numpy as np +import tempfile +from pathlib import Path +import deep_sort.deep_sort.deep_sort as ds + +def putTextWithBackground(img, text, origin, font=cv2.FONT_HERSHEY_SIMPLEX, font_scale=1, text_color=(255, 255, 255), bg_color=(0, 0, 0), thickness=1): + """绘制带有背景的文本。 + + :param img: 输入图像。 + :param text: 要绘制的文本。 + :param origin: 文本的左上角坐标。 + :param font: 字体类型。 + :param font_scale: 字体大小。 + :param text_color: 文本的颜色。 + :param bg_color: 背景的颜色。 + :param thickness: 文本的线条厚度。 + """ + # 计算文本的尺寸 + (text_width, text_height), _ = cv2.getTextSize(text, font, font_scale, thickness) + + # 绘制背景矩形 + bottom_left = origin + top_right = (origin[0] + text_width, origin[1] - text_height - 5) # 减去5以留出一些边距 + cv2.rectangle(img, bottom_left, top_right, bg_color, -1) + + # 在矩形上绘制文本 + text_origin = (origin[0], origin[1] - 5) # 从左上角的位置减去5来留出一些边距 + cv2.putText(img, text, text_origin, font, font_scale, text_color, thickness, lineType=cv2.LINE_AA) + +# 视频处理 +def processVideo(inputPath: str) -> Path: + """处理视频,检测并跟踪行人。 + + :param inputPath: 视频文件路径 + :return: 输出视频的路径 + """ + # 读取视频文件 + cap = cv2.VideoCapture(inputPath) + fps = cap.get(cv2.CAP_PROP_FPS) # 获取视频的帧率 + size = ( + int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), + int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), + ) # 获取视频的大小 + output_video = cv2.VideoWriter() # 初始化视频写入 + + # 输出格式为XVID格式的avi文件 + # 如果需要使用h264编码或者需要保存为其他格式,可能需要下载openh264-1.8.0 + # 下载地址:https://github.com/cisco/openh264/releases/tag/v1.8.0 + # 下载完成后将dll文件放在当前文件夹内 + fourcc = cv2.VideoWriter_fourcc(*"XVID") + video_save_path = Path(outputPath) / "output.avi" # 创建输出视频路径 + + output_video.open(video_save_path.as_posix(), fourcc, fps, size, isColor=True) + + # 对每一帧图片进行读取和处理 + while True: + success, frame = cap.read() + if not (success): + break + + # 获取每一帧的目标检测推理结果 + results = model(frame, stream=True) + + detections = [] # 存放bounding box结果 + confarray = [] # 存放每个检测结果的置信度 + + # 读取目标检测推理结果 + # 参考: https://docs.ultralytics.com/modes/predict/#working-with-results + for r in results: + boxes = r.boxes + for box in boxes: + x1, y1, x2, y2 = map(int, box.xywh[0]) # 提取矩形框左上和右下的点,并将tensor类型转为整型 + conf = round(float(box.conf[0]), 2) # 对conf四舍五入到2位小数 + cls = int(box.cls[0]) # 获取物体类别标签 + + if cls == detect_class: + detections.append([x1, y1, x2, y2]) + confarray.append(conf) + + # 使用deepsort进行跟踪 + resultsTracker = tracker.update(np.array(detections), confarray, frame) + for x1, y1, x2, y2, Id in resultsTracker: + x1, y1, x2, y2 = map(int, [x1, y1, x2, y2]) + + # 绘制bounding box + cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 255), 3) + putTextWithBackground(frame, str(int(Id)), (max(-10, x1), max(40, y1)), font_scale=1.5, text_color=(255, 255, 255), bg_color=(255, 0, 255)) + + output_video.write(frame) # 将处理后的图像写入视频 
+ output_video.release() # 释放 + cap.release() # 释放 + print(f'output dir is: {video_save_path}') + return video_save_path + + +if __name__ == "__main__": + # 在这里填入视频文件路径 + ###### + input_video_path = "test.mp4" + ###### + + # 输出文件夹,默认为系统的临时文件夹路径 + outputPath = tempfile.mkdtemp() # 创建临时文件夹用于存储输出视频 + + # 加载yoloV8模型权重 + model = YOLO("yolov8n.pt") + + # 需要跟踪的物体类别,model.names返回模型所支持的所有物体类别 + # yoloV8官方模型的第一个类别为'person' + detect_class = 0 + print(f"detecting {model.names[detect_class]}") + + # 加载deepsort模型权重 + tracker = ds.DeepSort("deep_sort/deep_sort/deep/checkpoint/ckpt.t7") + + processVideo(input_video_path) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..4ff50d3b773b9a435e6f3ebd450dae97abbf42bb --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +# requirements +# Example: pip install -r requirements.txt + +# Base --------------------------------------- +ultralytics +opencv-python +torch +matplotlib + +# WebUI --------------------------------------- +# gradio \ No newline at end of file diff --git a/test.mp4 b/test.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..760e480e7965cc4bc5f42e4228a681a5e98dd2f5 --- /dev/null +++ b/test.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7fd481972ef0de65551d0b1197e6911b4d3fde4babde4190b1a454a7186426e +size 1266562 diff --git a/webui.png b/webui.png new file mode 100644 index 0000000000000000000000000000000000000000..c893f57c825815374f76ae1c7b6a24e8cca2d5bc Binary files /dev/null and b/webui.png differ diff --git a/yolov8n.pt b/yolov8n.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d0becea028c1952ecc77c608b46b246e8254c88 --- /dev/null +++ b/yolov8n.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31e20dde3def09e2cf938c7be6fe23d9150bbbe503982af13345706515f2ef95 +size 6534387