Spaces:

KdaiP
/

yolov8-deepsort-tracking

Runtime error

App Files Files Community

KdaiP committed on Feb 11, 2024

Commit

3dcbe08

verified ·

1 Parent(s): 72c39aa

update

Browse files

Files changed (6) hide show

.gitattributes +1 -0
README.md +75 -13
app.py +145 -78
main.py +71 -52
requirements.txt +1 -0
webui.png +0 -0

.gitattributes CHANGED Viewed

@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 deep_sort/deep_sort/deep/checkpoint/ckpt.t7 filter=lfs diff=lfs merge=lfs -text
 demo.png filter=lfs diff=lfs merge=lfs -text
 test.mp4 filter=lfs diff=lfs merge=lfs -text

 deep_sort/deep_sort/deep/checkpoint/ckpt.t7 filter=lfs diff=lfs merge=lfs -text
 demo.png filter=lfs diff=lfs merge=lfs -text
 test.mp4 filter=lfs diff=lfs merge=lfs -text
+webui.png filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,13 +1,75 @@
----
-title: Yolov8 Deepsort Tracking
-emoji: 👀
-colorFrom: red
-colorTo: green
-sdk: gradio
-sdk_version: 3.48.0
-app_file: app.py
-pinned: false
-license: mit
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+<div align="center">
+<h1> yolov8-deepsort-tracking </h1>
+[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/KdaiP/yolov8-deepsort-tracking)
+</div>
+![示例图片](./demo.png)
+opencv+yolov8+deepsort的行人检测与跟踪。当然，也可以识别车辆等其他类别。
+# 更新历史
+2024/2/11更新：清理代码，完善注释。WebUI新增识别目标选择、进度条显示、终止推理、示例等功能。
+2023/10/17更新：简化代码，删除不必要的依赖。解决webui上传视频不会清空tracker ID的问题。
+2023/7/4更新：加入了一个基于Gradio的WebUI界面
+## 安装
+环境：Python>=3.8
+本项目需要pytorch，建议手动在[pytorch官网](https://pytorch.org/get-started/locally/)根据自己的平台和CUDA环境安装对应的版本。
+pytorch的详细安装教程可以参照[Conda Quickstart Guide for Ultralytics](https://docs.ultralytics.com/guides/conda-quickstart/)
+安装完pytorch后，需要通过以下命令来安装其他依赖：
+```shell
+$ pip install -r requirements.txt
+```
+如果需要使用GUI，需要通过以下命令安装tqdm进度条和Gradio库：
+```shell
+$ pip install tqdm gradio
+```
+## 配置(非WebUI)
+在main.py中修改以下代码，将输入视频路径换成你要处理的视频的路径：
+```python
+input_path = "test.mp4"
+```
+模型默认使用Ultralytics官方的YOLOv8n模型：
+```python
+model = YOLO("yolov8n.pt")
+```
+第一次使用会自动从官网下载模型，如果网速过慢，可以在[ultralytics的官方文档](https://docs.ultralytics.com/tasks/detect/)下载模型，然后将模型文件拷贝到程序所在目录下。
+## 运行(非WebUI)
+运行main.py
+推理完成后，终端会显示输出视频所在的路径。
+## WebUI界面的配置和运行
+demo: [Huggingface demo](https://huggingface.co/spaces/KdaiP/yolov8-deepsort-tracking)
+运行app.py，如果控制台出现以下消息代表成功运行：
+```shell
+Running on local URL:  http://127.0.0.1:6006
+To create a public link, set `share=True` in `launch()`
+```
+浏览器打开该URL即可使用WebUI界面
+![WebUI](./webui.png)

app.py CHANGED Viewed

@@ -3,13 +3,47 @@ import cv2
 import numpy as np
 import tempfile
 from pathlib import Path
 import deep_sort.deep_sort.deep_sort as ds
 import gradio as gr
-# YoloV8官方模型，从左往右由小到大，第一次使用会自动下载
-model_list = ["yolov8n.pt", "yolov8s.pt", "yolov8m.pt", "yolov8l.pt", "yolov8x.pt"]
 def putTextWithBackground(
     img,
     text,
@@ -53,113 +87,146 @@ def putTextWithBackground(
     )
-# 视频处理
-def processVideo(inputPath, model):
-    """处理视频，检测并跟踪行人。
-    :param inputPath: 视频文件路径
-    :return: 输出视频的路径
     """
-    tracker = ds.DeepSort(
-        "deep_sort/deep_sort/deep/checkpoint/ckpt.t7"
-    )  # 加载deepsort权重文件
-    model = YOLO(model)  # 加载YOLO模型文件
-    # 读取视频文件
-    cap = cv2.VideoCapture(inputPath)
     fps = cap.get(cv2.CAP_PROP_FPS)  # 获取视频的帧率
-    size = (
-        int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
-        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
-    )  # 获取视频的大小
-    output_video = cv2.VideoWriter()  # 初始化视频写入
-    outputPath = tempfile.mkdtemp()  # 创建输出视频的临时文件夹的路径
-    # 输出格式为XVID格式的avi文件
     # 如果需要使用h264编码或者需要保存为其他格式，可能需要下载openh264-1.8.0
     # 下载地址：https://github.com/cisco/openh264/releases/tag/v1.8.0
     # 下载完成后将dll文件放在当前文件夹内
-    output_type = "avi"
-    if output_type == "avi":
-        fourcc = cv2.VideoWriter_fourcc(*"XVID")
-        video_save_path = Path(outputPath) / "output.avi"  # 创建输出视频路径
-    if output_type == "mp4":  # 浏览器只支持播放h264编码的mp4视频文件
-        fourcc = cv2.VideoWriter_fourcc(*"h264")
-        video_save_path = Path(outputPath) / "output.mp4"
-    output_video.open(video_save_path.as_posix(), fourcc, fps, size, True)
     # 对每一帧图片进行读取和处理
-    while True:
-        success, frame = cap.read()
         if not (success):
             break
-        # 获取每一帧的目标检测推理结果
         results = model(frame, stream=True)
-        detections = np.empty((0, 4))  # 存放bounding box结果
-        confarray = []  # 存放每个检测结果的置信度
-        # 读取目标检测推理结果
-        # 参考： https://docs.ultralytics.com/modes/predict/#working-with-results
-        for r in results:
-            boxes = r.boxes
-            for box in boxes:
-                x1, y1, x2, y2 = map(int, box.xywh[0])  # 提取矩形框左上和右下的点，并将tensor类型转为整型
-                conf = round(float(box.conf[0]), 2)  # 对conf四舍五入到2位小数
-                cls = int(box.cls[0])  # 获取物体类别标签
-                if cls == detect_class:
-                    detections = np.vstack((detections,np.array([x1,y1,x2,y2])))
-                    confarray.append(conf)
-        # 使用deepsort进行跟踪
         resultsTracker = tracker.update(detections, confarray, frame)
         for x1, y1, x2, y2, Id in resultsTracker:
-            x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
-            # 绘制bounding box
             cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 255), 3)
-            putTextWithBackground(
-                frame,
-                str(int(Id)),
-                (max(-10, x1), max(40, y1)),
-                font_scale=1.5,
-                text_color=(255, 255, 255),
-                bg_color=(255, 0, 255),
-            )
-        output_video.write(frame)  # 将处理后的图像写入视频
-    output_video.release()  # 释放
-    cap.release()  # 释放
-    print(f"output dir is: {video_save_path.as_posix()}")
-    return video_save_path.as_posix(), video_save_path.as_posix()  # Gradio的视频控件实际读取的是文件路径
 if __name__ == "__main__":
-    # 需要跟踪的物体类别
-    detect_class = 0
     # Gradio参考文档：https://www.gradio.app/guides/blocks-and-event-listeners
     with gr.Blocks() as demo:
         with gr.Tab("Tracking"):
             gr.Markdown(
                 """
-                # YoloV8 + deepsort
                 基于opencv + YoloV8 + deepsort
                 """
             )
             with gr.Row():
                 with gr.Column():
-                    input_video = gr.Video(label="Input video")
-                    model = gr.Dropdown(model_list, value="yolov8n.pt", label="Model")
                 with gr.Column():
-                    output = gr.Video()
-                    output_path = gr.Textbox(label="Output path")
-            button = gr.Button("Process")
-        button.click(
-            processVideo, inputs=[input_video, model], outputs=[output, output_path]
-        )
     demo.launch()

 import numpy as np
 import tempfile
 from pathlib import Path
+from tqdm.auto import tqdm
 import deep_sort.deep_sort.deep_sort as ds
 import gradio as gr
+# 控制处理流程是否终止
+should_continue = True
+def get_detectable_classes(model_file):
+    """Return the names of the classes a YOLO model file can detect.
+
+    Parameters:
+    - model_file: file name of the model weights passed to ``YOLO``.
+
+    Returns:
+    - A list built from the values of the loaded model's ``names`` mapping.
+    """
+    detector = YOLO(model_file)
+    labels = [label for label in detector.names.values()]
+    # Only the names are needed; drop the local reference to the model before returning.
+    del detector
+    return labels
+# Stop handler for video processing.
+def stop_processing():
+    """Clear the module-level ``should_continue`` flag and return a status message."""
+    global should_continue
+    # The processing loop checks this flag once per frame and exits when it is False.
+    should_continue = False
+    return "尝试终止处理..."
+# Start handler for video processing.
+# gr.Progress(track_tqdm=True) captures tqdm progress bars so that progress is shown in the GUI.
+def start_processing(input_path, output_path, detect_class, model, progress=gr.Progress(track_tqdm=True)):
+    """Load the detector and tracker, process the video, and return the result twice.
+
+    - input_path: path of the input video.
+    - output_path: directory handed on to ``detect_and_track``.
+    - detect_class: class index; converted with ``int()`` before use.
+    - model: YOLO model file name to load.
+    - progress: Gradio progress tracker; not referenced in the body.
+
+    Returns a 2-tuple whose elements are both the value returned by
+    ``detect_and_track``.
+    """
+    global should_continue
+    should_continue = True  # re-arm the flag that stop_processing() clears
+    class_index = int(detect_class)
+    detector = YOLO(model)
+    deepsort = ds.DeepSort("deep_sort/deep_sort/deep/checkpoint/ckpt.t7")
+    result_path = detect_and_track(input_path, output_path, class_index, detector, deepsort)
+    return result_path, result_path
 def putTextWithBackground(
     img,
     text,
     )
+def extract_detections(results, detect_class):
+    """
+    Collect the boxes and confidences of one class from YOLOv8 prediction results.
+
+    - results: iterable of YOLOv8 result objects; each exposes ``boxes`` whose items
+      provide ``cls``, ``xywh`` and ``conf`` tensors.
+    - detect_class: index of the class to keep.
+
+    Returns ``(detections, confarray)``: a float array of shape (N, 4) holding the
+    integer-truncated ``xywh`` values of every matching box, and a list with the
+    matching confidences rounded to two decimals.
+
+    Reference: https://docs.ultralytics.com/modes/predict/#working-with-results
     """
+    rows = []       # one 4-value xywh entry per matching box
+    confarray = []  # confidence of each matching box, in the same order
+    for r in results:
+        for box in r.boxes:
+            # Skip boxes that belong to any other class.
+            if int(box.cls[0]) != detect_class:
+                continue
+            rows.append(box.xywh[0].int().tolist())
+            confarray.append(round(box.conf[0].item(), 2))
+    # Build the array once instead of calling np.vstack per box, which re-copies
+    # everything collected so far. The reshape keeps the (0, 4) shape when nothing
+    # matched, so the tracker still receives a 2-D array.
+    detections = np.array(rows, dtype=np.float64).reshape(-1, 4)
+    return detections, confarray
+# Video processing
+def detect_and_track(input_path: str, output_path: str, detect_class: int, model, tracker) -> Path:
+    """
+    Process a video: detect and track targets.
+    - input_path: path of the input video file.
+    - output_path: directory in which the processed video ("output.avi") is saved.
+    - detect_class: index of the target class to detect and track.
+    - model: model used for object detection; it is called on every frame.
+    - tracker: tracker whose ``update`` method receives the detections of every frame.
+    Returns the output video path, or None when the input video cannot be opened.
+    """
+    global should_continue
+    cap = cv2.VideoCapture(input_path)  # Open the video file with OpenCV.
+    if not cap.isOpened():  # Check that the video file opened successfully.
+        print(f"Error opening video file {input_path}")
+        return None
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) # Total number of frames reported for the video.
     fps = cap.get(cv2.CAP_PROP_FPS)  # Frame rate of the video.
+    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))) # Resolution of the video (width, height).
+    output_video_path = Path(output_path) / "output.avi" # Path of the output video.
+    # Encode the output as an XVID avi file.
     # If h264 encoding or another output format is needed, openh264-1.8.0 may have to be downloaded.
     # Download: https://github.com/cisco/openh264/releases/tag/v1.8.0
     # After downloading, place the dll file in the current folder.
+    fourcc = cv2.VideoWriter_fourcc(*"XVID")
+    output_video = cv2.VideoWriter(output_video_path.as_posix(), fourcc, fps, size, isColor=True) # Create the VideoWriter used to write the output video.
     # Read and process every frame.
+    # tqdm displays the processing progress.
+    # NOTE(review): the loop runs at most total_frames times, so an under-reported frame count would cut processing short — confirm for the containers in use.
+    for _ in tqdm(range(total_frames)):
+        # When the global should_continue is False (typically after the Stop button in the GUI), stop detecting and tracking and return the part processed so far.
+        if not should_continue:
+            print('stopping process')
+            break
+        success, frame = cap.read() # Read the next frame.
+        # Leave the loop when the read fails (or the video has been fully processed).
         if not (success):
             break
+        # Run the YOLOv8 model on the current frame.
         results = model(frame, stream=True)
+        # Extract the detection information from the prediction results.
+        detections, confarray = extract_detections(results, detect_class)
+        # Track the detected targets with deepsort.
         resultsTracker = tracker.update(detections, confarray, frame)
         for x1, y1, x2, y2, Id in resultsTracker:
+            x1, y1, x2, y2 = map(int, [x1, y1, x2, y2]) # Convert the coordinates to integers.
+            # Draw the bounding box and its text label.
             cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 255), 3)
+            putTextWithBackground(frame, str(int(Id)), (max(-10, x1), max(40, y1)), font_scale=1.5, text_color=(255, 255, 255), bg_color=(255, 0, 255))
+        output_video.write(frame)  # Write the processed frame to the output video file.
+    # NOTE(review): the two release calls are not guarded by try/finally, so they are skipped if the loop raises.
+    output_video.release()  # Release the VideoWriter.
+    cap.release()  # Release the video file.
+    print(f'output dir is: {output_video_path}')
+    return output_video_path
 if __name__ == "__main__":
+    # Official YOLOv8 models, ordered from smallest to largest; weights are downloaded automatically on first use.
+    model_list = ["yolov8n.pt", "yolov8s.pt", "yolov8m.pt", "yolov8l.pt", "yolov8x.pt"]
+    # All classes the YOLOv8 model can detect, read from the first model in model_list.
+    detect_classes = get_detectable_classes(model_list[0])
+    # Example inputs for the Gradio UI: test video path, a newly created temporary output directory, class, model.
+    examples = [["test.mp4", tempfile.mkdtemp(), detect_classes[0], model_list[0]],]
+    # Build the GUI with Gradio Blocks.
     # Gradio reference: https://www.gradio.app/guides/blocks-and-event-listeners
     with gr.Blocks() as demo:
         with gr.Tab("Tracking"):
+            # Markdown text introducing what the interface does.
             gr.Markdown(
                 """
+                # 目标检测与跟踪
                 基于opencv + YoloV8 + deepsort
                 """
             )
+            # Row container: lays its children out horizontally.
             with gr.Row():
+                # Column container: lays its children out vertically.
                 with gr.Column():
+                    input_path = gr.Video(label="Input video") # Video input used to upload the video file.
+                    # The default must be one of the choices, so pass the first entry itself rather than the index 0.
+                    model = gr.Dropdown(model_list, value=model_list[0], label="Model") # Dropdown for choosing the model.
+                    # type='index' only changes what the handler receives; the default is still given as a choice.
+                    detect_class = gr.Dropdown(detect_classes, value=detect_classes[0], label="Class", type='index') # Dropdown for choosing the target class.
+                    output_dir = gr.Textbox(label="Output dir", value=tempfile.mkdtemp()) # Output directory, defaulting to a new temporary directory.
+                    with gr.Row():
+                        # Two buttons: one starts processing, the other stops it.
+                        start_button = gr.Button("Process")
+                        stop_button = gr.Button("Stop")
                 with gr.Column():
+                    output = gr.Video() # Video component showing the processed output.
+                    output_path = gr.Textbox(label="Output path") # Textbox showing the path of the output video.
+                    # Predefined examples for quick testing.
+                    gr.Examples(examples,label="Examples",
+                            inputs=[input_path, output_dir, detect_class, model],
+                            outputs=[output, output_path],
+                            fn=start_processing, # Function called when an example is processed.
+                            cache_examples=False) # Example caching disabled.
+        # Bind the buttons to their handlers.
+        start_button.click(start_processing, inputs=[input_path, output_dir, detect_class, model], outputs=[output, output_path])
+        stop_button.click(stop_processing)
     demo.launch()

main.py CHANGED Viewed

@@ -1,8 +1,9 @@
-from ultralytics import YOLO
-import cv2
-import numpy as np
 import tempfile
 from pathlib import Path
 import deep_sort.deep_sort.deep_sort as ds
 def putTextWithBackground(img, text, origin, font=cv2.FONT_HERSHEY_SIMPLEX, font_scale=1, text_color=(255, 255, 255), bg_color=(0, 0, 0), thickness=1):
@@ -28,91 +29,109 @@ def putTextWithBackground(img, text, origin, font=cv2.FONT_HERSHEY_SIMPLEX, font
     # 在矩形上绘制文本
     text_origin = (origin[0], origin[1] - 5)  # 从左上角的位置减去5来留出一些边距
     cv2.putText(img, text, text_origin, font, font_scale, text_color, thickness, lineType=cv2.LINE_AA)
 # 视频处理
-def processVideo(inputPath: str) -> Path:
-    """处理视频，检测并跟踪行人。
-    :param inputPath: 视频文件路径
-    :return: 输出视频的路径
     """
-    # 读取视频文件
-    cap = cv2.VideoCapture(inputPath)
     fps = cap.get(cv2.CAP_PROP_FPS)  # 获取视频的帧率
-    size = (
-        int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
-        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
-    )  # 获取视频的大小
-    output_video = cv2.VideoWriter()  # 初始化视频写入
-    # 输出格式为XVID格式的avi文件
     # 如果需要使用h264编码或者需要保存为其他格式，可能需要下载openh264-1.8.0
     # 下载地址：https://github.com/cisco/openh264/releases/tag/v1.8.0
     # 下载完成后将dll文件放在当前文件夹内
     fourcc = cv2.VideoWriter_fourcc(*"XVID")
-    video_save_path = Path(outputPath) / "output.avi"  # 创建输出视频路径
-    output_video.open(video_save_path.as_posix(), fourcc, fps, size, isColor=True)
     # 对每一帧图片进行读取和处理
     while True:
-        success, frame = cap.read()
         if not (success):
             break
-        # 获取每一帧的目标检测推理结果
         results = model(frame, stream=True)
-        detections = np.empty((0, 4)) # 存放bounding box结果
-        confarray = [] # 存放每个检测结果的置信度
-        # 读取目标检测推理结果
-        # 参考： https://docs.ultralytics.com/modes/predict/#working-with-results
-        for r in results:
-            boxes = r.boxes
-            for box in boxes:
-                x1, y1, x2, y2 = map(int, box.xywh[0])  # 提取矩形框左上和右下的点，并将tensor类型转为整型
-                conf = round(float(box.conf[0]), 2)  # 对conf四舍五入到2位小数
-                cls = int(box.cls[0])  # 获取物体类别标签
-                if cls == detect_class:
-                    detections = np.vstack((detections,np.array([x1,y1,x2,y2])))
-                    confarray.append(conf)
-        # 使用deepsort进行跟踪
         resultsTracker = tracker.update(detections, confarray, frame)
         for x1, y1, x2, y2, Id in resultsTracker:
-            x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
-            # 绘制bounding box
             cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 255), 3)
             putTextWithBackground(frame, str(int(Id)), (max(-10, x1), max(40, y1)), font_scale=1.5, text_color=(255, 255, 255), bg_color=(255, 0, 255))
-        output_video.write(frame)  # 将处理后的图像写入视频
-    output_video.release()  # 释放
-    cap.release()  # 释放
-    print(f'output dir is: {video_save_path}')
-    return video_save_path
 if __name__ == "__main__":
-    # 在这里填入视频文件路径
     ######
-    input_video_path = "test.mp4"
     ######
     # 输出文件夹，默认为系统的临时文件夹路径
-    outputPath = tempfile.mkdtemp()  # 创建临时文件夹用于存储输出视频
     # 加载yoloV8模型权重
     model = YOLO("yolov8n.pt")
-    # 需要跟踪的物体类别，model.names返回模型所支持的所有物体类别
     # yoloV8官方模型的第一个类别为'person'
     detect_class = 0
-    print(f"detecting {model.names[detect_class]}")
-    # 加载deepsort模型权重
     tracker = ds.DeepSort("deep_sort/deep_sort/deep/checkpoint/ckpt.t7")
-    processVideo(input_video_path)

 import tempfile
 from pathlib import Path
+import numpy as np
+import cv2 # opencv-python
+from ultralytics import YOLO
 import deep_sort.deep_sort.deep_sort as ds
 def putTextWithBackground(img, text, origin, font=cv2.FONT_HERSHEY_SIMPLEX, font_scale=1, text_color=(255, 255, 255), bg_color=(0, 0, 0), thickness=1):
     # 在矩形上绘制文本
     text_origin = (origin[0], origin[1] - 5)  # 从左上角的位置减去5来留出一些边距
     cv2.putText(img, text, text_origin, font, font_scale, text_color, thickness, lineType=cv2.LINE_AA)
+def extract_detections(results, detect_class):
+    """
+    Collect the boxes and confidences of one class from YOLOv8 prediction results.
+
+    - results: iterable of YOLOv8 result objects; each exposes ``boxes`` whose items
+      provide ``cls``, ``xywh`` and ``conf`` tensors.
+    - detect_class: index of the class to keep.
+
+    Returns ``(detections, confarray)``: a float array of shape (N, 4) holding the
+    integer-truncated ``xywh`` values of every matching box, and a list with the
+    matching confidences rounded to two decimals.
+
+    Reference: https://docs.ultralytics.com/modes/predict/#working-with-results
+    """
+    rows = []       # one 4-value xywh entry per matching box
+    confarray = []  # confidence of each matching box, in the same order
+    for r in results:
+        for box in r.boxes:
+            # Skip boxes that belong to any other class.
+            if int(box.cls[0]) != detect_class:
+                continue
+            rows.append(box.xywh[0].int().tolist())
+            confarray.append(round(box.conf[0].item(), 2))
+    # Build the array once instead of calling np.vstack per box, which re-copies
+    # everything collected so far. The reshape keeps the (0, 4) shape when nothing
+    # matched, so the tracker still receives a 2-D array.
+    detections = np.array(rows, dtype=np.float64).reshape(-1, 4)
+    return detections, confarray
 # 视频处理
+def detect_and_track(input_path: str, output_path: str, detect_class: int, model, tracker) -> Path:
+    """
+    Process a video: detect and track targets.
+    - input_path: path of the input video file.
+    - output_path: directory in which the processed video ("output.avi") is saved.
+    - detect_class: index of the target class to detect and track.
+    - model: model used for object detection; it is called on every frame.
+    - tracker: tracker whose ``update`` method receives the detections of every frame.
+    Returns the output video path, or None when the input video cannot be opened.
     """
+    cap = cv2.VideoCapture(input_path)  # Open the video file with OpenCV.
+    if not cap.isOpened():  # Check that the video file opened successfully.
+        print(f"Error opening video file {input_path}")
+        return None
     fps = cap.get(cv2.CAP_PROP_FPS)  # Frame rate of the video.
+    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))) # Resolution of the video (width, height).
+    output_video_path = Path(output_path) / "output.avi" # Path of the output video.
+    # Encode the output as an XVID avi file.
     # If h264 encoding or another output format is needed, openh264-1.8.0 may have to be downloaded.
     # Download: https://github.com/cisco/openh264/releases/tag/v1.8.0
     # After downloading, place the dll file in the current folder.
     fourcc = cv2.VideoWriter_fourcc(*"XVID")
+    output_video = cv2.VideoWriter(output_video_path.as_posix(), fourcc, fps, size, isColor=True) # Create the VideoWriter used to write the output video.
     # Read and process every frame.
     while True:
+        success, frame = cap.read() # Read the next frame.
+        # Leave the loop when the read fails (or the video has been fully processed).
         if not (success):
             break
+        # Run the YOLOv8 model on the current frame.
         results = model(frame, stream=True)
+        # Extract the detection information from the prediction results.
+        detections, confarray = extract_detections(results, detect_class)
+        # Track the detected targets with deepsort.
         resultsTracker = tracker.update(detections, confarray, frame)
         for x1, y1, x2, y2, Id in resultsTracker:
+            x1, y1, x2, y2 = map(int, [x1, y1, x2, y2]) # Convert the coordinates to integers.
+            # Draw the bounding box and its text label.
             cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 255), 3)
             putTextWithBackground(frame, str(int(Id)), (max(-10, x1), max(40, y1)), font_scale=1.5, text_color=(255, 255, 255), bg_color=(255, 0, 255))
+        output_video.write(frame)  # Write the processed frame to the output video file.
+    # NOTE(review): the two release calls are not guarded by try/finally, so they are skipped if the loop raises.
+    output_video.release()  # Release the VideoWriter.
+    cap.release()  # Release the video file.
+    print(f'output dir is: {output_video_path}')
+    return output_video_path
 if __name__ == "__main__":
+    # Path of the input video.
     ######
+    input_path = "test.mp4"
     ######
     # Output folder; defaults to a directory under the system temporary folder.
+    output_path = tempfile.mkdtemp()  # Create a temporary directory that will hold the output video.
     # Load the YOLOv8 model weights.
     model = YOLO("yolov8n.pt")
+    # Target class to detect and track.
     # The first class of the official YOLOv8 models is 'person'.
     detect_class = 0
+    print(f"detecting {model.names[detect_class]}") # model.names holds all object classes supported by the model.
+    # Load the DeepSort model.
     tracker = ds.DeepSort("deep_sort/deep_sort/deep/checkpoint/ckpt.t7")
+    detect_and_track(input_path, output_path, detect_class, model, tracker)

requirements.txt CHANGED Viewed

@@ -8,4 +8,5 @@ torch
 matplotlib
 # WebUI ---------------------------------------
 # gradio

 matplotlib
 # WebUI ---------------------------------------
+# tqdm
 # gradio

webui.png CHANGED Viewed

Git LFS Details

SHA256: c14e7d199c011d1cf6ba0897854cc983b4c0816071515debd21355b606d1a26e
Pointer size: 132 Bytes
Size of remote file: 1.15 MB