{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "YOLOv5 Tutorial", "provenance": [], "collapsed_sections": [], "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "eb95db7cae194218b3fcefb439b6352f": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_769ecde6f2e64bacb596ce972f8d3d2d", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_384a001876054c93b0af45cd1e960bfe", "IPY_MODEL_dded0aeae74440f7ba2ffa0beb8dd612", "IPY_MODEL_5296d28be75740b2892ae421bbec3657" ] } }, "769ecde6f2e64bacb596ce972f8d3d2d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "384a001876054c93b0af45cd1e960bfe": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_9f09facb2a6c4a7096810d327c8b551c", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_25621cff5d16448cb7260e839fd0f543" } }, "dded0aeae74440f7ba2ffa0beb8dd612": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_0ce7164fc0c74bb9a2b5c7037375a727", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 818322941, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 818322941, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_c4c4593c10904cb5b8a5724d60c7e181" } }, "5296d28be75740b2892ae421bbec3657": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { 
"_view_name": "HTMLView", "style": "IPY_MODEL_473371611126476c88d5d42ec7031ed6", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 780M/780M [00:11<00:00, 91.9MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_65efdfd0d26c46e79c8c5ff3b77126cc" } }, "9f09facb2a6c4a7096810d327c8b551c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "25621cff5d16448cb7260e839fd0f543": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "0ce7164fc0c74bb9a2b5c7037375a727": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "c4c4593c10904cb5b8a5724d60c7e181": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, 
"object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "473371611126476c88d5d42ec7031ed6": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "65efdfd0d26c46e79c8c5ff3b77126cc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } } } } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "" ] }, { "cell_type": "markdown", "metadata": { "id": "t6MPjfT5NrKQ" }, "source": [ "\n", "\n", "\n", "This is the **official YOLOv5 🚀 notebook** by **Ultralytics**, and is freely available for redistribution under the [GPL-3.0 license](https://choosealicense.com/licenses/gpl-3.0/). \n", "For more information please visit https://github.com/ultralytics/yolov5 and https://ultralytics.com. Thank you!" ] }, { "cell_type": "markdown", "metadata": { "id": "7mGmQbAO5pQb" }, "source": [ "# Setup\n", "\n", "Clone repo, install dependencies and check PyTorch and GPU." ] }, { "cell_type": "code", "metadata": { "id": "wbvMlHd_QwMG", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "3809e5a9-dd41-4577-fe62-5531abf7cca2" }, "source": [ "!git clone https://github.com/ultralytics/yolov5 # clone\n", "%cd yolov5\n", "%pip install -qr requirements.txt # install\n", "\n", "import torch\n", "from yolov5 import utils\n", "display = utils.notebook_init() # checks" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "YOLOv5 🚀 v6.0-48-g84a8099 torch 1.10.0+cu102 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)\n", "Setup complete ✅ (2 CPUs, 12.7 GB RAM, 42.2/166.8 GB disk)\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "4JnkELT0cIJg" }, "source": [ "# 1. Inference\n", "\n", "`detect.py` runs YOLOv5 inference on a variety of sources, downloading models automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases), and saving results to `runs/detect`. 
Example inference sources are:\n", "\n", "```shell\n", "python detect.py --source 0 # webcam\n", " img.jpg # image \n", " vid.mp4 # video\n", " path/ # directory\n", " path/*.jpg # glob\n", " 'https://youtu.be/Zgi9g1ksQHc' # YouTube\n", " 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream\n", "```" ] }, { "cell_type": "code", "metadata": { "id": "zR9ZbuQCH7FX", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "8f7e6588-215d-4ebd-93af-88b871e770a7" }, "source": [ "!python detect.py --weights yolov5s.pt --img 640 --conf 0.25 --source data/images\n", "display.Image(filename='runs/detect/exp/zidane.jpg', width=600)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u001b[34m\u001b[1mdetect: \u001b[0mweights=['yolov5s.pt'], source=data/images, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False\n", "YOLOv5 🚀 v6.0-48-g84a8099 torch 1.10.0+cu102 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)\n", "\n", "Fusing layers... \n", "Model Summary: 213 layers, 7225885 parameters, 0 gradients\n", "image 1/2 /content/yolov5/data/images/bus.jpg: 640x480 4 persons, 1 bus, Done. (0.007s)\n", "image 2/2 /content/yolov5/data/images/zidane.jpg: 384x640 2 persons, 1 tie, Done. (0.007s)\n", "Speed: 0.5ms pre-process, 6.9ms inference, 1.3ms NMS per image at shape (1, 3, 640, 640)\n", "Results saved to \u001b[1mruns/detect/exp\u001b[0m\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "hkAzDWJ7cWTr" }, "source": [ " \n", "" ] }, { "cell_type": "markdown", "metadata": { "id": "0eq1SMWl6Sfn" }, "source": [ "# 2. Validate\n", "Validate a model's accuracy on [COCO](https://cocodataset.org/#home) val or test-dev datasets. Models are downloaded automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases). To show results by class use the `--verbose` flag. Note that `pycocotools` metrics may be ~1% better than the equivalent repo metrics, as is visible below, due to slight differences in mAP computation." ] }, { "cell_type": "markdown", "metadata": { "id": "eyTZYGgRjnMc" }, "source": [ "## COCO val\n", "Download [COCO val 2017](https://github.com/ultralytics/yolov5/blob/74b34872fdf41941cddcf243951cdb090fbac17b/data/coco.yaml#L14) dataset (1GB - 5000 images), and test model accuracy." 
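, "\n", "\n", "As noted above, per-class results can be shown with the `--verbose` flag. A minimal sketch (assuming the COCO val images have already been downloaded by the cell below), here using the smaller `yolov5s.pt` checkpoint from the Inference section:\n", "\n", "```shell\n", "python val.py --weights yolov5s.pt --data coco.yaml --img 640 --half --verbose  # report per-class P, R and mAP\n", "```\n", "\n", "Results are saved to `runs/val/exp*`, just as in the full YOLOv5x run below."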
] }, { "cell_type": "code", "metadata": { "id": "WQPtK1QYVaD_", "colab": { "base_uri": "https://localhost:8080/", "height": 48, "referenced_widgets": [ "eb95db7cae194218b3fcefb439b6352f", "769ecde6f2e64bacb596ce972f8d3d2d", "384a001876054c93b0af45cd1e960bfe", "dded0aeae74440f7ba2ffa0beb8dd612", "5296d28be75740b2892ae421bbec3657", "9f09facb2a6c4a7096810d327c8b551c", "25621cff5d16448cb7260e839fd0f543", "0ce7164fc0c74bb9a2b5c7037375a727", "c4c4593c10904cb5b8a5724d60c7e181", "473371611126476c88d5d42ec7031ed6", "65efdfd0d26c46e79c8c5ff3b77126cc" ] }, "outputId": "bcf9a448-1f9b-4a41-ad49-12f181faf05a" }, "source": [ "# Download COCO val\n", "torch.hub.download_url_to_file('https://ultralytics.com/assets/coco2017val.zip', 'tmp.zip')\n", "!unzip -q tmp.zip -d ../datasets && rm tmp.zip" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "eb95db7cae194218b3fcefb439b6352f", "version_minor": 0, "version_major": 2 }, "text/plain": [ " 0%| | 0.00/780M [00:00, ?B/s]" ] }, "metadata": {} } ] }, { "cell_type": "code", "metadata": { "id": "X58w8JLpMnjH", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "74f1dfa9-6b6d-4b36-f67e-bbae243869f9" }, "source": [ "# Run YOLOv5x on COCO val\n", "!python val.py --weights yolov5x.pt --data coco.yaml --img 640 --iou 0.65 --half" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u001b[34m\u001b[1mval: \u001b[0mdata=/content/yolov5/data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.65, task=val, device=, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True\n", "YOLOv5 🚀 v6.0-48-g84a8099 torch 1.10.0+cu102 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)\n", "\n", "Downloading https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5x.pt to yolov5x.pt...\n", "100% 166M/166M [00:03<00:00, 54.1MB/s]\n", "\n", "Fusing layers... \n", "Model Summary: 444 layers, 86705005 parameters, 0 gradients\n", "\u001b[34m\u001b[1mval: \u001b[0mScanning '../datasets/coco/val2017' images and labels...4952 found, 48 missing, 0 empty, 0 corrupted: 100% 5000/5000 [00:01<00:00, 2636.64it/s]\n", "\u001b[34m\u001b[1mval: \u001b[0mNew cache created: ../datasets/coco/val2017.cache\n", " Class Images Labels P R mAP@.5 mAP@.5:.95: 100% 157/157 [01:12<00:00, 2.17it/s]\n", " all 5000 36335 0.729 0.63 0.683 0.496\n", "Speed: 0.1ms pre-process, 4.9ms inference, 1.9ms NMS per image at shape (32, 3, 640, 640)\n", "\n", "Evaluating pycocotools mAP... 
saving runs/val/exp/yolov5x_predictions.json...\n", "loading annotations into memory...\n", "Done (t=0.46s)\n", "creating index...\n", "index created!\n", "Loading and preparing results...\n", "DONE (t=5.15s)\n", "creating index...\n", "index created!\n", "Running per image evaluation...\n", "Evaluate annotation type *bbox*\n", "DONE (t=90.39s).\n", "Accumulating evaluation results...\n", "DONE (t=14.54s).\n", " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.507\n", " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.689\n", " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.552\n", " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.345\n", " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.559\n", " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.652\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.381\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.630\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.682\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.526\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.732\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.829\n", "Results saved to \u001b[1mruns/val/exp\u001b[0m\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "rc_KbFk0juX2" }, "source": [ "## COCO test\n", "Download [COCO test2017](https://github.com/ultralytics/yolov5/blob/74b34872fdf41941cddcf243951cdb090fbac17b/data/coco.yaml#L15) dataset (7GB - 40,000 images), to test model accuracy on test-dev set (**20,000 images, no labels**). Results are saved to a `*.json` file which should be **zipped** and submitted to the evaluation server at https://competitions.codalab.org/competitions/20794." ] }, { "cell_type": "code", "metadata": { "id": "V0AJnSeCIHyJ" }, "source": [ "# Download COCO test-dev2017\n", "torch.hub.download_url_to_file('https://ultralytics.com/assets/coco2017labels.zip', 'tmp.zip')\n", "!unzip -q tmp.zip -d ../datasets && rm tmp.zip\n", "!f=\"test2017.zip\" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d ../datasets/coco/images" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "29GJXAP_lPrt" }, "source": [ "# Run YOLOv5x on COCO test\n", "!python val.py --weights yolov5x.pt --data coco.yaml --img 640 --iou 0.65 --half --task test" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "ZY2VXXXu74w5" }, "source": [ "# 3. Train\n", "\n", "
\n", "Close the active learning loop by sampling images from your inference conditions with the `roboflow` pip package\n", "