{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "YOLOv5 Tutorial", "provenance": [], "collapsed_sections": [], "machine_shape": "hm", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "d90eeb56398f458086e3b2b41dbd9fec": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_d91d8347f17349a4987cea29eac0a49c", "IPY_MODEL_8f4ffda703ac4348ab7edf1d12a188e1", "IPY_MODEL_8c2d91f564de45f8a403386eeeccac27" ], "layout": "IPY_MODEL_5dd95d3eda8b49f7910620edcdcbdcdc" } }, "d91d8347f17349a4987cea29eac0a49c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_520e5b7e80eb450188261cffbc574d25", "placeholder": "", "style": "IPY_MODEL_3cef138c5f7743858bb0f87b65dd3c76", "value": "100%" } }, "8f4ffda703ac4348ab7edf1d12a188e1": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c3782c6dda80400ba7f8c5345624bf87", "max": 818322941, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_11415bab172a4904b73e29ff60f6fce1", "value": 818322941 } }, "8c2d91f564de45f8a403386eeeccac27": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_eac18040908042dbae67a47d23e95b47", "placeholder": "", "style": "IPY_MODEL_e0fc1d6eb478469c9098aa9518d7b358", "value": " 780M/780M [01:17<00:00, 17.7MB/s]" } }, "5dd95d3eda8b49f7910620edcdcbdcdc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, 
"grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "520e5b7e80eb450188261cffbc574d25": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3cef138c5f7743858bb0f87b65dd3c76": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "c3782c6dda80400ba7f8c5345624bf87": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "11415bab172a4904b73e29ff60f6fce1": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": 
"@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "eac18040908042dbae67a47d23e95b47": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e0fc1d6eb478469c9098aa9518d7b358": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "" ] }, { "cell_type": "markdown", "metadata": { "id": "t6MPjfT5NrKQ" }, "source": [ "\n", "\n", "\n", "This is the **official YOLOv5 🚀 notebook** by **Ultralytics**, and is freely available for redistribution under the [GPL-3.0 license](https://choosealicense.com/licenses/gpl-3.0/). \n", "For more information please visit https://github.com/ultralytics/yolov5 and https://ultralytics.com. Thank you!" ] }, { "cell_type": "markdown", "metadata": { "id": "7mGmQbAO5pQb" }, "source": [ "# Setup\n", "\n", "Clone repo, install dependencies and check PyTorch and GPU." ] }, { "cell_type": "code", "metadata": { "id": "wbvMlHd_QwMG", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "ebf225bd-e109-4dbd-8561-3b15514ca47c" }, "source": [ "!git clone https://github.com/ultralytics/yolov5 # clone\n", "%cd yolov5\n", "%pip install -qr requirements.txt # install\n", "\n", "import torch\n", "import utils\n", "display = utils.notebook_init() # checks" ], "execution_count": 1, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "YOLOv5 🚀 v6.1-174-gc4cb7c6 torch 1.11.0+cu113 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Setup complete ✅ (8 CPUs, 51.0 GB RAM, 38.2/166.8 GB disk)\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "4JnkELT0cIJg" }, "source": [ "# 1. Inference\n", "\n", "`detect.py` runs YOLOv5 inference on a variety of sources, downloading models automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases), and saving results to `runs/detect`. 
Example inference sources are:\n", "\n", "```shell\n", "python detect.py --source 0 # webcam\n", " img.jpg # image \n", " vid.mp4 # video\n", " path/ # directory\n", " path/*.jpg # glob\n", " 'https://youtu.be/Zgi9g1ksQHc' # YouTube\n", " 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream\n", "```" ] }, { "cell_type": "code", "metadata": { "id": "zR9ZbuQCH7FX", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "2f43338d-f533-4277-ef9f-b37b565e2702" }, "source": [ "!python detect.py --weights yolov5s.pt --img 640 --conf 0.25 --source data/images\n", "display.Image(filename='runs/detect/exp/zidane.jpg', width=600)" ], "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u001b[34m\u001b[1mdetect: \u001b[0mweights=['yolov5s.pt'], source=data/images, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False\n", "YOLOv5 🚀 v6.1-174-gc4cb7c6 torch 1.11.0+cu113 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)\n", "\n", "Downloading https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s.pt to yolov5s.pt...\n", "100% 14.1M/14.1M [00:00<00:00, 220MB/s]\n", "\n", "Fusing layers... \n", "YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients\n", "image 1/2 /content/yolov5/data/images/bus.jpg: 640x480 4 persons, 1 bus, Done. (0.012s)\n", "image 2/2 /content/yolov5/data/images/zidane.jpg: 384x640 2 persons, 2 ties, Done. (0.013s)\n", "Speed: 0.5ms pre-process, 12.5ms inference, 17.3ms NMS per image at shape (1, 3, 640, 640)\n", "Results saved to \u001b[1mruns/detect/exp\u001b[0m\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "hkAzDWJ7cWTr" }, "source": [ " \n", "" ] }, { "cell_type": "markdown", "metadata": { "id": "0eq1SMWl6Sfn" }, "source": [ "# 2. Validate\n", "Validate a model's accuracy on [COCO](https://cocodataset.org/#home) val or test-dev datasets. Models are downloaded automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases). To show results by class use the `--verbose` flag. Note that `pycocotools` metrics may be ~1% better than the equivalent repo metrics, as is visible below, due to slight differences in mAP computation." ] }, { "cell_type": "markdown", "metadata": { "id": "eyTZYGgRjnMc" }, "source": [ "## COCO val\n", "Download [COCO val 2017](https://github.com/ultralytics/yolov5/blob/74b34872fdf41941cddcf243951cdb090fbac17b/data/coco.yaml#L14) dataset (1GB - 5000 images), and test model accuracy." 
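, "\n", "\n", "To show per-class results from the validation run below, append the `--verbose` flag mentioned above; a sketch of the variant command:\n", "\n", "```shell\n", "python val.py --weights yolov5x.pt --data coco.yaml --img 640 --iou 0.65 --half --verbose\n", "```"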
] }, { "cell_type": "code", "metadata": { "id": "WQPtK1QYVaD_", "colab": { "base_uri": "https://localhost:8080/", "height": 49, "referenced_widgets": [ "d90eeb56398f458086e3b2b41dbd9fec", "d91d8347f17349a4987cea29eac0a49c", "8f4ffda703ac4348ab7edf1d12a188e1", "8c2d91f564de45f8a403386eeeccac27", "5dd95d3eda8b49f7910620edcdcbdcdc", "520e5b7e80eb450188261cffbc574d25", "3cef138c5f7743858bb0f87b65dd3c76", "c3782c6dda80400ba7f8c5345624bf87", "11415bab172a4904b73e29ff60f6fce1", "eac18040908042dbae67a47d23e95b47", "e0fc1d6eb478469c9098aa9518d7b358" ] }, "outputId": "26f3c005-cc13-4b7c-8523-844b56a0b0e3" }, "source": [ "# Download COCO val\n", "torch.hub.download_url_to_file('https://ultralytics.com/assets/coco2017val.zip', 'tmp.zip')\n", "!unzip -q tmp.zip -d ../datasets && rm tmp.zip" ], "execution_count": 3, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ " 0%| | 0.00/780M [00:00, ?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "d90eeb56398f458086e3b2b41dbd9fec" } }, "metadata": {} } ] }, { "cell_type": "code", "metadata": { "id": "X58w8JLpMnjH", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "c73097d6-02a8-43af-9962-ba6500b793ff" }, "source": [ "# Run YOLOv5x on COCO val\n", "!python val.py --weights yolov5x.pt --data coco.yaml --img 640 --iou 0.65 --half" ], "execution_count": 4, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u001b[34m\u001b[1mval: \u001b[0mdata=/content/yolov5/data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.65, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True, dnn=False\n", "YOLOv5 🚀 v6.1-174-gc4cb7c6 torch 1.11.0+cu113 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)\n", "\n", "Downloading https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5x.pt to yolov5x.pt...\n", "100% 166M/166M [00:05<00:00, 33.5MB/s]\n", "\n", "Fusing layers... \n", "YOLOv5x summary: 444 layers, 86705005 parameters, 0 gradients\n", "Downloading https://ultralytics.com/assets/Arial.ttf to /root/.config/Ultralytics/Arial.ttf...\n", "100% 755k/755k [00:00<00:00, 49.6MB/s]\n", "\u001b[34m\u001b[1mval: \u001b[0mScanning '/content/datasets/coco/val2017' images and labels...4952 found, 48 missing, 0 empty, 0 corrupt: 100% 5000/5000 [00:00<00:00, 10667.19it/s]\n", "\u001b[34m\u001b[1mval: \u001b[0mNew cache created: /content/datasets/coco/val2017.cache\n", " Class Images Labels P R mAP@.5 mAP@.5:.95: 100% 157/157 [00:58<00:00, 2.70it/s]\n", " all 5000 36335 0.743 0.626 0.683 0.496\n", "Speed: 0.1ms pre-process, 4.8ms inference, 1.2ms NMS per image at shape (32, 3, 640, 640)\n", "\n", "Evaluating pycocotools mAP... 
saving runs/val/exp/yolov5x_predictions.json...\n", "loading annotations into memory...\n", "Done (t=0.38s)\n", "creating index...\n", "index created!\n", "Loading and preparing results...\n", "DONE (t=5.42s)\n", "creating index...\n", "index created!\n", "Running per image evaluation...\n", "Evaluate annotation type *bbox*\n", "DONE (t=72.67s).\n", "Accumulating evaluation results...\n", "DONE (t=13.48s).\n", " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.506\n", " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.688\n", " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.549\n", " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.340\n", " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.557\n", " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.651\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.382\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.631\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.684\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.528\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.737\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.833\n", "Results saved to \u001b[1mruns/val/exp\u001b[0m\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "rc_KbFk0juX2" }, "source": [ "## COCO test\n", "Download [COCO test2017](https://github.com/ultralytics/yolov5/blob/74b34872fdf41941cddcf243951cdb090fbac17b/data/coco.yaml#L15) dataset (7GB - 40,000 images), to test model accuracy on test-dev set (**20,000 images, no labels**). Results are saved to a `*.json` file which should be **zipped** and submitted to the evaluation server at https://competitions.codalab.org/competitions/20794." ] }, { "cell_type": "code", "metadata": { "id": "V0AJnSeCIHyJ" }, "source": [ "# Download COCO test-dev2017\n", "torch.hub.download_url_to_file('https://ultralytics.com/assets/coco2017labels.zip', 'tmp.zip')\n", "!unzip -q tmp.zip -d ../datasets && rm tmp.zip\n", "!f=\"test2017.zip\" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d ../datasets/coco/images" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "29GJXAP_lPrt" }, "source": [ "# Run YOLOv5x on COCO test\n", "!python val.py --weights yolov5x.pt --data coco.yaml --img 640 --iou 0.65 --half --task test" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "ZY2VXXXu74w5" }, "source": [ "# 3. Train\n", "\n", "
\n", "Close the active learning loop by sampling images from your inference conditions with the `roboflow` pip package\n", "