{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "YOLOv5 Tutorial", "provenance": [], "collapsed_sections": [], "toc_visible": true, "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "2e915d9016c846e095e382b6a02ee773": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_cb7fc3a5c6cc4fde8d2c83e594a7c86e", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_ac3edef4e3434f4587e6cbf8aa048770", "IPY_MODEL_853ac234cc2a4236946fc516871e10eb" ] } }, "cb7fc3a5c6cc4fde8d2c83e594a7c86e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, 
"ac3edef4e3434f4587e6cbf8aa048770": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_13842ca90c0047e584b8d68d99dad2b1", "_dom_classes": [], "description": "100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 818322941, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 818322941, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_f454999c3a924c7bad0746fb453dec36" } }, "853ac234cc2a4236946fc516871e10eb": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_f94a7ca8c1f04761bf38fdc5f99664b8", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 780M/780M [03:59<00:00, 3.42MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_9da1a23b042c41618dd14b0e30aa7cbe" } }, "13842ca90c0047e584b8d68d99dad2b1": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "f454999c3a924c7bad0746fb453dec36": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", 
"_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "f94a7ca8c1f04761bf38fdc5f99664b8": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "9da1a23b042c41618dd14b0e30aa7cbe": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, 
"height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "6ff8a710ded44391a624dec5c460b771": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_3c19729b51cd45d4848035da06e96ff8", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_23b2f0ae3d46438c8de375987c77f580", "IPY_MODEL_dd9498c321a9422da6faf17a0be026d4" ] } }, "3c19729b51cd45d4848035da06e96ff8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, 
"23b2f0ae3d46438c8de375987c77f580": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_d8dda4b2ce864fd682e558b9a48f602e", "_dom_classes": [], "description": "100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 6984509, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 6984509, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_ff8151449e444a14869684212b9ab14e" } }, "dd9498c321a9422da6faf17a0be026d4": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_0f84fe609bcf4aa9afdc32a8cf076909", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 6.66M/6.66M [00:01<00:00, 6.08MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_8fda673769984e2b928ef820d34c85c3" } }, "d8dda4b2ce864fd682e558b9a48f602e": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "ff8151449e444a14869684212b9ab14e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", 
"_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "0f84fe609bcf4aa9afdc32a8cf076909": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "8fda673769984e2b928ef820d34c85c3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, 
"height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } } } } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", "metadata": { "id": "t6MPjfT5NrKQ" }, "source": [ "\n", "\n", "\n", "This is the **official YOLOv5 🚀 notebook** authored by **Ultralytics**, and is freely available for redistribution under the [GPL-3.0 license](https://choosealicense.com/licenses/gpl-3.0/). \n", "For more information please visit https://github.com/ultralytics/yolov5 and https://ultralytics.com. Thank you!" ] }, { "cell_type": "markdown", "metadata": { "id": "7mGmQbAO5pQb" }, "source": [ "# Setup\n", "\n", "Clone repo, install dependencies and check PyTorch and GPU." ] }, { "cell_type": "code", "metadata": { "id": "wbvMlHd_QwMG", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "ada1dd8d-e0aa-4858-e893-dc320319ca30" }, "source": [ "!git clone https://github.com/ultralytics/yolov5 # clone repo\n", "%cd yolov5\n", "%pip install -qr requirements.txt # install dependencies\n", "\n", "import torch\n", "from IPython.display import Image, clear_output # to display images\n", "\n", "clear_output()\n", "print(f\"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})\")" ], "execution_count": 1, "outputs": [ { "output_type": "stream", "text": [ "Setup complete. Using torch 1.9.0+cu102 (Tesla V100-SXM2-16GB)\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "4JnkELT0cIJg" }, "source": [ "# 1. 
Inference\n", "\n", "`detect.py` runs YOLOv5 inference on a variety of sources, downloading models automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases), and saving results to `runs/detect`. Example inference sources are:\n", "\n", " " ] }, { "cell_type": "code", "metadata": { "id": "zR9ZbuQCH7FX", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "a7a37616-a82b-4bdb-a463-6ead850b5615" }, "source": [ "!python detect.py --weights yolov5s.pt --img 640 --conf 0.25 --source data/images/\n", "Image(filename='runs/detect/exp/zidane.jpg', width=600)" ], "execution_count": 9, "outputs": [ { "output_type": "stream", "text": [ "\u001b[34m\u001b[1mdetect: \u001b[0mweights=['yolov5s.pt'], source=data/images/, imgsz=640, conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False\n", "YOLOv5 🚀 v5.0-330-g18f6ba7 torch 1.9.0+cu102 CUDA:0 (Tesla V100-SXM2-16GB, 16160.5MB)\n", "\n", "Fusing layers... \n", "Model Summary: 224 layers, 7266973 parameters, 0 gradients\n", "image 1/2 /content/yolov5/data/images/bus.jpg: 640x480 4 persons, 1 bus, 1 fire hydrant, Done. (0.008s)\n", "image 2/2 /content/yolov5/data/images/zidane.jpg: 384x640 2 persons, 2 ties, Done. (0.008s)\n", "Results saved to runs/detect/exp\n", "Done. (0.091s)\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "hkAzDWJ7cWTr" }, "source": [ "        \n", "" ] }, { "cell_type": "markdown", "metadata": { "id": "0eq1SMWl6Sfn" }, "source": [ "# 2. Validate\n", "Validate a model's accuracy on [COCO](https://cocodataset.org/#home) val or test-dev datasets. Models are downloaded automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases). 
To show results by class use the `--verbose` flag. Note that `pycocotools` metrics may be ~1% better than the equivalent repo metrics, as is visible below, due to slight differences in mAP computation." ] }, { "cell_type": "markdown", "metadata": { "id": "eyTZYGgRjnMc" }, "source": [ "## COCO val2017\n", "Download [COCO val 2017](https://github.com/ultralytics/yolov5/blob/74b34872fdf41941cddcf243951cdb090fbac17b/data/coco.yaml#L14) dataset (1GB - 5000 images), and test model accuracy." ] }, { "cell_type": "code", "metadata": { "id": "WQPtK1QYVaD_", "colab": { "base_uri": "https://localhost:8080/", "height": 66, "referenced_widgets": [ "2e915d9016c846e095e382b6a02ee773", "cb7fc3a5c6cc4fde8d2c83e594a7c86e", "ac3edef4e3434f4587e6cbf8aa048770", "853ac234cc2a4236946fc516871e10eb", "13842ca90c0047e584b8d68d99dad2b1", "f454999c3a924c7bad0746fb453dec36", "f94a7ca8c1f04761bf38fdc5f99664b8", "9da1a23b042c41618dd14b0e30aa7cbe" ] }, "outputId": "3606f305-aa67-43fd-d5d6-93d1f311768c" }, "source": [ "# Download COCO val2017\n", "torch.hub.download_url_to_file('https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017val.zip', 'tmp.zip')\n", "!unzip -q tmp.zip -d ../datasets && rm tmp.zip" ], "execution_count": 10, "outputs": [ { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2e915d9016c846e095e382b6a02ee773", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, max=818322941.0), HTML(value='')))" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "X58w8JLpMnjH", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "20fbc423-f536-43ff-e70b-3acf6aeade99" }, "source": [ "# Run YOLOv5x on COCO val2017\n", "!python val.py --weights yolov5x.pt --data coco.yaml --img 640 --iou 0.65 --half" ], "execution_count": 11, "outputs": [ { "output_type": "stream", "text": [ 
"\u001b[34m\u001b[1mval: \u001b[0mdata=./data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.65, task=val, device=, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True\n", "YOLOv5 🚀 v5.0-330-g18f6ba7 torch 1.9.0+cu102 CUDA:0 (Tesla V100-SXM2-16GB, 16160.5MB)\n", "\n", "Downloading https://github.com/ultralytics/yolov5/releases/download/v5.0/yolov5x.pt to yolov5x.pt...\n", "100% 168M/168M [00:05<00:00, 31.9MB/s]\n", "\n", "Fusing layers... \n", "Model Summary: 476 layers, 87730285 parameters, 0 gradients\n", "\u001b[34m\u001b[1mval: \u001b[0mScanning '../datasets/coco/val2017' images and labels...4952 found, 48 missing, 0 empty, 0 corrupted: 100% 5000/5000 [00:01<00:00, 2653.03it/s]\n", "\u001b[34m\u001b[1mval: \u001b[0mNew cache created: ../datasets/coco/val2017.cache\n", " Class Images Labels P R mAP@.5 mAP@.5:.95: 100% 157/157 [01:18<00:00, 2.00it/s]\n", " all 5000 36335 0.746 0.626 0.68 0.49\n", "Speed: 0.1ms pre-process, 5.1ms inference, 1.5ms NMS per image at shape (32, 3, 640, 640)\n", "\n", "Evaluating pycocotools mAP... 
saving runs/val/exp/yolov5x_predictions.json...\n", "loading annotations into memory...\n", "Done (t=0.44s)\n", "creating index...\n", "index created!\n", "Loading and preparing results...\n", "DONE (t=4.82s)\n", "creating index...\n", "index created!\n", "Running per image evaluation...\n", "Evaluate annotation type *bbox*\n", "DONE (t=84.52s).\n", "Accumulating evaluation results...\n", "DONE (t=13.82s).\n", " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.504\n", " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.688\n", " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.546\n", " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.351\n", " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.551\n", " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.644\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.382\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.629\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.681\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.524\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.735\n", " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.827\n", "Results saved to runs/val/exp\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "rc_KbFk0juX2" }, "source": [ "## COCO test-dev2017\n", "Download [COCO test2017](https://github.com/ultralytics/yolov5/blob/74b34872fdf41941cddcf243951cdb090fbac17b/data/coco.yaml#L15) dataset (7GB - 40,000 images), to test model accuracy on test-dev set (**20,000 images, no labels**). Results are saved to a `*.json` file which should be **zipped** and submitted to the evaluation server at https://competitions.codalab.org/competitions/20794." 
] }, { "cell_type": "code", "metadata": { "id": "V0AJnSeCIHyJ" }, "source": [ "# Download COCO test-dev2017 (labels and images go under ../datasets/coco, the same root the val2017 download uses)\n", "torch.hub.download_url_to_file('https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels.zip', 'tmp.zip')\n", "!unzip -q tmp.zip -d ../datasets && rm tmp.zip # unzip labels\n", "!f=\"test2017.zip\" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d ../datasets/coco/images && rm $f # 7GB, 41k images; extracts to ../datasets/coco/images/test2017" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "29GJXAP_lPrt" }, "source": [ "# Run YOLOv5s on COCO test-dev2017 using --task test\n", "!python val.py --weights yolov5s.pt --data coco.yaml --task test" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "VUOiNLtMP5aG" }, "source": [ "# 3. Train\n", "\n", "Download [COCO128](https://www.kaggle.com/ultralytics/coco128), a small 128-image tutorial dataset, start tensorboard and train YOLOv5s from a pretrained checkpoint for 3 epochs (note actual training is typically much longer, around **300-1000 epochs**, depending on your dataset)." 
] }, { "cell_type": "code", "metadata": { "id": "Knxi2ncxWffW", "colab": { "base_uri": "https://localhost:8080/", "height": 66, "referenced_widgets": [ "6ff8a710ded44391a624dec5c460b771", "3c19729b51cd45d4848035da06e96ff8", "23b2f0ae3d46438c8de375987c77f580", "dd9498c321a9422da6faf17a0be026d4", "d8dda4b2ce864fd682e558b9a48f602e", "ff8151449e444a14869684212b9ab14e", "0f84fe609bcf4aa9afdc32a8cf076909", "8fda673769984e2b928ef820d34c85c3" ] }, "outputId": "4510c6b0-8d2a-436c-d3f4-c8f8470d913a" }, "source": [ "# Download COCO128 into ../datasets, where train.py looks for it (otherwise it is downloaded a second time)\n", "torch.hub.download_url_to_file('https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip', 'tmp.zip')\n", "!unzip -q tmp.zip -d ../datasets && rm tmp.zip" ], "execution_count": 12, "outputs": [ { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6ff8a710ded44391a624dec5c460b771", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, max=6984509.0), HTML(value='')))" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "_pOkGLv1dMqh" }, "source": [ "Train a YOLOv5s model on [COCO128](https://www.kaggle.com/ultralytics/coco128) with `--data coco128.yaml`, starting from pretrained `--weights yolov5s.pt`, or from randomly initialized `--weights '' --cfg yolov5s.yaml`. Models are downloaded automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases), and **COCO, COCO128, and VOC datasets are downloaded automatically** on first use.\n", "\n", "All training results are saved to `runs/train/` with incrementing run directories, i.e. 
`runs/train/exp2`, `runs/train/exp3` etc.\n" ] }, { "cell_type": "code", "metadata": { "id": "bOy5KI2ncnWd" }, "source": [ "# Tensorboard (optional)\n", "%load_ext tensorboard\n", "%tensorboard --logdir runs/train" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "2fLAV42oNb7M" }, "source": [ "# Weights & Biases (optional)\n", "%pip install -q wandb\n", "import wandb\n", "wandb.login()" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "1NcFxRcFdJ_O", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "cd8ac17d-19a8-4e87-ab6a-31af1edac1ef" }, "source": [ "# Train YOLOv5s on COCO128 for 3 epochs\n", "!python train.py --img 640 --batch 16 --epochs 3 --data coco128.yaml --weights yolov5s.pt --cache" ], "execution_count": 13, "outputs": [ { "output_type": "stream", "text": [ "\u001b[34m\u001b[1mtrain: \u001b[0mweights=yolov5s.pt, cfg=, data=coco128.yaml, hyp=data/hyps/hyp.scratch.yaml, epochs=3, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache_images=True, image_weights=False, device=, multi_scale=False, single_cls=False, adam=False, sync_bn=False, workers=8, project=runs/train, entity=None, name=exp, exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, upload_dataset=False, bbox_interval=-1, save_period=-1, artifact_alias=latest, local_rank=-1\n", "\u001b[34m\u001b[1mgithub: \u001b[0mup to date with https://github.com/ultralytics/yolov5 ✅\n", "YOLOv5 🚀 v5.0-330-g18f6ba7 torch 1.9.0+cu102 CUDA:0 (Tesla V100-SXM2-16GB, 16160.5MB)\n", "\n", "\u001b[34m\u001b[1mhyperparameters: \u001b[0mlr0=0.01, lrf=0.2, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=0.0, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, 
flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0, copy_paste=0.0\n", "\u001b[34m\u001b[1mWeights & Biases: \u001b[0mrun 'pip install wandb' to automatically track and visualize YOLOv5 🚀 runs (RECOMMENDED)\n", "\u001b[34m\u001b[1mTensorBoard: \u001b[0mStart with 'tensorboard --logdir runs/train', view at http://localhost:6006/\n", "2021-07-29 22:56:52.096481: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", "\n", "WARNING: Dataset not found, nonexistent paths: ['/content/datasets/coco128/images/train2017']\n", "Downloading https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip ...\n", "100% 6.66M/6.66M [00:00<00:00, 44.0MB/s]\n", "Dataset autodownload success\n", "\n", "\n", " from n params module arguments \n", " 0 -1 1 3520 models.common.Focus [3, 32, 3] \n", " 1 -1 1 18560 models.common.Conv [32, 64, 3, 2] \n", " 2 -1 1 18816 models.common.C3 [64, 64, 1] \n", " 3 -1 1 73984 models.common.Conv [64, 128, 3, 2] \n", " 4 -1 1 156928 models.common.C3 [128, 128, 3] \n", " 5 -1 1 295424 models.common.Conv [128, 256, 3, 2] \n", " 6 -1 1 625152 models.common.C3 [256, 256, 3] \n", " 7 -1 1 1180672 models.common.Conv [256, 512, 3, 2] \n", " 8 -1 1 656896 models.common.SPP [512, 512, [5, 9, 13]] \n", " 9 -1 1 1182720 models.common.C3 [512, 512, 1, False] \n", " 10 -1 1 131584 models.common.Conv [512, 256, 1, 1] \n", " 11 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n", " 12 [-1, 6] 1 0 models.common.Concat [1] \n", " 13 -1 1 361984 models.common.C3 [512, 256, 1, False] \n", " 14 -1 1 33024 models.common.Conv [256, 128, 1, 1] \n", " 15 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n", " 16 [-1, 4] 1 0 models.common.Concat [1] \n", " 17 -1 1 90880 models.common.C3 [256, 128, 1, False] \n", " 18 -1 1 147712 models.common.Conv [128, 128, 3, 2] \n", " 19 [-1, 14] 1 0 models.common.Concat [1] \n", " 20 -1 1 296448 models.common.C3 [256, 256, 1, False] 
\n", " 21 -1 1 590336 models.common.Conv [256, 256, 3, 2] \n", " 22 [-1, 10] 1 0 models.common.Concat [1] \n", " 23 -1 1 1182720 models.common.C3 [512, 512, 1, False] \n", " 24 [17, 20, 23] 1 229245 models.yolo.Detect [80, [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], [128, 256, 512]]\n", "Model Summary: 283 layers, 7276605 parameters, 7276605 gradients, 17.1 GFLOPs\n", "\n", "Transferred 362/362 items from yolov5s.pt\n", "Scaled weight_decay = 0.0005\n", "\u001b[34m\u001b[1moptimizer:\u001b[0m SGD with parameter groups 59 weight, 62 weight (no decay), 62 bias\n", "\u001b[34m\u001b[1malbumentations: \u001b[0mversion 1.0.3 required by YOLOv5, but version 0.1.12 is currently installed\n", "\u001b[34m\u001b[1mtrain: \u001b[0mScanning '../datasets/coco128/labels/train2017' images and labels...128 found, 0 missing, 2 empty, 0 corrupted: 100% 128/128 [00:00<00:00, 2021.98it/s]\n", "\u001b[34m\u001b[1mtrain: \u001b[0mNew cache created: ../datasets/coco128/labels/train2017.cache\n", "\u001b[34m\u001b[1mtrain: \u001b[0mCaching images (0.1GB): 100% 128/128 [00:00<00:00, 273.58it/s]\n", "\u001b[34m\u001b[1mval: \u001b[0mScanning '../datasets/coco128/labels/train2017.cache' images and labels... 128 found, 0 missing, 2 empty, 0 corrupted: 100% 128/128 [00:00<00:00, 506004.63it/s]\n", "\u001b[34m\u001b[1mval: \u001b[0mCaching images (0.1GB): 100% 128/128 [00:01<00:00, 121.71it/s]\n", "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n", "[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n", "Plotting labels... \n", "\n", "\u001b[34m\u001b[1mautoanchor: \u001b[0mAnalyzing anchors... 
anchors/target = 4.27, Best Possible Recall (BPR) = 0.9935\n", "Image sizes 640 train, 640 val\n", "Using 2 dataloader workers\n", "Logging results to runs/train/exp\n", "Starting training for 3 epochs...\n", "\n", " Epoch gpu_mem box obj cls labels img_size\n", " 0/2 3.64G 0.0441 0.06646 0.02229 290 640: 100% 8/8 [00:04<00:00, 1.93it/s]\n", " Class Images Labels P R mAP@.5 mAP@.5:.95: 100% 4/4 [00:01<00:00, 3.45it/s]\n", " all 128 929 0.696 0.562 0.644 0.419\n", "\n", " Epoch gpu_mem box obj cls labels img_size\n", " 1/2 5.04G 0.04573 0.06289 0.021 226 640: 100% 8/8 [00:01<00:00, 5.46it/s]\n", " Class Images Labels P R mAP@.5 mAP@.5:.95: 100% 4/4 [00:01<00:00, 3.16it/s]\n", " all 128 929 0.71 0.567 0.654 0.424\n", "\n", " Epoch gpu_mem box obj cls labels img_size\n", " 2/2 5.04G 0.04542 0.0715 0.02028 242 640: 100% 8/8 [00:01<00:00, 5.12it/s]\n", " Class Images Labels P R mAP@.5 mAP@.5:.95: 100% 4/4 [00:02<00:00, 1.46it/s]\n", " all 128 929 0.731 0.563 0.658 0.427\n", "3 epochs completed in 0.006 hours.\n", "\n", "Optimizer stripped from runs/train/exp/weights/last.pt, 14.8MB\n", "Optimizer stripped from runs/train/exp/weights/best.pt, 14.8MB\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "15glLzbQx5u0" }, "source": [ "# 4. Visualize" ] }, { "cell_type": "markdown", "metadata": { "id": "DLI1JmHU7B0l" }, "source": [ "## Weights & Biases Logging 🌟 NEW\n", "\n", "[Weights & Biases](https://wandb.ai/site?utm_campaign=repo_yolo_notebook) (W&B) is now integrated with YOLOv5 for real-time visualization and cloud logging of training runs. This allows for better run comparison and introspection, as well as improved visibility and collaboration for teams. To enable W&B `pip install wandb`, and then train normally (you will be guided through setup on first use). 
\n", "\n", "During training you will see live updates at [https://wandb.ai/home](https://wandb.ai/home?utm_campaign=repo_yolo_notebook), and you can create and share detailed [Reports](https://wandb.ai/glenn-jocher/yolov5_tutorial/reports/YOLOv5-COCO128-Tutorial-Results--VmlldzozMDI5OTY) of your results. For more information see the [YOLOv5 Weights & Biases Tutorial](https://github.com/ultralytics/yolov5/issues/1289). \n", "\n", "" ] }, { "cell_type": "markdown", "metadata": { "id": "-WPvRbS5Swl6" }, "source": [ "## Local Logging\n", "\n", "All results are logged by default to `runs/train`, with a new experiment directory created for each new training as `runs/train/exp2`, `runs/train/exp3`, etc. View train and val jpgs to see mosaics, labels, predictions and augmentation effects. Note an Ultralytics **Mosaic Dataloader** is used for training (shown below), which combines 4 images into 1 mosaic during training.\n", "\n", "> \n", "`train_batch0.jpg` shows train batch 0 mosaics and labels\n", "\n", "> \n", "`val_batch0_labels.jpg` shows val batch 0 labels\n", "\n", "> \n", "`val_batch0_pred.jpg` shows val batch 0 _predictions_\n", "\n", "Training results are automatically logged to [Tensorboard](https://www.tensorflow.org/tensorboard) and [CSV](https://github.com/ultralytics/yolov5/pull/4148) as `results.csv`, which is plotted as `results.png` (below) after training completes. 
You can also plot any `results.csv` file manually:\n", "\n", "```python\n", "from utils.plots import plot_results \n", "plot_results('path/to/results.csv') # plot 'results.csv' as 'results.png'\n", "```\n", "\n", "\"COCO128" ] }, { "cell_type": "markdown", "metadata": { "id": "Zelyeqbyt3GD" }, "source": [ "# Environments\n", "\n", "YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled):\n", "\n", "- **Google Colab and Kaggle** notebooks with free GPU: \"Open \"Open\n", "- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart)\n", "- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/AWS-Quickstart)\n", "- **Docker Image**. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) \"Docker\n" ] }, { "cell_type": "markdown", "metadata": { "id": "6Qu7Iesl0p54" }, "source": [ "# Status\n", "\n", "![CI CPU testing](https://github.com/ultralytics/yolov5/workflows/CI%20CPU%20testing/badge.svg)\n", "\n", "If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. 
CI tests verify correct operation of YOLOv5 training ([train.py](https://github.com/ultralytics/yolov5/blob/master/train.py)), validation ([val.py](https://github.com/ultralytics/yolov5/blob/master/val.py)), inference ([detect.py](https://github.com/ultralytics/yolov5/blob/master/detect.py)) and export ([export.py](https://github.com/ultralytics/yolov5/blob/master/export.py)) on macOS, Windows, and Ubuntu every 24 hours and on every commit.\n" ] }, { "cell_type": "markdown", "metadata": { "id": "IEijrePND_2I" }, "source": [ "# Appendix\n", "\n", "Optional extras below. Unit tests validate repo functionality and should be run on any PRs submitted.\n" ] }, { "cell_type": "code", "metadata": { "id": "mcKoSIK2WSzj" }, "source": [ "# Reproduce\n", "for x in 'yolov5s', 'yolov5m', 'yolov5l', 'yolov5x':\n", " !python val.py --weights {x}.pt --data coco.yaml --img 640 --conf 0.25 --iou 0.45 # speed\n", " !python val.py --weights {x}.pt --data coco.yaml --img 640 --conf 0.001 --iou 0.65 # mAP" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "GMusP4OAxFu6" }, "source": [ "# PyTorch Hub\n", "import torch\n", "\n", "# Model\n", "model = torch.hub.load('ultralytics/yolov5', 'yolov5s')\n", "\n", "# Images\n", "dir = 'https://ultralytics.com/images/'\n", "imgs = [dir + f for f in ('zidane.jpg', 'bus.jpg')] # batch of images\n", "\n", "# Inference\n", "results = model(imgs)\n", "results.print() # or .show(), .save()" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "FGH0ZjkGjejy" }, "source": [ "# Unit tests\n", "%%shell\n", "export PYTHONPATH=\"$PWD\" # to run *.py. 
files in subdirectories\n", "\n", "rm -rf runs # remove runs/\n", "for m in yolov5s; do # models\n", " python train.py --weights $m.pt --epochs 3 --img 320 --device 0 # train pretrained\n", " python train.py --weights '' --cfg $m.yaml --epochs 3 --img 320 --device 0 # train scratch\n", " for d in 0 cpu; do # devices\n", " python detect.py --weights $m.pt --device $d # detect official\n", " python detect.py --weights runs/train/exp/weights/best.pt --device $d # detect custom\n", " python val.py --weights $m.pt --device $d # val official\n", " python val.py --weights runs/train/exp/weights/best.pt --device $d # val custom\n", " done\n", " python hubconf.py # hub\n", " python models/yolo.py --cfg $m.yaml # inspect\n", " python export.py --weights $m.pt --img 640 --batch 1 # export\n", "done" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "gogI-kwi3Tye" }, "source": [ "# Profile\n", "from utils.torch_utils import profile\n", "\n", "m1 = lambda x: x * torch.sigmoid(x)\n", "m2 = torch.nn.SiLU()\n", "results = profile(input=torch.randn(16, 3, 640, 640), ops=[m1, m2], n=100)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "RVRSOhEvUdb5" }, "source": [ "# Evolve\n", "!python train.py --img 640 --batch 64 --epochs 100 --data coco128.yaml --weights yolov5s.pt --cache --noautoanchor --evolve\n", "!d=runs/train/evolve && cp evolve.* $d && zip -r evolve.zip $d && gsutil mv evolve.zip gs://bucket # upload results (optional)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "BSgFCAcMbk1R" }, "source": [ "# VOC\n", "for b, m in zip([64, 48, 32, 16], ['yolov5s', 'yolov5m', 'yolov5l', 'yolov5x']): # zip(batch_size, model)\n", " !python train.py --batch {b} --weights {m}.pt --data VOC.yaml --epochs 50 --cache --img 512 --nosave --hyp hyp.finetune.yaml --project VOC --name {m}" ], "execution_count": null, "outputs": [] } ] }