{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "toc_visible": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "gpuClass": "standard", "widgets": { "application/vnd.jupyter.widget-state+json": { "17e75f6149c14f8d9619f319bf9ee553": { "model_module": "@jupyter-widgets/output", "model_name": "OutputModel", "model_module_version": "1.0.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/output", "_model_module_version": "1.0.0", "_model_name": "OutputModel", "_view_count": null, "_view_module": "@jupyter-widgets/output", "_view_module_version": "1.0.0", "_view_name": "OutputView", "layout": "IPY_MODEL_cbb6deb9baf742e8a6765d0891f193c7", "msg_id": "", "outputs": [ { "output_type": "display_data", "data": { "text/plain": "\u001b[35m 93%\u001b[0m \u001b[38;2;249;38;114m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[38;2;249;38;114m╸\u001b[0m\u001b[38;5;237m━━━━\u001b[0m \u001b[32m9,314/10,000 \u001b[0m [ \u001b[33m0:00:02\u001b[0m < \u001b[36m0:00:01\u001b[0m , \u001b[31m3,340 it/s\u001b[0m ]\n", "text/html": "
  93% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸━━━━ 9,314/10,000  [ 0:00:02 < 0:00:01 , 3,340 it/s ]\n
\n" }, "metadata": {} } ] } }, "cbb6deb9baf742e8a6765d0891f193c7": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6314513b30fd4f11ad8b59b0bcdee8d8": { "model_module": "@jupyter-widgets/controls", "model_name": "VBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "VBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "VBoxView", "box_style": "", "children": [ "IPY_MODEL_1966d341666441089910ba16f7ac169c", "IPY_MODEL_965ba6c8ce3d4c5bbe14c9a7192a221c", "IPY_MODEL_6f5377c55bfe4428805ed94034e68d6b", "IPY_MODEL_2d986d4e45d44a2b858390514b770a9d", "IPY_MODEL_3c532f73534240b19c47e2b943418e93" ], "layout": "IPY_MODEL_ac189f596cb343bd9a6ffcf1b158f56b" } }, "1966d341666441089910ba16f7ac169c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7c2c6b532db14e9bafcdb0beeb1c33bc", "placeholder": "​", "style": "IPY_MODEL_cd4b78c526cf4df88744348e711754da", "value": "

Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
" } }, "965ba6c8ce3d4c5bbe14c9a7192a221c": { "model_module": "@jupyter-widgets/controls", "model_name": "PasswordModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "PasswordModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "PasswordView", "continuous_update": true, "description": "Token:", "description_tooltip": null, "disabled": false, "layout": "IPY_MODEL_506ae0904a434d15891a7c8ded20f2db", "placeholder": "​", "style": "IPY_MODEL_c7616f2c72f5492cba051c5fea08af09", "value": "" } }, "6f5377c55bfe4428805ed94034e68d6b": { "model_module": "@jupyter-widgets/controls", "model_name": "CheckboxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "CheckboxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "CheckboxView", "description": "Add token as git credential?", "description_tooltip": null, "disabled": false, "indent": true, "layout": "IPY_MODEL_550b8e46070445ec8669b4d0bf4ec8a8", "style": "IPY_MODEL_435bcee2de28494cb3584f63b25a5f2e", "value": true } }, "2d986d4e45d44a2b858390514b770a9d": { "model_module": "@jupyter-widgets/controls", "model_name": "ButtonModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ButtonModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ButtonView", "button_style": "", "description": "Login", "disabled": false, "icon": "", "layout": "IPY_MODEL_73e497fcd5644d21a00d37ee8dd9803e", "style": "IPY_MODEL_07a4cd80556d43d590a8673530c26538", "tooltip": "" } }, "3c532f73534240b19c47e2b943418e93": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b626797b5b0c4a91b8564b089ef219c2", "placeholder": "​", "style": "IPY_MODEL_a39d2452d85841c396c498f5f53ef48e", "value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
" } }, "ac189f596cb343bd9a6ffcf1b158f56b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": "center", "align_self": null, "border": null, "bottom": null, "display": "flex", "flex": null, "flex_flow": "column", "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "50%" } }, "7c2c6b532db14e9bafcdb0beeb1c33bc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "cd4b78c526cf4df88744348e711754da": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "506ae0904a434d15891a7c8ded20f2db": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, 
"justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c7616f2c72f5492cba051c5fea08af09": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "550b8e46070445ec8669b4d0bf4ec8a8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "435bcee2de28494cb3584f63b25a5f2e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "73e497fcd5644d21a00d37ee8dd9803e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "07a4cd80556d43d590a8673530c26538": { "model_module": 
"@jupyter-widgets/controls", "model_name": "ButtonStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ButtonStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "button_color": null, "font_weight": "" } }, "b626797b5b0c4a91b8564b089ef219c2": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a39d2452d85841c396c498f5f53ef48e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "markdown", "source": [ "#Install" ], "metadata": { "id": "59ZIMWD6yV2q" } }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "collapsed": true, "id": "LDIiamp-xyyw", "outputId": "a4107dcd-ef6a-4a37-b150-3285488d7720" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Reading package lists... Done\n", "Building dependency tree \n", "Reading state information... Done\n", "cmake is already the newest version (3.10.2-1ubuntu2.18.04.2).\n", "The following package was automatically installed and is no longer required:\n", " libnvidia-common-460\n", "Use 'apt autoremove' to remove it.\n", "Suggested packages:\n", " swig-doc swig-examples swig3.0-examples swig3.0-doc\n", "The following NEW packages will be installed:\n", " swig swig3.0\n", "0 upgraded, 2 newly installed, 0 to remove and 20 not upgraded.\n", "Need to get 1,100 kB of archives.\n", "After this operation, 5,822 kB of additional disk space will be used.\n", "Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 swig3.0 amd64 3.0.12-1 [1,094 kB]\n", "Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 swig amd64 3.0.12-1 [6,460 B]\n", "Fetched 1,100 kB in 1s (844 kB/s)\n", "Selecting previously unselected package swig3.0.\n", "(Reading database ... 
124016 files and directories currently installed.)\n", "Preparing to unpack .../swig3.0_3.0.12-1_amd64.deb ...\n", "Unpacking swig3.0 (3.0.12-1) ...\n", "Selecting previously unselected package swig.\n", "Preparing to unpack .../swig_3.0.12-1_amd64.deb ...\n", "Unpacking swig (3.0.12-1) ...\n", "Setting up swig3.0 (3.0.12-1) ...\n", "Setting up swig (3.0.12-1) ...\n", "Processing triggers for man-db (2.8.3-2ubuntu0.1) ...\n", "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Collecting stable-baselines3[extra]\n", " Downloading stable_baselines3-1.6.2-py3-none-any.whl (170 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m170.0/170.0 KB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting box2d\n", " Downloading Box2D-2.3.10-cp38-cp38-manylinux1_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m48.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting box2d-kengz\n", " Downloading Box2D-kengz-2.3.3.tar.gz (425 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m425.4/425.4 KB\u001b[0m \u001b[31m41.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Collecting huggingface_sb3\n", " Downloading huggingface_sb3-2.2.4-py3-none-any.whl (9.4 kB)\n", "Collecting pyglet==1.5.1\n", " Downloading pyglet-1.5.1-py2.py3-none-any.whl (1.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m53.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.8/dist-packages (from stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (3.2.2)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.8/dist-packages (from stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (1.3.5)\n", "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.8/dist-packages (from stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (1.5.0)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (1.21.6)\n", "Collecting importlib-metadata~=4.13\n", " Downloading importlib_metadata-4.13.0-py3-none-any.whl (23 kB)\n", "Collecting gym==0.21\n", " Downloading gym-0.21.0.tar.gz (1.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m69.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: torch>=1.11 in /usr/local/lib/python3.8/dist-packages (from stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (1.13.0+cu116)\n", "Collecting rich\n", " Downloading rich-13.0.0-py3-none-any.whl (238 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m238.1/238.1 KB\u001b[0m \u001b[31m27.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting autorom[accept-rom-license]~=0.4.2\n", " Downloading AutoROM-0.4.2-py3-none-any.whl (16 kB)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (4.64.1)\n", "Collecting ale-py==0.7.4\n", " Downloading ale_py-0.7.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m75.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pillow in /usr/local/lib/python3.8/dist-packages (from stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (7.1.2)\n", "Requirement already satisfied: tensorboard>=2.9.1 in /usr/local/lib/python3.8/dist-packages (from stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (2.9.1)\n", "Requirement already satisfied: opencv-python in /usr/local/lib/python3.8/dist-packages (from stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (4.6.0.66)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.8/dist-packages (from stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (5.4.8)\n", "Requirement already satisfied: importlib-resources in /usr/local/lib/python3.8/dist-packages (from ale-py==0.7.4->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (5.10.1)\n", "Requirement already satisfied: pyyaml~=6.0 in /usr/local/lib/python3.8/dist-packages (from huggingface_sb3->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 4)) (6.0)\n", "Collecting huggingface-hub~=0.8\n", " Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m182.4/182.4 KB\u001b[0m \u001b[31m22.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting cloudpickle\n", " Downloading cloudpickle-2.2.0-py3-none-any.whl (25 kB)\n", "Requirement already satisfied: wasabi in /usr/local/lib/python3.8/dist-packages (from huggingface_sb3->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 4)) (0.10.1)\n", "Requirement already satisfied: click in /usr/local/lib/python3.8/dist-packages (from autorom[accept-rom-license]~=0.4.2->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 
1)) (7.1.2)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.8/dist-packages (from autorom[accept-rom-license]~=0.4.2->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (2.25.1)\n", "Collecting AutoROM.accept-rom-license\n", " Downloading AutoROM.accept-rom-license-0.5.0.tar.gz (10 kB)\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.8/dist-packages (from huggingface-hub~=0.8->huggingface_sb3->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 4)) (4.4.0)\n", "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.8/dist-packages (from huggingface-hub~=0.8->huggingface_sb3->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 4)) (21.3)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from huggingface-hub~=0.8->huggingface_sb3->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 4)) (3.8.2)\n", "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata~=4.13->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (3.11.0)\n", "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.8/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (57.4.0)\n", "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.8/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (1.0.1)\n", "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (1.8.1)\n", "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.8/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (0.4.6)\n", "Requirement already satisfied: protobuf<3.20,>=3.9.2 in /usr/local/lib/python3.8/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (3.19.6)\n", "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.8/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (3.4.1)\n", "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.8/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]->-r 
https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (2.15.0)\n", "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.8/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (1.3.0)\n", "Requirement already satisfied: grpcio>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (1.51.1)\n", "Requirement already satisfied: tensorboard-data-server<0.7.0,>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (0.6.1)\n", "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.8/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (0.38.4)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (1.4.4)\n", "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (3.0.9)\n", "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (2.8.2)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.8/dist-packages (from matplotlib->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (0.11.0)\n", "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (2022.7)\n", "Requirement already satisfied: pygments<3.0.0,>=2.6.0 in /usr/local/lib/python3.8/dist-packages (from rich->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (2.6.1)\n", "Collecting commonmark<0.10.0,>=0.9.0\n", " Downloading commonmark-0.9.1-py2.py3-none-any.whl (51 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.1/51.1 KB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.8/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (4.9)\n", "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.8/dist-packages (from 
google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (5.2.0)\n", "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.8/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (1.15.0)\n", "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.8/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (0.2.8)\n", "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.8/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard>=2.9.1->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (1.3.1)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests->autorom[accept-rom-license]~=0.4.2->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (1.24.3)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests->autorom[accept-rom-license]~=0.4.2->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (2.10)\n", "Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests->autorom[accept-rom-license]~=0.4.2->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (4.0.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests->autorom[accept-rom-license]~=0.4.2->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (2022.12.7)\n", "Collecting libtorrent\n", " Using cached libtorrent-2.0.7-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (8.6 MB)\n", "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.8/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (0.4.8)\n", "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard>=2.9.1->stable-baselines3[extra]->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1)) (3.2.2)\n", "Building wheels for collected packages: gym, box2d-kengz, AutoROM.accept-rom-license\n", " Building wheel for gym (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for gym: filename=gym-0.21.0-py3-none-any.whl size=1616823 sha256=778c35c52b1d64ae226db61a0d518e1c3ef6a26a960639903fcbe2e0c73e71a9\n", " Stored in directory: /root/.cache/pip/wheels/27/6d/b3/a3a6e10704795c9b9000f1ab2dc480dfe7bed42f5972806e73\n", " Building wheel for box2d-kengz (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", " Created wheel for box2d-kengz: filename=Box2D_kengz-2.3.3-cp38-cp38-linux_x86_64.whl size=2054123 sha256=a4094d8c43749b14b68aeff2eb2eef90383d1f2f197c08fab47128d6b9e07380\n", " Stored in directory: /root/.cache/pip/wheels/87/3a/ed/260cc09ed176c5b06aed67364b2387a3a62e7351396a979555\n", " Building wheel for AutoROM.accept-rom-license (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for AutoROM.accept-rom-license: filename=AutoROM.accept_rom_license-0.5.0-py3-none-any.whl size=440868 sha256=cb6113ecf8f9713f6331e1a2757df1b219447ebcfd6b36f184b047ab79f47ac4\n", " Stored in directory: /root/.cache/pip/wheels/bf/c9/25/578470ae932b494c313dc22e6c57afff192140fb3cd5acf185\n", "Successfully built gym box2d-kengz AutoROM.accept-rom-license\n", "Installing collected packages: pyglet, libtorrent, commonmark, box2d-kengz, box2d, rich, importlib-metadata, cloudpickle, huggingface-hub, gym, AutoROM.accept-rom-license, autorom, ale-py, stable-baselines3, huggingface_sb3\n", " Attempting uninstall: importlib-metadata\n", " Found existing installation: importlib-metadata 5.2.0\n", " Uninstalling importlib-metadata-5.2.0:\n", " Successfully uninstalled importlib-metadata-5.2.0\n", " Attempting uninstall: cloudpickle\n", " Found existing installation: cloudpickle 1.5.0\n", " Uninstalling cloudpickle-1.5.0:\n", " Successfully uninstalled cloudpickle-1.5.0\n", " Attempting uninstall: gym\n", " Found existing installation: gym 0.25.2\n", " Uninstalling gym-0.25.2:\n", " Successfully uninstalled gym-0.25.2\n", "Successfully installed AutoROM.accept-rom-license-0.5.0 ale-py-0.7.4 autorom-0.4.2 box2d-2.3.10 box2d-kengz-2.3.3 cloudpickle-2.2.0 commonmark-0.9.1 gym-0.21.0 huggingface-hub-0.11.1 huggingface_sb3-2.2.4 importlib-metadata-4.13.0 libtorrent-2.0.7 pyglet-1.5.1 rich-13.0.0 stable-baselines3-1.6.2\n", "Get:1 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease [3,626 B]\n", "Ign:2 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 InRelease\n", "Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 InRelease\n", "Hit:4 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 Release\n", "Get:5 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ Packages [102 kB]\n", "Hit:6 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease\n", "Get:8 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]\n", "Hit:9 http://archive.ubuntu.com/ubuntu bionic InRelease\n", "Hit:10 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease\n", "Get:11 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]\n", "Hit:12 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu bionic InRelease\n", "Get:13 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease [21.3 kB]\n", "Get:14 http://security.ubuntu.com/ubuntu bionic-security/universe amd64 Packages [1,568 kB]\n", "Get:15 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [83.3 kB]\n", "Get:16 http://archive.ubuntu.com/ubuntu bionic-updates/multiverse amd64 Packages [30.9 kB]\n", "Get:17 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 Packages [3,549 kB]\n", "Get:18 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic/main amd64 Packages [43.2 kB]\n", "Get:19 http://security.ubuntu.com/ubuntu bionic-security/main amd64 Packages [3,102 kB]\n", "Get:20 http://security.ubuntu.com/ubuntu bionic-security/restricted amd64 Packages [1,315 
kB]\n", "Get:21 http://archive.ubuntu.com/ubuntu bionic-updates/restricted amd64 Packages [1,389 kB]\n", "Get:22 http://archive.ubuntu.com/ubuntu bionic-updates/universe amd64 Packages [2,348 kB]\n", "Fetched 13.7 MB in 3s (4,566 kB/s)\n", "Reading package lists... Done\n", "Reading package lists... Done\n", "Building dependency tree \n", "Reading state information... Done\n", "The following package was automatically installed and is no longer required:\n", " libnvidia-common-460\n", "Use 'apt autoremove' to remove it.\n", "The following additional packages will be installed:\n", " freeglut3\n", "Suggested packages:\n", " libgle3\n", "The following NEW packages will be installed:\n", " freeglut3 python-opengl\n", "0 upgraded, 2 newly installed, 0 to remove and 27 not upgraded.\n", "Need to get 570 kB of archives.\n", "After this operation, 5,733 kB of additional disk space will be used.\n", "Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 freeglut3 amd64 2.8.1-3 [73.6 kB]\n", "Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 python-opengl all 3.1.0+dfsg-1 [496 kB]\n", "Fetched 570 kB in 1s (993 kB/s)\n", "Selecting previously unselected package freeglut3:amd64.\n", "(Reading database ... 124807 files and directories currently installed.)\n", "Preparing to unpack .../freeglut3_2.8.1-3_amd64.deb ...\n", "Unpacking freeglut3:amd64 (2.8.1-3) ...\n", "Selecting previously unselected package python-opengl.\n", "Preparing to unpack .../python-opengl_3.1.0+dfsg-1_all.deb ...\n", "Unpacking python-opengl (3.1.0+dfsg-1) ...\n", "Setting up freeglut3:amd64 (2.8.1-3) ...\n", "Setting up python-opengl (3.1.0+dfsg-1) ...\n", "Processing triggers for libc-bin (2.27-3ubuntu1.6) ...\n", "Reading package lists... Done\n", "Building dependency tree \n", "Reading state information... Done\n", "ffmpeg is already the newest version (7:3.4.11-0ubuntu0.1).\n", "The following package was automatically installed and is no longer required:\n", " libnvidia-common-460\n", "Use 'apt autoremove' to remove it.\n", "0 upgraded, 0 newly installed, 0 to remove and 27 not upgraded.\n", "Reading package lists... Done\n", "Building dependency tree \n", "Reading state information... Done\n", "The following package was automatically installed and is no longer required:\n", " libnvidia-common-460\n", "Use 'apt autoremove' to remove it.\n", "The following NEW packages will be installed:\n", " xvfb\n", "0 upgraded, 1 newly installed, 0 to remove and 27 not upgraded.\n", "Need to get 785 kB of archives.\n", "After this operation, 2,271 kB of additional disk space will be used.\n", "Get:1 http://archive.ubuntu.com/ubuntu bionic-updates/universe amd64 xvfb amd64 2:1.19.6-1ubuntu4.13 [785 kB]\n", "Fetched 785 kB in 1s (1,446 kB/s)\n", "Selecting previously unselected package xvfb.\n", "(Reading database ... 
127167 files and directories currently installed.)\n", "Preparing to unpack .../xvfb_2%3a1.19.6-1ubuntu4.13_amd64.deb ...\n", "Unpacking xvfb (2:1.19.6-1ubuntu4.13) ...\n", "Setting up xvfb (2:1.19.6-1ubuntu4.13) ...\n", "Processing triggers for man-db (2.8.3-2ubuntu0.1) ...\n", "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Collecting pyvirtualdisplay\n", " Downloading PyVirtualDisplay-3.0-py3-none-any.whl (15 kB)\n", "Installing collected packages: pyvirtualdisplay\n", "Successfully installed pyvirtualdisplay-3.0\n" ] } ], "source": [ "#COURSE: https://huggingface.co/deep-rl-course/unit1/hands-on?fw=pt\n", "#SB3: https://stable-baselines3.readthedocs.io/en/master/\n", "\n", "# The first step is to install the dependencies; we’ll install several of them.\n", "\n", "# gym[box2d]: Contains the LunarLander-v2 environment 🌛 (we use gym==0.21)\n", "!apt install swig cmake\n", "# stable-baselines3[extra]: The deep reinforcement learning library.\n", "# huggingface_sb3: Additional code for Stable-Baselines3 to load and upload models from the Hugging Face 🤗 Hub.\n", "!pip install -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt\n", "\n", "# During the notebook, we’ll need to generate a replay video. To do so in Colab, we need a virtual screen to be able to render the environment (and thus record the frames).\n", "# Hence, the following cell will install the virtual screen libraries, then create and run a virtual screen 🖥\n", "!sudo apt-get update\n", "!apt install python-opengl\n", "!apt install ffmpeg\n", "!apt install xvfb\n", "!pip3 install pyvirtualdisplay\n", "\n", "# To make sure the newly installed libraries are used, it’s sometimes required to restart the notebook runtime.
\n", "# The next cell will force the runtime to crash, so you’ll need to connect again and run the code starting from here.\n", "# Thanks for this trick, we will be able to run our virtual screen.\n", "import os\n", "os.kill(os.getpid(), 9)" ] }, { "cell_type": "markdown", "source": [ "#101 RL" ], "metadata": { "id": "DvV1yXbg3dHd" } }, { "cell_type": "code", "source": [ "# Virtual display: \n", "from pyvirtualdisplay import Display\n", "\n", "virtual_display = Display(visible=0, size=(1400, 900))\n", "virtual_display.start()\n", "\n", "#gym, training enviroment\n", "import gym\n", "\n", "#hf api to commit and upload in hub\n", "from huggingface_sb3 import load_from_hub, package_to_hub, push_to_hub\n", "from huggingface_hub import (\n", " notebook_login,\n", ") # To log to our Hugging Face account to be able to upload models to the Hub.\n", "\n", "#SB3 imports\n", "from stable_baselines3 import PPO,DQN\n", "from stable_baselines3.common.evaluation import evaluate_policy\n", "from stable_baselines3.common.env_util import make_vec_env" ], "metadata": { "id": "rSW85SrGzIst" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# We create our environment with gym.make(\"\")\n", "env = gym.make(\"LunarLander-v2\")\n", "env.reset()\n", "print(\"_____OBSERVATION SPACE_____ \\n\")\n", "print(\"Observation Space Shape\", env.observation_space.shape)\n", "print(\"Sample observation\", env.observation_space.sample()) # Get a random observation" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Zzhpmu4R1-0m", "outputId": "d9b45523-b57c-4f78-da5e-98d853dd5936" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "_____OBSERVATION SPACE_____ \n", "\n", "Observation Space Shape (8,)\n", "Sample observation [-1.1949229 1.0401516 -1.5713538 0.6417865 -0.99068826 0.9180964\n", " 0.6063702 -0.7064557 ]\n" ] } ] }, { "cell_type": "markdown", "source": [ "1. Horizontal pad coordinate (x)\n", "2. Vertical pad coordinate (y)\n", "3. Horizontal speed (x)\n", "4. Vertical speed (y)\n", "5. Angle\n", "6. Angular speed\n", "7. If the left leg has contact point touched the land\n", "8. If the right leg has contact point touched the land\n", "\n", "The action space (the set of possible actions the agent can take) is discrete with 4 actions available 🎮:\n", "\n", "1. Do nothing,\n", "2. Fire left orientation engine,\n", "3. Fire the main engine,\n", "4. Fire right orientation engine." 
], "metadata": { "id": "wrUpJRA03Vun" } }, { "cell_type": "markdown", "source": [ "## Example in gym enviroment" ], "metadata": { "id": "o7GDXD3h5ONv" } }, { "cell_type": "code", "source": [ "import gym\n", "\n", "# First, we create our environment called LunarLander-v2\n", "env = gym.make(\"LunarLander-v2\")\n", "\n", "# Then we reset this environment\n", "observation = env.reset()\n", "\n", "for _ in range(20):\n", " # Take a random action\n", " action = env.action_space.sample()\n", " print(\"Action taken:\", action)\n", "\n", " # Do this action in the environment and get\n", " # next_state, reward, done and info\n", " observation, reward, done, info = env.step(action)\n", " print('Observation Space: ', observation)\n", " print('Reward: ', reward)\n", "\n", " # If the game is done (in our case we land, crashed or timeout)\n", " if done:\n", " # Reset the environment\n", " print(\"Environment is reset\")\n", " observation = env.reset()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5BkQTdj56Xnx", "outputId": "d6b4b271-31b0-4b0b-a5dc-198bd7cde8b3" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Action taken: 3\n", "Observation Space: [-0.01036568 1.4191318 -0.5172001 0.16945073 0.00962685 0.07120871\n", " 0. 0. ]\n", "Reward: 1.269672058751156\n", "Action taken: 2\n", "Observation Space: [-0.01563349 1.4233158 -0.52976215 0.18593512 0.01258225 0.05911319\n", " 0. 0. ]\n", "Reward: -2.738042432763211\n", "Action taken: 2\n", "Observation Space: [-0.02082663 1.4282496 -0.52270544 0.21925125 0.01595003 0.06736136\n", " 0. 0. ]\n", "Reward: -1.6749531743428634\n", "Action taken: 2\n", "Observation Space: [-0.0260541 1.4340469 -0.52605623 0.2576173 0.01922631 0.06553184\n", " 0. 0. ]\n", "Reward: -3.108104401038008\n", "Action taken: 2\n", "Observation Space: [-0.03140211 1.4405689 -0.53762066 0.28983435 0.0220068 0.05561444\n", " 0. 0. ]\n", "Reward: -3.742918376601284\n", "Action taken: 0\n", "Observation Space: [-0.03675032 1.4464911 -0.5376283 0.2631606 0.0247872 0.05561341\n", " 0. 0. ]\n", "Reward: 0.3363140354737766\n", "Action taken: 1\n", "Observation Space: [-0.04218302 1.4518094 -0.54822737 0.23627064 0.0296919 0.09810287\n", " 0. 0. ]\n", "Reward: -0.9062384705031536\n", "Action taken: 2\n", "Observation Space: [-0.04748182 1.4573607 -0.53555346 0.24660602 0.03531078 0.11238781\n", " 0. 0. ]\n", "Reward: -0.6960898534258491\n", "Action taken: 2\n", "Observation Space: [-0.05264034 1.4628719 -0.52226806 0.24477519 0.04165727 0.12694128\n", " 0. 0. ]\n", "Reward: -0.2211075106117903\n", "Action taken: 2\n", "Observation Space: [-0.05776501 1.4683405 -0.5191156 0.24284819 0.04823701 0.13160713\n", " 0. 0. ]\n", "Reward: -1.1565209748743712\n", "Action taken: 3\n", "Observation Space: [-0.06282406 1.4732045 -0.51088494 0.21600994 0.05316728 0.09861431\n", " 0. 0. ]\n", "Reward: 0.8139199113958295\n", "Action taken: 0\n", "Observation Space: [-0.0678833 1.4774688 -0.510898 0.1893415 0.05809759 0.0986151\n", " 0. 0. ]\n", "Reward: 0.04050476469234354\n", "Action taken: 3\n", "Observation Space: [-0.07287674 1.481145 -0.50263083 0.16324933 0.06135691 0.06519198\n", " 0. 0. ]\n", "Reward: 0.8909257167214559\n", "Action taken: 3\n", "Observation Space: [-0.07778721 1.4842278 -0.4922166 0.13696656 0.06252229 0.02330985\n", " 0. 0. ]\n", "Reward: 1.2766033730314927\n", "Action taken: 2\n", "Observation Space: [-0.0827959 1.4875321 -0.5017478 0.14681949 0.06338704 0.01729673\n", " 0. 0. 
]\n", "Reward: -1.9304261530586075\n", "Action taken: 1\n", "Observation Space: [-0.08788119 1.4902319 -0.51135445 0.11986744 0.06617891 0.05584253\n", " 0. 0. ]\n", "Reward: -0.8506436435645515\n", "Action taken: 0\n", "Observation Space: [-0.09296665 1.4923317 -0.5113637 0.09320094 0.06896942 0.05581524\n", " 0. 0. ]\n", "Reward: 0.023374666425382884\n", "Action taken: 0\n", "Observation Space: [-0.09805222 1.4938316 -0.5113714 0.06653143 0.07175992 0.05581499\n", " 0. 0. ]\n", "Reward: -0.05054930086708964\n", "Action taken: 2\n", "Observation Space: [-0.10325174 1.4959832 -0.52252233 0.09549902 0.07429763 0.05075917\n", " 0. 0. ]\n", "Reward: -2.3530189327438675\n", "Action taken: 0\n", "Observation Space: [-0.10845137 1.4975345 -0.522528 0.06881952 0.07683524 0.05075677\n", " 0. 0. ]\n", "Reward: -0.031459577542790385\n" ] } ] }, { "cell_type": "markdown", "source": [], "metadata": { "id": "NCe8YvLW-cj-" } }, { "cell_type": "markdown", "source": [ "## Tutorial in SB3 - CartPole" ], "metadata": { "id": "blma7gTL-hYZ" } }, { "cell_type": "code", "source": [ "#https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/sb3/stable_baselines_getting_started.ipynb\n", "#IMPORTS\n", "import gym\n", "from stable_baselines3 import PPO\n", "from stable_baselines3.common.evaluation import evaluate_policy" ], "metadata": { "id": "IytEpH50Awzd" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "#@title Evaluate untrained model\n", "#create gym enviroment baased on repo cartpole\n", "env = gym.make('CartPole-v1')\n", "\n", "#create model from scratch\n", "# doc: https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html#parameters\n", "model = PPO('MlpPolicy',env,verbose=1)\n", "\n", "#Use a separeted enviroment for evaluation\n", "env_eval = gym.make('CartPole-v1')\n", "\n", "#random angent, without training\n", "mean_reward, std_reward = evaluate_policy(model,env_eval,n_eval_episodes=100)\n", "\n", "#print results\n", "print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "QqjbDDfq9cOG", "outputId": "80965138-3590-418b-c7f9-f505914a1545" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Using cuda device\n", "Wrapping the env with a `Monitor` wrapper\n", "Wrapping the env in a DummyVecEnv.\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.8/dist-packages/stable_baselines3/common/evaluation.py:65: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. 
Consider wrapping environment first with ``Monitor`` wrapper.\n", " warnings.warn(\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "mean_reward:36.41 +/- 13.44\n" ] } ] }, { "cell_type": "code", "source": [ "#@title train agent and evaluate it\n", "model.learn(total_timesteps=10000);\n", "\n", "#evaluate the trained agent\n", "mean_rwd, std_rwd = evaluate_policy(model,env_eval,n_eval_episodes=100)\n", "print(f'reward: {mean_rwd:.2f} +/- {std_rwd:.2f}')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "P30MgPaWBJIl", "outputId": "c9d2eb4f-bb11-4182-8245-957642408db2" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 23.8 |\n", "| ep_rew_mean | 23.8 |\n", "| time/ | |\n", "| fps | 939 |\n", "| iterations | 1 |\n", "| time_elapsed | 2 |\n", "| total_timesteps | 2048 |\n", "---------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 26.2 |\n", "| ep_rew_mean | 26.2 |\n", "| time/ | |\n", "| fps | 680 |\n", "| iterations | 2 |\n", "| time_elapsed | 6 |\n", "| total_timesteps | 4096 |\n", "| train/ | |\n", "| approx_kl | 0.0073210318 |\n", "| clip_fraction | 0.0722 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.687 |\n", "| explained_variance | 0.0085 |\n", "| learning_rate | 0.0003 |\n", "| loss | 8.15 |\n", "| n_updates | 10 |\n", "| policy_gradient_loss | -0.00975 |\n", "| value_loss | 57 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 32.5 |\n", "| ep_rew_mean | 32.5 |\n", "| time/ | |\n", "| fps | 637 |\n", "| iterations | 3 |\n", "| time_elapsed | 9 |\n", "| total_timesteps | 6144 |\n", "| train/ | |\n", "| approx_kl | 0.011862973 |\n", "| clip_fraction | 0.0742 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.672 |\n", "| explained_variance | 0.066 |\n", "| learning_rate | 0.0003 |\n", "| loss | 16.6 |\n", "| n_updates | 20 |\n", "| policy_gradient_loss | -0.0175 |\n", "| value_loss | 32.6 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 45.6 |\n", "| ep_rew_mean | 45.6 |\n", "| time/ | |\n", "| fps | 619 |\n", "| iterations | 4 |\n", "| time_elapsed | 13 |\n", "| total_timesteps | 8192 |\n", "| train/ | |\n", "| approx_kl | 0.011331993 |\n", "| clip_fraction | 0.13 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.635 |\n", "| explained_variance | 0.342 |\n", "| learning_rate | 0.0003 |\n", "| loss | 19.9 |\n", "| n_updates | 30 |\n", "| policy_gradient_loss | -0.024 |\n", "| value_loss | 46.8 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 56 |\n", "| ep_rew_mean | 56 |\n", "| time/ | |\n", "| fps | 608 |\n", "| iterations | 5 |\n", "| time_elapsed | 16 |\n", "| total_timesteps | 10240 |\n", "| train/ | |\n", "| approx_kl | 0.0070447056 |\n", "| clip_fraction | 0.0625 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.608 |\n", "| explained_variance | 0.192 |\n", "| learning_rate | 0.0003 |\n", "| loss | 22.3 |\n", "| n_updates | 40 |\n", "| policy_gradient_loss | -0.0137 |\n", "| value_loss | 62.8 |\n", "------------------------------------------\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ 
"/usr/local/lib/python3.8/dist-packages/stable_baselines3/common/evaluation.py:65: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper.\n", " warnings.warn(\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "reward: 396.51 +/- 142.18224185882005\n" ] } ] }, { "cell_type": "code", "source": [ "#@title Record episode\n", "# Set up fake display; otherwise rendering will fail\n", "import os\n", "os.system(\"Xvfb :1 -screen 0 1024x768x24 &\")\n", "os.environ['DISPLAY'] = ':1'" ], "metadata": { "id": "RJ1EDH1gCeVy" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv\n", "\n", "def record_video(env_id, model, video_length=500, prefix='', video_folder='videos/'):\n", " \"\"\"\n", " :param env_id: (str)\n", " :param model: (RL model)\n", " :param video_length: (int)\n", " :param prefix: (str)\n", " :param video_folder: (str)\n", " \"\"\"\n", " eval_env = DummyVecEnv([lambda: gym.make('CartPole-v1')])\n", " # Start the video at step=0 and record 500 steps\n", " eval_env = VecVideoRecorder(eval_env, video_folder=video_folder,\n", " record_video_trigger=lambda step: step == 0, video_length=video_length,\n", " name_prefix=prefix)\n", "\n", " obs = eval_env.reset()\n", " for _ in range(video_length):\n", " action, _ = model.predict(obs)\n", " obs, _, _, _ = eval_env.step(action)\n", "\n", " # Close the video recorder\n", " eval_env.close()" ], "metadata": { "id": "ZuNIRAC7DGxl" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "record_video('CartPole-v1', model, video_length=500, prefix='ppo-cartpole')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "tZLC88oiDHYj", "outputId": "2898125e-4c38-453b-f989-7ec60f3cc781" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Saving video to /content/videos/ppo-cartpole-step-0-to-step-500.mp4\n" ] } ] }, { "cell_type": "code", "source": [ "import base64\n", "from pathlib import Path\n", "\n", "from IPython import display as ipythondisplay\n", "\n", "def show_videos(video_path='', prefix=''):\n", " \"\"\"\n", " Taken from https://github.com/eleurent/highway-env\n", "\n", " :param video_path: (str) Path to the folder containing videos\n", " :param prefix: (str) Filter the video, showing only the only starting with this prefix\n", " \"\"\"\n", " html = []\n", " for mp4 in Path(video_path).glob(\"{}*.mp4\".format(prefix)):\n", " video_b64 = base64.b64encode(mp4.read_bytes())\n", " html.append(''''''.format(mp4, video_b64.decode('ascii')))\n", " ipythondisplay.display(ipythondisplay.HTML(data=\"
\".join(html)))" ], "metadata": { "id": "T4vOde0WC_z9" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "show_videos('videos', prefix='ppo')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 421 }, "id": "dadNQWrwDlkO", "outputId": "6d1364dd-4500-4f68-f548-730617e6c8ef" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "" ] }, "metadata": {} } ] }, { "cell_type": "markdown", "source": [ "## Tutorial in SB3 - Lunar Lander" ], "metadata": { "id": "43a3FyJJGE7B" } }, { "cell_type": "code", "source": [ "#https://stable-baselines3.readthedocs.io/en/master/guide/examples.html#id4\n", "import gym\n", "from stable_baselines3 import DQN\n", "from stable_baselines3.common.evaluation import evaluate_policy\n", "from stable_baselines3.common.env_util import make_vec_env #used to build vectorized enviroment\n", "\n", "#create enviroment - lets use vec_env\n", "env = gym.make('LunarLander-v2')\n", "\n", "#create model\n", "model = DQN('MlpPolicy',env,verbose=0).learn(total_timesteps=int(1e4),progress_bar=True)\n", "\n", "#save the agent\n", "# model.save('dqn_lunar')\n", "\n", "# del model # delete trained model to demonstrate loading\n", "\n", "# # Load the trained agent\n", "# # NOTE: if you have loading issue, you can pass `print_system_info=True`\n", "# # to compare the system on which the model was trained vs the current one\n", "# # model = DQN.load(\"dqn_lunar\", env=env, print_system_info=True)\n", "# model = DQN.load(\"dqn_lunar\", env=env)\n", "\n", "# We create a vectorized environment \n", "# (method for stacking multiple independent environments into a single environment) \n", "# of 16 environments, this way, we’ll have more diverse experiences during the training.\n", "env_eval = make_vec_env(\"LunarLander-v2\", n_envs=5)\n", "\n", "#evaluate model\n", "mean_rwd,std_rwd = evaluate_policy(model,model.get_env(),n_eval_episodes=10)\n", "\n", "#print results\n", "print(f\"mean_reward:{mean_rwd:.2f} +/- {std_rwd:.2f}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 51, "referenced_widgets": [ "17e75f6149c14f8d9619f319bf9ee553", "cbb6deb9baf742e8a6765d0891f193c7" ] }, "collapsed": true, "id": "GlGM6JRSGNvR", "outputId": "f0e9f914-edff-47b3-a406-62c858192523" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Output()" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "17e75f6149c14f8d9619f319bf9ee553" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [], "text/html": [ "
\n"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "\n"
            ],
            "text/html": [
              "
\n",
              "
\n" ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "mean_reward:-147.89 +/- 27.79\n" ] } ] }, { "cell_type": "code", "source": [ "# Enjoy trained agent\n", "vec_env = model.get_env()\n", "obs = vec_env.reset()\n", "for i in range(1000):\n", " action, _states = model.predict(obs, deterministic=True)\n", " obs, rewards, dones, info = vec_env.step(action)\n", " vec_env.render()" ], "metadata": { "id": "rpqfDLuFJnnO" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv\n", "\n", "def record_video(env_id, model, video_length=500, prefix='', video_folder='videos/'):\n", " \"\"\"\n", " :param env_id: (str)\n", " :param model: (RL model)\n", " :param video_length: (int)\n", " :param prefix: (str)\n", " :param video_folder: (str)\n", " \"\"\"\n", " eval_env = model.get_env()\n", " # Start the video at step=0 and record 500 steps\n", " eval_env = VecVideoRecorder(eval_env, video_folder=video_folder,\n", " record_video_trigger=lambda step: step == 0, video_length=video_length,\n", " name_prefix=prefix)\n", "\n", " obs = eval_env.reset()\n", " for _ in range(video_length):\n", " action, _ = model.predict(obs)\n", " obs, _, _, _ = eval_env.step(action)\n", "\n", " # Close the video recorder\n", " eval_env.close()\n", "\n", "import base64\n", "from pathlib import Path\n", "\n", "from IPython import display as ipythondisplay\n", "\n", "def show_videos(video_path='', prefix=''):\n", " \"\"\"\n", " Taken from https://github.com/eleurent/highway-env\n", "\n", " :param video_path: (str) Path to the folder containing videos\n", " :param prefix: (str) Filter the video, showing only the only starting with this prefix\n", " \"\"\"\n", " html = []\n", " for mp4 in Path(video_path).glob(\"{}*.mp4\".format(prefix)):\n", " video_b64 = base64.b64encode(mp4.read_bytes())\n", " html.append(''''''.format(mp4, video_b64.decode('ascii')))\n", " ipythondisplay.display(ipythondisplay.HTML(data=\"
\".join(html)))" ], "metadata": { "id": "jVe1E_fBMQ4P" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "record_video('LunarLander-v2', model, video_length=500, prefix='dqn-ll')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "EwOzWQH4MXGO", "outputId": "5de308ad-901e-4228-c192-964a4aec3310" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Saving video to /content/videos/dqn-ll-step-0-to-step-500.mp4\n" ] } ] }, { "cell_type": "code", "source": [ "show_videos('videos', prefix='dqn')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 421 }, "id": "-m7ndJM7MZA3", "outputId": "a442132a-0231-416a-f2a9-d1b90192993c" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "" ] }, "metadata": {} } ] }, { "cell_type": "markdown", "source": [ "#MODEL" ], "metadata": { "id": "FdPwV7IJ3kJ-" } }, { "cell_type": "code", "source": [ "#@title imports\n", "#HF tutorial\n", "#https://huggingface.co/deep-rl-course/unit1/hands-on\n", "\n", "#imports\n", "import gym\n", "from stable_baselines3 import PPO\n", "from stable_baselines3.common.evaluation import evaluate_policy\n", "from stable_baselines3.common.env_util import make_vec_env\n", "from stable_baselines3.common.vec_env import DummyVecEnv\n", "from huggingface_sb3 import load_from_hub, package_to_hub, push_to_hub\n", "from huggingface_hub import (\n", " notebook_login,\n", ")\n", "\n", "# Virtual display\n", "from pyvirtualdisplay import Display\n", "\n", "virtual_display = Display(visible=0, size=(1400, 900));\n", "virtual_display.start();\n", "\n", "#create seed\n", "seed=11" ], "metadata": { "collapsed": true, "id": "X4Gy8azCDsPX" }, "execution_count": 1, "outputs": [] }, { "cell_type": "code", "source": [ "%%time\n", "#@title create and train the model\n", "#create enviroment\n", "env = gym.make('LunarLander-v2')\n", "\n", "#reset enviroment ot initial state\n", "env.reset()\n", "\n", "#create vectorized enviroment\n", "env = make_vec_env(\"LunarLander-v2\", n_envs=16)\n", "\n", "#instanciate the agent\n", "#params: https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html#parameters\n", "model = PPO('MlpPolicy',env,verbose=1,\n", " learning_rate=0.0003,\n", " # n_steps=2048,\n", " n_steps=1024,\n", " batch_size=64,\n", " # n_epochs=10,\n", " n_epochs=4,\n", " # gamma=0.99,\n", " gamma=0.999,\n", " # gae_lambda=0.95,\n", " gae_lambda=0.98,\n", " clip_range=0.2,\n", " clip_range_vf=None,\n", " normalize_advantage=True,\n", " # ent_coef=0.0,\n", " ent_coef=0.01\n", " # vf_coef=0.5,\n", " # max_grad_norm=0.5,\n", " # use_sde=False,\n", " # sde_sample_freq=-1,\n", " # target_kl=None,\n", " # tensorboard_log=None,\n", " # policy_kwargs=None,\n", " # verbose=0,\n", " # seed=seed,\n", " # device='auto',\n", " # _init_setup_model=True \n", " )\n", "\n", "#train model\n", "model.learn(total_timesteps=int(1e6))\n", "\n", "# Save the model\n", "# model_name = \"ppo_notrain-LunarLander-v2\"\n", "# model.save(model_name)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nb7KJDAtFTqx", "outputId": "bb8fb90a-54d9-4ca0-8da9-71d0192e89c8" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Using cuda device\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 96.5 |\n", "| ep_rew_mean | -185 |\n", "| time/ | |\n", "| fps | 1563 |\n", "| iterations | 1 |\n", "| 
time_elapsed | 10 |\n", "| total_timesteps | 16384 |\n", "---------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 89.2 |\n", "| ep_rew_mean | -137 |\n", "| time/ | |\n", "| fps | 1667 |\n", "| iterations | 2 |\n", "| time_elapsed | 19 |\n", "| total_timesteps | 32768 |\n", "| train/ | |\n", "| approx_kl | 0.0072074104 |\n", "| clip_fraction | 0.0691 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.38 |\n", "| explained_variance | 0.0007 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.33e+03 |\n", "| n_updates | 4 |\n", "| policy_gradient_loss | -0.00629 |\n", "| value_loss | 4.99e+03 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 104 |\n", "| ep_rew_mean | -125 |\n", "| time/ | |\n", "| fps | 1702 |\n", "| iterations | 3 |\n", "| time_elapsed | 28 |\n", "| total_timesteps | 49152 |\n", "| train/ | |\n", "| approx_kl | 0.006704938 |\n", "| clip_fraction | 0.0408 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.37 |\n", "| explained_variance | 0.00203 |\n", "| learning_rate | 0.0003 |\n", "| loss | 772 |\n", "| n_updates | 8 |\n", "| policy_gradient_loss | -0.00598 |\n", "| value_loss | 1.82e+03 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 97 |\n", "| ep_rew_mean | -128 |\n", "| time/ | |\n", "| fps | 1635 |\n", "| iterations | 4 |\n", "| time_elapsed | 40 |\n", "| total_timesteps | 65536 |\n", "| train/ | |\n", "| approx_kl | 0.0066061467 |\n", "| clip_fraction | 0.0476 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.36 |\n", "| explained_variance | 0.00563 |\n", "| learning_rate | 0.0003 |\n", "| loss | 486 |\n", "| n_updates | 12 |\n", "| policy_gradient_loss | -0.00385 |\n", "| value_loss | 1.1e+03 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 96.8 |\n", "| ep_rew_mean | -104 |\n", "| time/ | |\n", "| fps | 1583 |\n", "| iterations | 5 |\n", "| time_elapsed | 51 |\n", "| total_timesteps | 81920 |\n", "| train/ | |\n", "| approx_kl | 0.008154811 |\n", "| clip_fraction | 0.0765 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.35 |\n", "| explained_variance | -0.00918 |\n", "| learning_rate | 0.0003 |\n", "| loss | 477 |\n", "| n_updates | 16 |\n", "| policy_gradient_loss | -0.00348 |\n", "| value_loss | 995 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 103 |\n", "| ep_rew_mean | -100 |\n", "| time/ | |\n", "| fps | 1631 |\n", "| iterations | 6 |\n", "| time_elapsed | 60 |\n", "| total_timesteps | 98304 |\n", "| train/ | |\n", "| approx_kl | 0.0100449305 |\n", "| clip_fraction | 0.0683 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.35 |\n", "| explained_variance | 0.00492 |\n", "| learning_rate | 0.0003 |\n", "| loss | 228 |\n", "| n_updates | 20 |\n", "| policy_gradient_loss | -0.00505 |\n", "| value_loss | 721 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 109 |\n", "| ep_rew_mean | -78.2 |\n", "| time/ | |\n", "| fps | 1607 |\n", "| iterations | 7 |\n", "| time_elapsed | 71 |\n", "| total_timesteps | 114688 |\n", "| train/ | |\n", "| approx_kl | 0.010880301 |\n", "| clip_fraction | 0.101 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.32 
|\n", "| explained_variance | 0.000472 |\n", "| learning_rate | 0.0003 |\n", "| loss | 420 |\n", "| n_updates | 24 |\n", "| policy_gradient_loss | -0.00438 |\n", "| value_loss | 505 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 112 |\n", "| ep_rew_mean | -51.3 |\n", "| time/ | |\n", "| fps | 1603 |\n", "| iterations | 8 |\n", "| time_elapsed | 81 |\n", "| total_timesteps | 131072 |\n", "| train/ | |\n", "| approx_kl | 0.006525865 |\n", "| clip_fraction | 0.0393 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.29 |\n", "| explained_variance | -0.0186 |\n", "| learning_rate | 0.0003 |\n", "| loss | 306 |\n", "| n_updates | 28 |\n", "| policy_gradient_loss | -0.00367 |\n", "| value_loss | 454 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 113 |\n", "| ep_rew_mean | -41.3 |\n", "| time/ | |\n", "| fps | 1619 |\n", "| iterations | 9 |\n", "| time_elapsed | 91 |\n", "| total_timesteps | 147456 |\n", "| train/ | |\n", "| approx_kl | 0.0074994955 |\n", "| clip_fraction | 0.0291 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.24 |\n", "| explained_variance | 1.69e-05 |\n", "| learning_rate | 0.0003 |\n", "| loss | 146 |\n", "| n_updates | 32 |\n", "| policy_gradient_loss | -0.00373 |\n", "| value_loss | 424 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 135 |\n", "| ep_rew_mean | -29.3 |\n", "| time/ | |\n", "| fps | 1557 |\n", "| iterations | 10 |\n", "| time_elapsed | 105 |\n", "| total_timesteps | 163840 |\n", "| train/ | |\n", "| approx_kl | 0.010937294 |\n", "| clip_fraction | 0.081 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.2 |\n", "| explained_variance | -3.19e-05 |\n", "| learning_rate | 0.0003 |\n", "| loss | 282 |\n", "| n_updates | 36 |\n", "| policy_gradient_loss | -0.00622 |\n", "| value_loss | 435 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 141 |\n", "| ep_rew_mean | -24.5 |\n", "| time/ | |\n", "| fps | 1532 |\n", "| iterations | 11 |\n", "| time_elapsed | 117 |\n", "| total_timesteps | 180224 |\n", "| train/ | |\n", "| approx_kl | 0.008629812 |\n", "| clip_fraction | 0.0526 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.19 |\n", "| explained_variance | -0.000213 |\n", "| learning_rate | 0.0003 |\n", "| loss | 288 |\n", "| n_updates | 40 |\n", "| policy_gradient_loss | -0.00274 |\n", "| value_loss | 489 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 131 |\n", "| ep_rew_mean | -15.4 |\n", "| time/ | |\n", "| fps | 1540 |\n", "| iterations | 12 |\n", "| time_elapsed | 127 |\n", "| total_timesteps | 196608 |\n", "| train/ | |\n", "| approx_kl | 0.0052077975 |\n", "| clip_fraction | 0.0344 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.18 |\n", "| explained_variance | -2.15e-06 |\n", "| learning_rate | 0.0003 |\n", "| loss | 293 |\n", "| n_updates | 44 |\n", "| policy_gradient_loss | -0.00119 |\n", "| value_loss | 577 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 149 |\n", "| ep_rew_mean | -12.5 |\n", "| time/ | |\n", "| fps | 1505 |\n", "| iterations | 13 |\n", "| time_elapsed | 141 |\n", "| total_timesteps | 212992 
|\n", "| train/ | |\n", "| approx_kl | 0.006667701 |\n", "| clip_fraction | 0.0438 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.14 |\n", "| explained_variance | -5.13e-06 |\n", "| learning_rate | 0.0003 |\n", "| loss | 338 |\n", "| n_updates | 48 |\n", "| policy_gradient_loss | -0.00266 |\n", "| value_loss | 677 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 188 |\n", "| ep_rew_mean | -3.19 |\n", "| time/ | |\n", "| fps | 1437 |\n", "| iterations | 14 |\n", "| time_elapsed | 159 |\n", "| total_timesteps | 229376 |\n", "| train/ | |\n", "| approx_kl | 0.0070217205 |\n", "| clip_fraction | 0.0468 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.14 |\n", "| explained_variance | 1.75e-05 |\n", "| learning_rate | 0.0003 |\n", "| loss | 184 |\n", "| n_updates | 52 |\n", "| policy_gradient_loss | -0.00182 |\n", "| value_loss | 636 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 254 |\n", "| ep_rew_mean | -8.23 |\n", "| time/ | |\n", "| fps | 1367 |\n", "| iterations | 15 |\n", "| time_elapsed | 179 |\n", "| total_timesteps | 245760 |\n", "| train/ | |\n", "| approx_kl | 0.0043239947 |\n", "| clip_fraction | 0.0285 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.18 |\n", "| explained_variance | 0.00239 |\n", "| learning_rate | 0.0003 |\n", "| loss | 224 |\n", "| n_updates | 56 |\n", "| policy_gradient_loss | -0.00181 |\n", "| value_loss | 579 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 362 |\n", "| ep_rew_mean | -12.9 |\n", "| time/ | |\n", "| fps | 1298 |\n", "| iterations | 16 |\n", "| time_elapsed | 201 |\n", "| total_timesteps | 262144 |\n", "| train/ | |\n", "| approx_kl | 0.0047796033 |\n", "| clip_fraction | 0.0128 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.19 |\n", "| explained_variance | 0.107 |\n", "| learning_rate | 0.0003 |\n", "| loss | 236 |\n", "| n_updates | 60 |\n", "| policy_gradient_loss | -0.00337 |\n", "| value_loss | 513 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 395 |\n", "| ep_rew_mean | -5.92 |\n", "| time/ | |\n", "| fps | 1225 |\n", "| iterations | 17 |\n", "| time_elapsed | 227 |\n", "| total_timesteps | 278528 |\n", "| train/ | |\n", "| approx_kl | 0.0053736507 |\n", "| clip_fraction | 0.0182 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.23 |\n", "| explained_variance | 0.34 |\n", "| learning_rate | 0.0003 |\n", "| loss | 223 |\n", "| n_updates | 64 |\n", "| policy_gradient_loss | -0.00293 |\n", "| value_loss | 372 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 495 |\n", "| ep_rew_mean | -7.24 |\n", "| time/ | |\n", "| fps | 1163 |\n", "| iterations | 18 |\n", "| time_elapsed | 253 |\n", "| total_timesteps | 294912 |\n", "| train/ | |\n", "| approx_kl | 0.0040366603 |\n", "| clip_fraction | 0.0154 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.23 |\n", "| explained_variance | 0.572 |\n", "| learning_rate | 0.0003 |\n", "| loss | 82.3 |\n", "| n_updates | 68 |\n", "| policy_gradient_loss | -0.00172 |\n", "| value_loss | 232 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 585 |\n", "| 
ep_rew_mean | 2.97 |\n", "| time/ | |\n", "| fps | 1124 |\n", "| iterations | 19 |\n", "| time_elapsed | 276 |\n", "| total_timesteps | 311296 |\n", "| train/ | |\n", "| approx_kl | 0.004679693 |\n", "| clip_fraction | 0.0307 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.23 |\n", "| explained_variance | 0.692 |\n", "| learning_rate | 0.0003 |\n", "| loss | 78.4 |\n", "| n_updates | 72 |\n", "| policy_gradient_loss | -0.00332 |\n", "| value_loss | 213 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 623 |\n", "| ep_rew_mean | 8.01 |\n", "| time/ | |\n", "| fps | 1095 |\n", "| iterations | 20 |\n", "| time_elapsed | 299 |\n", "| total_timesteps | 327680 |\n", "| train/ | |\n", "| approx_kl | 0.0063947802 |\n", "| clip_fraction | 0.0378 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.25 |\n", "| explained_variance | 0.763 |\n", "| learning_rate | 0.0003 |\n", "| loss | 51.9 |\n", "| n_updates | 76 |\n", "| policy_gradient_loss | -0.00195 |\n", "| value_loss | 151 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 595 |\n", "| ep_rew_mean | 3.66 |\n", "| time/ | |\n", "| fps | 1060 |\n", "| iterations | 21 |\n", "| time_elapsed | 324 |\n", "| total_timesteps | 344064 |\n", "| train/ | |\n", "| approx_kl | 0.006050572 |\n", "| clip_fraction | 0.05 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.18 |\n", "| explained_variance | 0.781 |\n", "| learning_rate | 0.0003 |\n", "| loss | 93.8 |\n", "| n_updates | 80 |\n", "| policy_gradient_loss | -0.00283 |\n", "| value_loss | 208 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 633 |\n", "| ep_rew_mean | 1.84 |\n", "| time/ | |\n", "| fps | 1040 |\n", "| iterations | 22 |\n", "| time_elapsed | 346 |\n", "| total_timesteps | 360448 |\n", "| train/ | |\n", "| approx_kl | 0.0070051556 |\n", "| clip_fraction | 0.0654 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.22 |\n", "| explained_variance | 0.798 |\n", "| learning_rate | 0.0003 |\n", "| loss | 24.7 |\n", "| n_updates | 84 |\n", "| policy_gradient_loss | -0.00282 |\n", "| value_loss | 236 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 653 |\n", "| ep_rew_mean | 14.3 |\n", "| time/ | |\n", "| fps | 1017 |\n", "| iterations | 23 |\n", "| time_elapsed | 370 |\n", "| total_timesteps | 376832 |\n", "| train/ | |\n", "| approx_kl | 0.006572186 |\n", "| clip_fraction | 0.0339 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.22 |\n", "| explained_variance | 0.735 |\n", "| learning_rate | 0.0003 |\n", "| loss | 40.6 |\n", "| n_updates | 88 |\n", "| policy_gradient_loss | -0.00144 |\n", "| value_loss | 288 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 697 |\n", "| ep_rew_mean | 22.7 |\n", "| time/ | |\n", "| fps | 991 |\n", "| iterations | 24 |\n", "| time_elapsed | 396 |\n", "| total_timesteps | 393216 |\n", "| train/ | |\n", "| approx_kl | 0.0060662907 |\n", "| clip_fraction | 0.0443 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.16 |\n", "| explained_variance | 0.898 |\n", "| learning_rate | 0.0003 |\n", "| loss | 111 |\n", "| n_updates | 92 |\n", "| policy_gradient_loss | -0.00145 |\n", "| value_loss | 97 |\n", 
"------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 771 |\n", "| ep_rew_mean | 35.9 |\n", "| time/ | |\n", "| fps | 970 |\n", "| iterations | 25 |\n", "| time_elapsed | 421 |\n", "| total_timesteps | 409600 |\n", "| train/ | |\n", "| approx_kl | 0.005505248 |\n", "| clip_fraction | 0.0356 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.16 |\n", "| explained_variance | 0.833 |\n", "| learning_rate | 0.0003 |\n", "| loss | 105 |\n", "| n_updates | 96 |\n", "| policy_gradient_loss | -0.00241 |\n", "| value_loss | 155 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 784 |\n", "| ep_rew_mean | 44.4 |\n", "| time/ | |\n", "| fps | 953 |\n", "| iterations | 26 |\n", "| time_elapsed | 446 |\n", "| total_timesteps | 425984 |\n", "| train/ | |\n", "| approx_kl | 0.0062015317 |\n", "| clip_fraction | 0.047 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.14 |\n", "| explained_variance | 0.907 |\n", "| learning_rate | 0.0003 |\n", "| loss | 18.5 |\n", "| n_updates | 100 |\n", "| policy_gradient_loss | -0.00201 |\n", "| value_loss | 73.8 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 779 |\n", "| ep_rew_mean | 57.9 |\n", "| time/ | |\n", "| fps | 937 |\n", "| iterations | 27 |\n", "| time_elapsed | 472 |\n", "| total_timesteps | 442368 |\n", "| train/ | |\n", "| approx_kl | 0.0054453197 |\n", "| clip_fraction | 0.0328 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.11 |\n", "| explained_variance | 0.934 |\n", "| learning_rate | 0.0003 |\n", "| loss | 7.56 |\n", "| n_updates | 104 |\n", "| policy_gradient_loss | -0.00224 |\n", "| value_loss | 46.1 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 796 |\n", "| ep_rew_mean | 62.5 |\n", "| time/ | |\n", "| fps | 926 |\n", "| iterations | 28 |\n", "| time_elapsed | 495 |\n", "| total_timesteps | 458752 |\n", "| train/ | |\n", "| approx_kl | 0.005300224 |\n", "| clip_fraction | 0.032 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.08 |\n", "| explained_variance | 0.857 |\n", "| learning_rate | 0.0003 |\n", "| loss | 97.7 |\n", "| n_updates | 108 |\n", "| policy_gradient_loss | -0.00108 |\n", "| value_loss | 129 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 785 |\n", "| ep_rew_mean | 71.1 |\n", "| time/ | |\n", "| fps | 917 |\n", "| iterations | 29 |\n", "| time_elapsed | 517 |\n", "| total_timesteps | 475136 |\n", "| train/ | |\n", "| approx_kl | 0.005210985 |\n", "| clip_fraction | 0.0342 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.13 |\n", "| explained_variance | 0.923 |\n", "| learning_rate | 0.0003 |\n", "| loss | 11.5 |\n", "| n_updates | 112 |\n", "| policy_gradient_loss | -0.000584 |\n", "| value_loss | 73.6 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 770 |\n", "| ep_rew_mean | 77 |\n", "| time/ | |\n", "| fps | 909 |\n", "| iterations | 30 |\n", "| time_elapsed | 540 |\n", "| total_timesteps | 491520 |\n", "| train/ | |\n", "| approx_kl | 0.004735007 |\n", "| clip_fraction | 0.0405 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.08 |\n", "| explained_variance | 0.912 |\n", "| learning_rate | 
0.0003 |\n", "| loss | 50.4 |\n", "| n_updates | 116 |\n", "| policy_gradient_loss | -0.00151 |\n", "| value_loss | 89.5 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 777 |\n", "| ep_rew_mean | 82.7 |\n", "| time/ | |\n", "| fps | 900 |\n", "| iterations | 31 |\n", "| time_elapsed | 564 |\n", "| total_timesteps | 507904 |\n", "| train/ | |\n", "| approx_kl | 0.0066191936 |\n", "| clip_fraction | 0.0486 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.11 |\n", "| explained_variance | 0.938 |\n", "| learning_rate | 0.0003 |\n", "| loss | 12.8 |\n", "| n_updates | 120 |\n", "| policy_gradient_loss | -0.00218 |\n", "| value_loss | 63 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 778 |\n", "| ep_rew_mean | 85.4 |\n", "| time/ | |\n", "| fps | 890 |\n", "| iterations | 32 |\n", "| time_elapsed | 588 |\n", "| total_timesteps | 524288 |\n", "| train/ | |\n", "| approx_kl | 0.0065373005 |\n", "| clip_fraction | 0.0398 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.13 |\n", "| explained_variance | 0.967 |\n", "| learning_rate | 0.0003 |\n", "| loss | 9.5 |\n", "| n_updates | 124 |\n", "| policy_gradient_loss | -0.00064 |\n", "| value_loss | 28.4 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 804 |\n", "| ep_rew_mean | 92.8 |\n", "| time/ | |\n", "| fps | 883 |\n", "| iterations | 33 |\n", "| time_elapsed | 611 |\n", "| total_timesteps | 540672 |\n", "| train/ | |\n", "| approx_kl | 0.0042019626 |\n", "| clip_fraction | 0.0199 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.08 |\n", "| explained_variance | 0.946 |\n", "| learning_rate | 0.0003 |\n", "| loss | 13.9 |\n", "| n_updates | 128 |\n", "| policy_gradient_loss | -0.000678 |\n", "| value_loss | 50.5 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 849 |\n", "| ep_rew_mean | 104 |\n", "| time/ | |\n", "| fps | 877 |\n", "| iterations | 34 |\n", "| time_elapsed | 634 |\n", "| total_timesteps | 557056 |\n", "| train/ | |\n", "| approx_kl | 0.004625935 |\n", "| clip_fraction | 0.0307 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.1 |\n", "| explained_variance | 0.958 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.19 |\n", "| n_updates | 132 |\n", "| policy_gradient_loss | -0.000516 |\n", "| value_loss | 44.9 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 891 |\n", "| ep_rew_mean | 110 |\n", "| time/ | |\n", "| fps | 872 |\n", "| iterations | 35 |\n", "| time_elapsed | 656 |\n", "| total_timesteps | 573440 |\n", "| train/ | |\n", "| approx_kl | 0.0066321297 |\n", "| clip_fraction | 0.0426 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.07 |\n", "| explained_variance | 0.958 |\n", "| learning_rate | 0.0003 |\n", "| loss | 8.33 |\n", "| n_updates | 136 |\n", "| policy_gradient_loss | -0.00137 |\n", "| value_loss | 51.3 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 914 |\n", "| ep_rew_mean | 114 |\n", "| time/ | |\n", "| fps | 869 |\n", "| iterations | 36 |\n", "| time_elapsed | 678 |\n", "| total_timesteps | 589824 |\n", "| train/ | |\n", "| approx_kl | 0.0062832776 |\n", "| 
clip_fraction | 0.0513 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.08 |\n", "| explained_variance | 0.987 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.56 |\n", "| n_updates | 140 |\n", "| policy_gradient_loss | -0.00125 |\n", "| value_loss | 11.9 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 921 |\n", "| ep_rew_mean | 118 |\n", "| time/ | |\n", "| fps | 865 |\n", "| iterations | 37 |\n", "| time_elapsed | 700 |\n", "| total_timesteps | 606208 |\n", "| train/ | |\n", "| approx_kl | 0.005479424 |\n", "| clip_fraction | 0.0395 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.07 |\n", "| explained_variance | 0.985 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.37 |\n", "| n_updates | 144 |\n", "| policy_gradient_loss | -0.00262 |\n", "| value_loss | 15.4 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 923 |\n", "| ep_rew_mean | 117 |\n", "| time/ | |\n", "| fps | 862 |\n", "| iterations | 38 |\n", "| time_elapsed | 722 |\n", "| total_timesteps | 622592 |\n", "| train/ | |\n", "| approx_kl | 0.006192011 |\n", "| clip_fraction | 0.0255 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.08 |\n", "| explained_variance | 0.962 |\n", "| learning_rate | 0.0003 |\n", "| loss | 4.13 |\n", "| n_updates | 148 |\n", "| policy_gradient_loss | -0.000916 |\n", "| value_loss | 39 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 896 |\n", "| ep_rew_mean | 114 |\n", "| time/ | |\n", "| fps | 858 |\n", "| iterations | 39 |\n", "| time_elapsed | 744 |\n", "| total_timesteps | 638976 |\n", "| train/ | |\n", "| approx_kl | 0.0042629438 |\n", "| clip_fraction | 0.0215 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.04 |\n", "| explained_variance | 0.974 |\n", "| learning_rate | 0.0003 |\n", "| loss | 69 |\n", "| n_updates | 152 |\n", "| policy_gradient_loss | -0.000256 |\n", "| value_loss | 32.4 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 881 |\n", "| ep_rew_mean | 113 |\n", "| time/ | |\n", "| fps | 855 |\n", "| iterations | 40 |\n", "| time_elapsed | 766 |\n", "| total_timesteps | 655360 |\n", "| train/ | |\n", "| approx_kl | 0.0060078157 |\n", "| clip_fraction | 0.0397 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.997 |\n", "| explained_variance | 0.966 |\n", "| learning_rate | 0.0003 |\n", "| loss | 6.63 |\n", "| n_updates | 156 |\n", "| policy_gradient_loss | -0.000987 |\n", "| value_loss | 36.2 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 868 |\n", "| ep_rew_mean | 113 |\n", "| time/ | |\n", "| fps | 851 |\n", "| iterations | 41 |\n", "| time_elapsed | 788 |\n", "| total_timesteps | 671744 |\n", "| train/ | |\n", "| approx_kl | 0.004950188 |\n", "| clip_fraction | 0.0336 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.02 |\n", "| explained_variance | 0.968 |\n", "| learning_rate | 0.0003 |\n", "| loss | 8.24 |\n", "| n_updates | 160 |\n", "| policy_gradient_loss | -0.000195 |\n", "| value_loss | 40.3 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 861 |\n", "| ep_rew_mean | 111 |\n", "| time/ | |\n", "| fps | 847 |\n", "| iterations 
| 42 |\n", "| time_elapsed | 811 |\n", "| total_timesteps | 688128 |\n", "| train/ | |\n", "| approx_kl | 0.0063575464 |\n", "| clip_fraction | 0.0361 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.01 |\n", "| explained_variance | 0.981 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.72 |\n", "| n_updates | 164 |\n", "| policy_gradient_loss | -0.000318 |\n", "| value_loss | 20.4 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 877 |\n", "| ep_rew_mean | 116 |\n", "| time/ | |\n", "| fps | 844 |\n", "| iterations | 43 |\n", "| time_elapsed | 834 |\n", "| total_timesteps | 704512 |\n", "| train/ | |\n", "| approx_kl | 0.0032634623 |\n", "| clip_fraction | 0.0195 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1 |\n", "| explained_variance | 0.97 |\n", "| learning_rate | 0.0003 |\n", "| loss | 26.6 |\n", "| n_updates | 168 |\n", "| policy_gradient_loss | -0.000547 |\n", "| value_loss | 39.4 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 899 |\n", "| ep_rew_mean | 123 |\n", "| time/ | |\n", "| fps | 838 |\n", "| iterations | 44 |\n", "| time_elapsed | 859 |\n", "| total_timesteps | 720896 |\n", "| train/ | |\n", "| approx_kl | 0.0044095377 |\n", "| clip_fraction | 0.032 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.02 |\n", "| explained_variance | 0.983 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.11 |\n", "| n_updates | 172 |\n", "| policy_gradient_loss | 1.51e-05 |\n", "| value_loss | 20.8 |\n", "------------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 922 |\n", "| ep_rew_mean | 127 |\n", "| time/ | |\n", "| fps | 836 |\n", "| iterations | 45 |\n", "| time_elapsed | 881 |\n", "| total_timesteps | 737280 |\n", "| train/ | |\n", "| approx_kl | 0.00429631 |\n", "| clip_fraction | 0.0429 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1 |\n", "| explained_variance | 0.988 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.04 |\n", "| n_updates | 176 |\n", "| policy_gradient_loss | -0.00107 |\n", "| value_loss | 14.2 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 906 |\n", "| ep_rew_mean | 127 |\n", "| time/ | |\n", "| fps | 835 |\n", "| iterations | 46 |\n", "| time_elapsed | 901 |\n", "| total_timesteps | 753664 |\n", "| train/ | |\n", "| approx_kl | 0.005283635 |\n", "| clip_fraction | 0.06 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.979 |\n", "| explained_variance | 0.989 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.908 |\n", "| n_updates | 180 |\n", "| policy_gradient_loss | -0.000466 |\n", "| value_loss | 12.5 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 919 |\n", "| ep_rew_mean | 132 |\n", "| time/ | |\n", "| fps | 833 |\n", "| iterations | 47 |\n", "| time_elapsed | 923 |\n", "| total_timesteps | 770048 |\n", "| train/ | |\n", "| approx_kl | 0.0018034502 |\n", "| clip_fraction | 0.0142 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.96 |\n", "| explained_variance | 0.975 |\n", "| learning_rate | 0.0003 |\n", "| loss | 47.6 |\n", "| n_updates | 184 |\n", "| policy_gradient_loss | 0.000105 |\n", "| value_loss | 30.8 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| 
rollout/ | |\n", "| ep_len_mean | 911 |\n", "| ep_rew_mean | 133 |\n", "| time/ | |\n", "| fps | 832 |\n", "| iterations | 48 |\n", "| time_elapsed | 944 |\n", "| total_timesteps | 786432 |\n", "| train/ | |\n", "| approx_kl | 0.003585513 |\n", "| clip_fraction | 0.0295 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.961 |\n", "| explained_variance | 0.982 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.43 |\n", "| n_updates | 188 |\n", "| policy_gradient_loss | 0.00025 |\n", "| value_loss | 22.7 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 925 |\n", "| ep_rew_mean | 135 |\n", "| time/ | |\n", "| fps | 830 |\n", "| iterations | 49 |\n", "| time_elapsed | 966 |\n", "| total_timesteps | 802816 |\n", "| train/ | |\n", "| approx_kl | 0.0033805869 |\n", "| clip_fraction | 0.0384 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.97 |\n", "| explained_variance | 0.985 |\n", "| learning_rate | 0.0003 |\n", "| loss | 13.5 |\n", "| n_updates | 192 |\n", "| policy_gradient_loss | 7.86e-05 |\n", "| value_loss | 17.8 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 925 |\n", "| ep_rew_mean | 135 |\n", "| time/ | |\n", "| fps | 829 |\n", "| iterations | 50 |\n", "| time_elapsed | 987 |\n", "| total_timesteps | 819200 |\n", "| train/ | |\n", "| approx_kl | 0.0064871325 |\n", "| clip_fraction | 0.034 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.958 |\n", "| explained_variance | 0.996 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.95 |\n", "| n_updates | 196 |\n", "| policy_gradient_loss | -0.00154 |\n", "| value_loss | 3.01 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 924 |\n", "| ep_rew_mean | 136 |\n", "| time/ | |\n", "| fps | 828 |\n", "| iterations | 51 |\n", "| time_elapsed | 1007 |\n", "| total_timesteps | 835584 |\n", "| train/ | |\n", "| approx_kl | 0.0034764456 |\n", "| clip_fraction | 0.0318 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.9 |\n", "| explained_variance | 0.989 |\n", "| learning_rate | 0.0003 |\n", "| loss | 6.29 |\n", "| n_updates | 200 |\n", "| policy_gradient_loss | -0.000736 |\n", "| value_loss | 12 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 937 |\n", "| ep_rew_mean | 140 |\n", "| time/ | |\n", "| fps | 827 |\n", "| iterations | 52 |\n", "| time_elapsed | 1029 |\n", "| total_timesteps | 851968 |\n", "| train/ | |\n", "| approx_kl | 0.0025564209 |\n", "| clip_fraction | 0.0342 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.873 |\n", "| explained_variance | 0.973 |\n", "| learning_rate | 0.0003 |\n", "| loss | 27.3 |\n", "| n_updates | 204 |\n", "| policy_gradient_loss | -0.000729 |\n", "| value_loss | 27.2 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 929 |\n", "| ep_rew_mean | 141 |\n", "| time/ | |\n", "| fps | 826 |\n", "| iterations | 53 |\n", "| time_elapsed | 1050 |\n", "| total_timesteps | 868352 |\n", "| train/ | |\n", "| approx_kl | 0.0032787342 |\n", "| clip_fraction | 0.0297 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.848 |\n", "| explained_variance | 0.975 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.44 |\n", "| n_updates | 208 |\n", "| policy_gradient_loss | 
-0.000646 |\n", "| value_loss | 37.8 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 922 |\n", "| ep_rew_mean | 143 |\n", "| time/ | |\n", "| fps | 824 |\n", "| iterations | 54 |\n", "| time_elapsed | 1072 |\n", "| total_timesteps | 884736 |\n", "| train/ | |\n", "| approx_kl | 0.003402636 |\n", "| clip_fraction | 0.029 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.872 |\n", "| explained_variance | 0.973 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.64 |\n", "| n_updates | 212 |\n", "| policy_gradient_loss | -0.000658 |\n", "| value_loss | 41.5 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 907 |\n", "| ep_rew_mean | 142 |\n", "| time/ | |\n", "| fps | 823 |\n", "| iterations | 55 |\n", "| time_elapsed | 1094 |\n", "| total_timesteps | 901120 |\n", "| train/ | |\n", "| approx_kl | 0.005119282 |\n", "| clip_fraction | 0.0469 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.871 |\n", "| explained_variance | 0.977 |\n", "| learning_rate | 0.0003 |\n", "| loss | 63.4 |\n", "| n_updates | 216 |\n", "| policy_gradient_loss | -7.96e-05 |\n", "| value_loss | 37.3 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 916 |\n", "| ep_rew_mean | 146 |\n", "| time/ | |\n", "| fps | 820 |\n", "| iterations | 56 |\n", "| time_elapsed | 1118 |\n", "| total_timesteps | 917504 |\n", "| train/ | |\n", "| approx_kl | 0.003549051 |\n", "| clip_fraction | 0.0502 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.796 |\n", "| explained_variance | 0.964 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.17 |\n", "| n_updates | 220 |\n", "| policy_gradient_loss | -0.000213 |\n", "| value_loss | 58.5 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 890 |\n", "| ep_rew_mean | 145 |\n", "| time/ | |\n", "| fps | 820 |\n", "| iterations | 57 |\n", "| time_elapsed | 1138 |\n", "| total_timesteps | 933888 |\n", "| train/ | |\n", "| approx_kl | 0.004806907 |\n", "| clip_fraction | 0.0527 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.826 |\n", "| explained_variance | 0.994 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.22 |\n", "| n_updates | 224 |\n", "| policy_gradient_loss | 0.00064 |\n", "| value_loss | 3.08 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 890 |\n", "| ep_rew_mean | 144 |\n", "| time/ | |\n", "| fps | 819 |\n", "| iterations | 58 |\n", "| time_elapsed | 1160 |\n", "| total_timesteps | 950272 |\n", "| train/ | |\n", "| approx_kl | 0.003774609 |\n", "| clip_fraction | 0.0375 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.862 |\n", "| explained_variance | 0.955 |\n", "| learning_rate | 0.0003 |\n", "| loss | 66.3 |\n", "| n_updates | 228 |\n", "| policy_gradient_loss | -0.00141 |\n", "| value_loss | 73.8 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 907 |\n", "| ep_rew_mean | 147 |\n", "| time/ | |\n", "| fps | 818 |\n", "| iterations | 59 |\n", "| time_elapsed | 1181 |\n", "| total_timesteps | 966656 |\n", "| train/ | |\n", "| approx_kl | 0.0048555927 |\n", "| clip_fraction | 0.0443 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.797 |\n", "| 
explained_variance | 0.99 |\n", "| learning_rate | 0.0003 |\n", "| loss | 5.75 |\n", "| n_updates | 232 |\n", "| policy_gradient_loss | -0.000123 |\n", "| value_loss | 12.5 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 916 |\n", "| ep_rew_mean | 148 |\n", "| time/ | |\n", "| fps | 817 |\n", "| iterations | 60 |\n", "| time_elapsed | 1201 |\n", "| total_timesteps | 983040 |\n", "| train/ | |\n", "| approx_kl | 0.004135864 |\n", "| clip_fraction | 0.0485 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.827 |\n", "| explained_variance | 0.978 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.7 |\n", "| n_updates | 236 |\n", "| policy_gradient_loss | -0.00131 |\n", "| value_loss | 35.1 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 926 |\n", "| ep_rew_mean | 152 |\n", "| time/ | |\n", "| fps | 818 |\n", "| iterations | 61 |\n", "| time_elapsed | 1221 |\n", "| total_timesteps | 999424 |\n", "| train/ | |\n", "| approx_kl | 0.0048224786 |\n", "| clip_fraction | 0.0465 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.82 |\n", "| explained_variance | 0.982 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.13 |\n", "| n_updates | 240 |\n", "| policy_gradient_loss | 7.21e-05 |\n", "| value_loss | 23.4 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 931 |\n", "| ep_rew_mean | 153 |\n", "| time/ | |\n", "| fps | 817 |\n", "| iterations | 62 |\n", "| time_elapsed | 1242 |\n", "| total_timesteps | 1015808 |\n", "| train/ | |\n", "| approx_kl | 0.0047184844 |\n", "| clip_fraction | 0.0447 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.835 |\n", "| explained_variance | 0.973 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.35 |\n", "| n_updates | 244 |\n", "| policy_gradient_loss | -0.000695 |\n", "| value_loss | 43.4 |\n", "------------------------------------------\n", "CPU times: user 20min 18s, sys: 9.93 s, total: 20min 28s\n", "Wall time: 20min 52s\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "" ] }, "metadata": {}, "execution_count": 2 } ] }, { "cell_type": "code", "source": [ "#@title evaluate model\n", "\n", "# Create a new environment for evaluation\n", "eval_env = gym.make('LunarLander-v2')\n", "eval_env.reset()\n", "\n", "# Evaluate the model with 10 evaluation episodes and deterministic=True\n", "mean_reward, std_reward = evaluate_policy(model,eval_env,n_eval_episodes=10,deterministic=True)\n", "\n", "# Print the results\n", "print(f'reward: {mean_reward:.2f} +/- {std_reward:.2f}')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "HpTC-ULrJ21K", "outputId": "b25eeb2c-d71f-4d3b-e8bc-566396395694" }, "execution_count": 3, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.8/dist-packages/stable_baselines3/common/evaluation.py:65: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. 
Consider wrapping environment first with ``Monitor`` wrapper.\n", " warnings.warn(\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "reward: 242.08 +/- 17.93\n" ] } ] }, { "cell_type": "code", "source": [ "#https://huggingface.co/deep-rl-course/unit1/hands-on?fw=pt#publish-our-trained-model-on-the-hub\n", "#@title upload model and video in HF hub\n", "# To log to our Hugging Face account to be able to upload models to the Hub.\n", "notebook_login() #copy and paste the token\n", "!git config --global credential.helper store\n", "\n", "# TODO: Define the name of the environment\n", "env_id = 'LunarLander-v2'\n", "\n", "# Create the evaluation env\n", "eval_env = DummyVecEnv([lambda: gym.make(env_id)])\n", "\n", "# Define the model architecture we used\n", "model_architecture = \"PPO\"\n", "\n", "model_name = \"ppo-LunarLander-v2\"\n", "\n", "## repo_id is the id of the model repository from the Hugging Face Hub (repo_id = {username}/{model_architecture}-{env_id} for instance ThomasSimonini/ppo-LunarLander-v2\n", "repo_id = f'asuzuki/{model_architecture}-{env_id}'\n", "\n", "## TODO: Define the commit message\n", "commit_message = \"first commit - model PPO performing good\"\n", "\n", "# Create the evaluation env\n", "eval_env = DummyVecEnv([lambda: gym.make(env_id)])\n", "\n", "# method save, evaluate, generate a model card and record a replay video of your agent before pushing the repo to the hub\n", "package_to_hub(model=model, # Our trained model\n", " model_name=model_name, # The name of our trained model\n", " model_architecture=model_architecture, # The model architecture we used: in our case PPO\n", " env_id=env_id, # Name of the environment\n", " eval_env=eval_env, # Evaluation Environment\n", " repo_id=repo_id, # id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n", " commit_message=commit_message)\n", "\n", "# Note: if after running the package_to_hub function and it gives an issue of rebasing, please run the following code\n", "# cd && git add . && git commit -m \"Add message\" && git pull\n", "# And don't forget to do a \"git push\" at the end to push the change to the hub." 
], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 588, "referenced_widgets": [ "6314513b30fd4f11ad8b59b0bcdee8d8", "1966d341666441089910ba16f7ac169c", "965ba6c8ce3d4c5bbe14c9a7192a221c", "6f5377c55bfe4428805ed94034e68d6b", "2d986d4e45d44a2b858390514b770a9d", "3c532f73534240b19c47e2b943418e93", "ac189f596cb343bd9a6ffcf1b158f56b", "7c2c6b532db14e9bafcdb0beeb1c33bc", "cd4b78c526cf4df88744348e711754da", "506ae0904a434d15891a7c8ded20f2db", "c7616f2c72f5492cba051c5fea08af09", "550b8e46070445ec8669b4d0bf4ec8a8", "435bcee2de28494cb3584f63b25a5f2e", "73e497fcd5644d21a00d37ee8dd9803e", "07a4cd80556d43d590a8673530c26538", "b626797b5b0c4a91b8564b089ef219c2", "a39d2452d85841c396c498f5f53ef48e" ] }, "id": "U28QfLw_TIT9", "outputId": "cebb1d9d-03f3-4d40-92da-771c3d691053" }, "execution_count": 5, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Token is valid.\n", "Your token has been saved in your configured git credential helpers (store).\n", "Your token has been saved to /root/.huggingface/token\n", "Login successful\n" ] } ] }, { "cell_type": "markdown", "source": [ "#LOAD MODEL" ], "metadata": { "id": "PF4eGTuzcETq" } }, { "cell_type": "code", "source": [ "import gym\n", "from stable_baselines3 import PPO\n", "from stable_baselines3.common.evaluation import evaluate_policy\n", "from huggingface_sb3 import load_from_hub\n", "\n", "# TODO: Define the name of the environment\n", "env_id = 'LunarLander-v2'\n", "\n", "# Define the model architecture we used\n", "model_architecture = \"PPO\"\n", "\n", "model_name = \"ppo-LunarLander-v2\"\n", "\n", "repo_id = f'asuzuki/{model_architecture}-{env_id}'\n", "model_name = \"ppo-LunarLander-v2\"\n", "filename = f'{model_name}.zip'\n", "\n", "# When the model was trained on Python 3.8 the pickle protocol is 5\n", "# But Python 3.6, 3.7 use protocol 4\n", "# In order to get compatibility we need to:\n", "# 1. Install pickle5 (we done it at the beginning of the colab)\n", "# 2. Create a custom empty object we pass as parameter to PPO.load()\n", "custom_objects = {\n", " \"learning_rate\": 0.0,\n", " \"lr_schedule\": lambda _: 0.0,\n", " \"clip_range\": lambda _: 0.0,\n", "}\n", "\n", "checkpoint = load_from_hub(repo_id, filename)\n", "model = PPO.load(checkpoint, custom_objects=custom_objects, print_system_info=True)\n", "\n", "eval_env = gym.make(\"LunarLander-v2\")\n", "mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)\n", "print(f\"\\n\\nmean_reward={mean_reward:.2f} +/- {std_reward}\")" ], "metadata": { "id": "I--dK8P0UTGa", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "860d703d-5cae-462d-a378-fb65517602b9" }, "execution_count": 7, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "== CURRENT SYSTEM INFO ==\n", "OS: Linux-5.10.147+-x86_64-with-glibc2.27 #1 SMP Sat Dec 10 16:00:40 UTC 2022\n", "Python: 3.8.16\n", "Stable-Baselines3: 1.6.2\n", "PyTorch: 1.13.0+cu116\n", "GPU Enabled: True\n", "Numpy: 1.21.6\n", "Gym: 0.21.0\n", "\n", "== SAVED MODEL SYSTEM INFO ==\n", "OS: Linux-5.10.147+-x86_64-with-glibc2.27 #1 SMP Sat Dec 10 16:00:40 UTC 2022\n", "Python: 3.8.16\n", "Stable-Baselines3: 1.6.2\n", "PyTorch: 1.13.0+cu116\n", "GPU Enabled: True\n", "Numpy: 1.21.6\n", "Gym: 0.21.0\n", "\n", "\n", "\n", "mean_reward=251.52 +/- 26.518009760699584\n" ] } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "K6Sw58YmP-i6" }, "execution_count": null, "outputs": [] } ] }