SuperSecureHuman committed on
Commit
a511f27
1 Parent(s): 605ac64

Add logs and notebook

Main.ipynb ADDED
@@ -0,0 +1,423 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2f3f1b89",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-05-06T15:35:55.593757Z",
+ "start_time": "2022-05-06T15:35:54.206954Z"
+ },
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import gym\n",
+ "\n",
+ "from stable_baselines3 import TD3\n",
+ "from stable_baselines3.common.evaluation import evaluate_policy\n",
+ "from stable_baselines3.common.env_util import make_vec_env\n",
+ "\n",
+ "import wandb\n",
+ "from wandb.integration.sb3 import WandbCallback\n",
+ "from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "config = {\n",
+ " \"policy_type\": \"MlpPolicy\",\n",
+ " \"env_name\": \"BipedalWalker-v3\",\n",
+ "}"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "run = wandb.init(\n",
+ " project=\"BiPedalWalker-v3\",\n",
+ " config=config,\n",
+ " sync_tensorboard=True, # auto-upload sb3's tensorboard metrics\n",
+ " monitor_gym=True, # auto-upload the videos of agents playing the game\n",
+ " save_code=True, # optional\n",
+ ")"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "35ccb2df",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-05-06T11:52:04.640671Z",
+ "start_time": "2022-05-06T11:52:00.907411Z"
+ },
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import gym\n",
+ "\n",
+ "# First, we create our environment called LunarLander-v2\n",
+ "env = gym.make(\"BipedalWalker-v3\")\n",
+ "\n",
+ "# Then we reset this environment\n",
+ "observation = env.reset()\n",
+ "\n",
+ "for _ in range(200):\n",
+ " # Take a random action\n",
+ " action = env.action_space.sample()\n",
+ " print(\"Action taken:\", action)\n",
+ " env.render()\n",
+ "\n",
+ "\n",
+ " # Do this action in the environment and get\n",
+ " # next_state, reward, done and info\n",
+ " observation, reward, done, info = env.step(action)\n",
+ " \n",
+ " # If the game is done (in our case we land, crashed or timeout)\n",
+ " if done:\n",
+ " # Reset the environment\n",
+ " print(\"Environment is reset\")\n",
+ " observation = env.reset()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9b6a4ef9",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-05-06T11:52:07.357076Z",
+ "start_time": "2022-05-06T11:52:07.349795Z"
+ },
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "env.close()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "db2d1377",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-05-06T12:11:02.520195Z",
+ "start_time": "2022-05-06T12:11:02.491149Z"
+ },
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "env = make_vec_env(\"BipedalWalker-v3\", n_envs=32)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "eval_env = make_vec_env(\"BipedalWalker-v3\", n_envs=1)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=300, verbose=1)\n",
+ "eval_callback = EvalCallback(eval_env, callback_on_new_best=callback_on_best, verbose=1)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a774b23f",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-05-06T12:18:14.514611Z",
+ "start_time": "2022-05-06T12:18:14.497888Z"
+ },
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "model = TD3(\n",
+ " \"MlpPolicy\",\n",
+ " env,\n",
+ " learning_rate=0.0001,\n",
+ " batch_size=128,\n",
+ " gamma=0.999,\n",
+ " train_freq=32,\n",
+ " gradient_steps=32,\n",
+ " tensorboard_log='model_log/',\n",
+ " verbose=0\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "env_id = 'BipedalWalker-v3'"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "model.learn(total_timesteps=50000000, callback=[WandbCallback(), eval_callback])"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "model.save('300-Trained.zip')"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e2e07af6",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-05-06T15:36:15.322985Z",
+ "start_time": "2022-05-06T15:36:10.718319Z"
+ },
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "model = TD3.load('30M_Trained.zip')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "07d151f7",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-05-06T15:36:41.652903Z",
+ "start_time": "2022-05-06T15:36:22.118438Z"
+ },
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "eval_env = gym.make(\"BipedalWalker-v3\")\n",
+ "mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=1, deterministic=True, render=True)\n",
+ "print(f\"mean_reward={mean_reward:.2f} +/- {std_reward}\")\n",
+ "eval_env.close()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "de40c367",
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e027a847",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-05-06T15:40:59.811143Z",
+ "start_time": "2022-05-06T15:40:59.670690Z"
+ },
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import gym\n",
+ "\n",
+ "from stable_baselines3 import PPO\n",
+ "from stable_baselines3.common.vec_env import DummyVecEnv\n",
+ "from stable_baselines3.common.env_util import make_vec_env\n",
+ "\n",
+ "from huggingface_sb3 import package_to_hub\n",
+ "\n",
+ "# PLACE the variables you've just defined two cells above\n",
317
+ "# Define the name of the environment\n",
318
+ "env_id = \"BipedalWalker-v3\"\n",
319
+ "\n",
320
+ "# TODO: Define the model architecture we used\n",
321
+ "model_architecture = \"TD3\"\n",
322
+ "model_name = \"TD3_BipedalWalker-v3\"\n",
323
+ "\n",
324
+ "## Define a repo_id\n",
325
+ "## repo_id is the id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n",
326
+ "## CHANGE WITH YOUR REPO ID\n",
327
+ "repo_id = \"SuperSecureHuman/BipedalWalker-v3-TD3\"\n",
328
+ "\n",
329
+ "## Define the commit message\n",
330
+ "commit_message = \"Upload score 300 trained bipedal walker\"\n",
331
+ "\n",
332
+ "# Create the evaluation env\n",
333
+ "eval_env = DummyVecEnv([lambda: gym.make(env_id)])\n",
334
+ "\n",
335
+ "# PLACE the package_to_hub function you've just filled here\n",
336
+ "package_to_hub(model=model, # Our trained model\n",
337
+ " model_name=model_name, # The name of our trained model \n",
338
+ " model_architecture=model_architecture, # The model architecture we used: in our case PPO\n",
339
+ " env_id=env_id, # Name of the environment\n",
340
+ " eval_env=eval_env, # Evaluation Environment\n",
341
+ " repo_id=repo_id, # id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n",
342
+ " commit_message=commit_message)\n"
343
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "eval_env.close()"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.12"
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {},
+ "toc_section_display": true,
+ "toc_window_display": false
+ },
+ "varInspector": {
+ "cols": {
+ "lenName": 16,
+ "lenType": 16,
+ "lenVar": 40
+ },
+ "kernels_config": {
+ "python": {
+ "delete_cmd_postfix": "",
+ "delete_cmd_prefix": "del ",
+ "library": "var_list.py",
+ "varRefreshCmd": "print(var_dic_list())"
+ },
+ "r": {
+ "delete_cmd_postfix": ") ",
+ "delete_cmd_prefix": "rm(",
+ "library": "var_list.r",
+ "varRefreshCmd": "cat(var_dic_list()) "
+ }
+ },
+ "types_to_exclude": [
+ "module",
+ "function",
+ "builtin_function_or_method",
+ "instance",
+ "_Feature"
+ ],
+ "window_display": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }
log/TD3_1/events.out.tfevents.1651891393.predator.3587258.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:85fe860e44b27b167f97cf937be48caf03f27880ebfda093c23636b268980543
+ size 1960658
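
For reference, a minimal sketch of how the model pushed by this notebook could be loaded back from the Hub and re-evaluated. It assumes the same huggingface_sb3 and stable_baselines3 packages used above, and assumes that package_to_hub stored the checkpoint as TD3_BipedalWalker-v3.zip in the SuperSecureHuman/BipedalWalker-v3-TD3 repo (the filename is inferred from the model_name used in the notebook, not confirmed by this commit):

import gym

from huggingface_sb3 import load_from_hub
from stable_baselines3 import TD3
from stable_baselines3.common.evaluation import evaluate_policy

# Download the checkpoint from the Hub.
# The filename is an assumption based on model_name = "TD3_BipedalWalker-v3" above.
checkpoint = load_from_hub(
    repo_id="SuperSecureHuman/BipedalWalker-v3-TD3",
    filename="TD3_BipedalWalker-v3.zip",
)

# Load the TD3 agent and evaluate it on a fresh environment
model = TD3.load(checkpoint)
eval_env = gym.make("BipedalWalker-v3")
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")
eval_env.close()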