{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "3c804433", "metadata": { "execution": { "iopub.execute_input": "2024-05-17T16:24:40.522711Z", "iopub.status.busy": "2024-05-17T16:24:40.521878Z", "iopub.status.idle": "2024-05-17T16:25:44.391722Z", "shell.execute_reply": "2024-05-17T16:25:44.390499Z" }, "papermill": { "duration": 63.878017, "end_time": "2024-05-17T16:25:44.394274", "exception": false, "start_time": "2024-05-17T16:24:40.516257", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "%%capture\n", "!pip install stable-baselines3[extra]\n", "!pip install moviepy" ] }, { "cell_type": "code", "execution_count": 2, "id": "2f1b6fa4", "metadata": { "execution": { "iopub.execute_input": "2024-05-17T16:25:44.403173Z", "iopub.status.busy": "2024-05-17T16:25:44.402859Z", "iopub.status.idle": "2024-05-17T16:26:02.976027Z", "shell.execute_reply": "2024-05-17T16:26:02.975252Z" }, "papermill": { "duration": 18.580256, "end_time": "2024-05-17T16:26:02.978413", "exception": false, "start_time": "2024-05-17T16:25:44.398157", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-05-17 16:25:52.057957: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-05-17 16:25:52.058066: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-05-17 16:25:52.239365: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n" ] } ], "source": [ "from stable_baselines3 import DQN\n", "from stable_baselines3.common.monitor import Monitor\n", "from stable_baselines3.common.callbacks import BaseCallback, 
EvalCallback, CallbackList\n", "from stable_baselines3.common.logger import Video, HParam, TensorBoardOutputFormat\n", "from stable_baselines3.common.evaluation import evaluate_policy\n", "\n", "from typing import Any, Dict\n", "\n", "import gymnasium as gym\n", "import torch as th\n", "import numpy as np\n", "\n", "# =====File names=====\n", "MODEL_FILE_NAME = \"ALE-Pacman-v5\"\n", "BUFFER_FILE_NAME = \"dqn_replay_buffer_pacman_v2-8\"\n", "POLICY_FILE_NAME = \"dqn_policy_pacman_v2-8\"\n", "\n", "# =====Model Config=====\n", "# Evaluate in 20ths\n", "EVAL_CALLBACK_FREQ = 50_000\n", "# Record in approximate quarters\n", "# Using an endpoint about 5% less than the total timesteps will trigger the last video call.\n", "# This doesn't coincide exactly with the end, but gets close.\n", "VIDEO_CALLBACK_FREQ = 240_000\n", "FRAMESKIP = 4\n", "NUM_TIMESTEPS = 1_000_000\n", "\n", "# =====Hyperparams=====\n", "EXPLORATION_FRACTION = 0.3\n", "# Buffer size needs to be less than about 60k in order to save it in a Kaggle instance\n", "# Increasing buffer size to 70K, should be able to store it.\n", "BUFFER_SIZE = 70_000\n", "BATCH_SIZE = 64\n", "LEARNING_STARTS = 100_000\n", "LEARNING_RATE = 0.00005\n", "GAMMA = 0.999\n", "FINAL_EPSILON = 0.005\n", "# Target Update Interval is set to 10k by default and looks like it is set to \n", "# 4 in the Nature paper. 
This is a large discrepancy and makes me wonder if it \n", "# is something different or measured differently...\n", "TARGET_UPDATE_INTERVAL = 1_000\n", "\n", "# =====Custom objects for hyperparam modification=====\n", "CUSTOM_OBJECTS = {\n", " \"exploration_fraction\": EXPLORATION_FRACTION, \n", " \"buffer_size\": BUFFER_SIZE,\n", " \"batch_size\": BATCH_SIZE,\n", " \"learning_starts\": LEARNING_STARTS,\n", " \"learning_rate\": LEARNING_RATE,\n", " \"gamma\": GAMMA,\n", " \"target_update_interval\": TARGET_UPDATE_INTERVAL,\n", " \"exploration_final_eps\": FINAL_EPSILON,\n", " \"tensorboard_log\": \"./\",\n", " \"verbose\": 1}" ] }, { "cell_type": "code", "execution_count": 3, "id": "329b5f1d", "metadata": { "execution": { "iopub.execute_input": "2024-05-17T16:26:02.987444Z", "iopub.status.busy": "2024-05-17T16:26:02.986929Z", "iopub.status.idle": "2024-05-17T16:26:02.997299Z", "shell.execute_reply": "2024-05-17T16:26:02.996446Z" }, "jupyter": { "source_hidden": true }, "papermill": { "duration": 0.016909, "end_time": "2024-05-17T16:26:02.999263", "exception": false, "start_time": "2024-05-17T16:26:02.982354", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# VideoRecorderCallback\n", "# The VideoRecorderCallback should record a video of the agent in the evaluation environment\n", "# every render_freq timesteps. It will record one episode. 
It will also record one episode when\n", "# the training has been completed\n", "\n", "class VideoRecorderCallback(BaseCallback):\n", " def __init__(self, eval_env: gym.Env, render_freq: int, n_eval_episodes: int = 1, deterministic: bool = True):\n", " \"\"\"\n", " Records a video of an agent's trajectory traversing ``eval_env`` and logs it to TensorBoard.\n", " :param eval_env: A gym environment from which the trajectory is recorded\n", " :param render_freq: Render the agent's trajectory every render_freq call of the callback.\n", " :param n_eval_episodes: Number of episodes to render\n", " :param deterministic: Whether to use deterministic or stochastic policy\n", " \"\"\"\n", " super().__init__()\n", " self._eval_env = eval_env\n", " self._render_freq = render_freq\n", " self._n_eval_episodes = n_eval_episodes\n", " self._deterministic = deterministic\n", "\n", " def _on_step(self) -> bool:\n", " if self.n_calls % self._render_freq == 0:\n", " screens = []\n", "\n", " def grab_screens(_locals: Dict[str, Any], _globals: Dict[str, Any]) -> None:\n", " \"\"\"\n", " Renders the environment in its current state, recording the screen in the captured `screens` list\n", " :param _locals: A dictionary containing all local variables of the callback's scope\n", " :param _globals: A dictionary containing all global variables of the callback's scope\n", " \"\"\"\n", " screen = self._eval_env.render()\n", " # PyTorch uses CxHxW vs HxWxC gym (and tensorflow) image convention\n", " screens.append(screen.transpose(2, 0, 1))\n", "\n", " evaluate_policy(\n", " self.model,\n", " self._eval_env,\n", " callback=grab_screens,\n", " n_eval_episodes=self._n_eval_episodes,\n", " deterministic=self._deterministic,\n", " )\n", " self.logger.record(\n", " \"trajectory/video\",\n", " Video(th.from_numpy(np.array([screens])), fps=60),\n", " exclude=(\"stdout\", \"log\", \"json\", \"csv\"),\n", " )\n", " return True" ] }, { "cell_type": "code", "execution_count": 4, "id": "f908c907", "metadata": 
{ "execution": { "iopub.execute_input": "2024-05-17T16:26:03.007641Z", "iopub.status.busy": "2024-05-17T16:26:03.007083Z", "iopub.status.idle": "2024-05-17T16:26:03.018011Z", "shell.execute_reply": "2024-05-17T16:26:03.017226Z" }, "jupyter": { "source_hidden": true }, "papermill": { "duration": 0.017144, "end_time": "2024-05-17T16:26:03.019947", "exception": false, "start_time": "2024-05-17T16:26:03.002803", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# HParamCallback\n", "# This should log the hyperparameters specified and map the metrics that are logged to \n", "# the appropriate run.\n", "class HParamCallback(BaseCallback):\n", " \"\"\"\n", " Saves the hyperparameters and metrics at the start of the training, and logs them to TensorBoard.\n", " \"\"\" \n", " def __init__(self):\n", " super().__init__()\n", " \n", "\n", " def _on_training_start(self) -> None:\n", " \n", " hparam_dict = {\n", " \"algorithm\": self.model.__class__.__name__,\n", " \"policy\": self.model.policy.__class__.__name__,\n", " \"environment\": self.model.env.__class__.__name__,\n", " \"buffer_size\": self.model.buffer_size,\n", " \"batch_size\": self.model.batch_size,\n", " \"tau\": self.model.tau,\n", " \"gradient_steps\": self.model.gradient_steps,\n", " \"target_update_interval\": self.model.target_update_interval,\n", " \"exploration_fraction\": self.model.exploration_fraction,\n", " \"exploration_initial_eps\": self.model.exploration_initial_eps,\n", " \"exploration_final_eps\": self.model.exploration_final_eps,\n", " \"max_grad_norm\": self.model.max_grad_norm,\n", " \"tensorboard_log\": self.model.tensorboard_log,\n", " \"seed\": self.model.seed, \n", " \"learning rate\": self.model.learning_rate,\n", " \"gamma\": self.model.gamma, \n", " }\n", " # define the metrics that will appear in the `HPARAMS` Tensorboard tab by referencing their tag\n", " # Tensorbaord will find & display metrics from the `SCALARS` tab\n", " metric_dict = {\n", " 
\"eval/mean_ep_length\": 0,\n", " \"eval/mean_reward\": 0,\n", " \"rollout/ep_len_mean\": 0,\n", " \"rollout/ep_rew_mean\": 0,\n", " \"rollout/exploration_rate\": 0,\n", " \"time/_episode_num\": 0,\n", " \"time/fps\": 0,\n", " \"time/total_timesteps\": 0,\n", " \"train/learning_rate\": 0.0,\n", " \"train/loss\": 0.0,\n", " \"train/n_updates\": 0.0,\n", " \"locals/rewards\": 0.0,\n", " \"locals/infos_0_lives\": 0.0,\n", " \"locals/num_collected_steps\": 0.0,\n", " \"locals/num_collected_episodes\": 0.0\n", " }\n", " \n", " self.logger.record(\n", " \"hparams\",\n", " HParam(hparam_dict, metric_dict),\n", " exclude=(\"stdout\", \"log\", \"json\", \"csv\"),\n", " )\n", " \n", " def _on_step(self) -> bool:\n", " return True" ] }, { "cell_type": "code", "execution_count": 5, "id": "3c5afa5a", "metadata": { "execution": { "iopub.execute_input": "2024-05-17T16:26:03.028073Z", "iopub.status.busy": "2024-05-17T16:26:03.027816Z", "iopub.status.idle": "2024-05-17T16:26:03.042627Z", "shell.execute_reply": "2024-05-17T16:26:03.041816Z" }, "jupyter": { "source_hidden": true }, "papermill": { "duration": 0.021124, "end_time": "2024-05-17T16:26:03.044551", "exception": false, "start_time": "2024-05-17T16:26:03.023427", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# PlotTensorboardValuesCallback\n", "# This callback should log values to tensorboard on every step. 
\n", "# The self.logger class should plot a new scalar value when recording.\n", "\n", "class PlotTensorboardValuesCallback(BaseCallback):\n", " \"\"\"\n", " Custom callback for plotting additional values in tensorboard.\n", " \"\"\"\n", " def __init__(self, eval_env: gym.Env, train_env: gym.Env, model: DQN, verbose=0):\n", " super().__init__(verbose)\n", " self._eval_env = eval_env\n", " self._train_env = train_env\n", " self._model = model\n", "\n", " def _on_training_start(self) -> None:\n", " output_formats = self.logger.output_formats\n", " # Save reference to tensorboard formatter object\n", " # note: catch only StopIteration (raised by next() when no TensorBoard formatter exists)\n", " # so that unrelated errors propagate instead of being silently swallowed.\n", " try:\n", " self.tb_formatter = next(formatter for formatter in output_formats if isinstance(formatter, TensorBoardOutputFormat))\n", " except StopIteration:\n", " print(\"Exception thrown in tb_formatter initialization.\") \n", " \n", " self.tb_formatter.writer.add_text(\"metadata/eval_env\", str(self._eval_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"metadata/train_env\", str(self._train_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net\", str(self._model.q_net), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net_target\", str(self._model.q_net_target), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", "\n", " def _on_step(self) -> bool:\n", " self.logger.record(\"time/_episode_num\", self.model._episode_num, exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"train/n_updates\", self.model._n_updates, exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/rewards\", self.locals[\"rewards\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/infos_0_lives\", 
self.locals[\"infos\"][0][\"lives\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/num_collected_steps\", self.locals[\"num_collected_steps\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/num_collected_episodes\", self.locals[\"num_collected_episodes\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " \n", " return True\n", " \n", " def _on_training_end(self) -> None:\n", " self.tb_formatter.writer.add_text(\"metadata/eval_env\", str(self._eval_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"metadata/train_env\", str(self._train_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net\", str(self._model.q_net), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net_target\", str(self._model.q_net_target), self.num_timesteps)\n", " self.tb_formatter.writer.flush()" ] }, { "cell_type": "code", "execution_count": 6, "id": "66aeccf6", "metadata": { "execution": { "iopub.execute_input": "2024-05-17T16:26:03.052815Z", "iopub.status.busy": "2024-05-17T16:26:03.052528Z", "iopub.status.idle": "2024-05-17T16:27:27.490919Z", "shell.execute_reply": "2024-05-17T16:27:27.489858Z" }, "papermill": { "duration": 84.445306, "end_time": "2024-05-17T16:27:27.493415", "exception": false, "start_time": "2024-05-17T16:26:03.048109", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)\n", "[Powered by Stella]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Wrapping the env with a `Monitor` wrapper\n", "Wrapping the env in a DummyVecEnv.\n", "Wrapping the env in a VecTransposeImage.\n" ] } ], "source": [ "# make the training and evaluation environments\n", "eval_env = 
Monitor(gym.make(\"ALE/Pacman-v5\", render_mode=\"rgb_array\", frameskip=FRAMESKIP))\n", "train_env = gym.make(\"ALE/Pacman-v5\", render_mode=\"rgb_array\", frameskip=FRAMESKIP)\n", "\n", "# Make the model with specified hyperparams\n", "# load the model\n", "# load the buffer\n", "# The loaded model still needs the hyperparameters to be passed to it, and the replay buffer\n", "model = DQN.load(\"/kaggle/input/dqn-pacmanv5-run2v7/ALE-Pacman-v5.zip\", \n", " env=train_env, \n", " custom_objects=CUSTOM_OBJECTS)\n", "model.load_replay_buffer(\"/kaggle/input/dqn-pacmanv5-run2v7/dqn_replay_buffer_pacman_v2-7\")" ] }, { "cell_type": "code", "execution_count": 7, "id": "1250ef68", "metadata": { "execution": { "iopub.execute_input": "2024-05-17T16:27:27.506331Z", "iopub.status.busy": "2024-05-17T16:27:27.505984Z", "iopub.status.idle": "2024-05-17T16:27:27.512123Z", "shell.execute_reply": "2024-05-17T16:27:27.511292Z" }, "papermill": { "duration": 0.013364, "end_time": "2024-05-17T16:27:27.514315", "exception": false, "start_time": "2024-05-17T16:27:27.500951", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Define the callbacks and put them in a list\n", "eval_callback = EvalCallback(\n", " eval_env,\n", " best_model_save_path=\"./best_model/\",\n", " log_path=\"./evals/\",\n", " eval_freq=EVAL_CALLBACK_FREQ,\n", " n_eval_episodes=10,\n", " deterministic=True,\n", " render=False)\n", "\n", "tbplot_callback = PlotTensorboardValuesCallback(eval_env=eval_env, train_env=train_env, model=model)\n", "video_callback = VideoRecorderCallback(eval_env, render_freq=VIDEO_CALLBACK_FREQ)\n", "hparam_callback = HParamCallback()\n", "\n", "callback_list = CallbackList([hparam_callback, eval_callback, video_callback, tbplot_callback])" ] }, { "cell_type": "code", "execution_count": 8, "id": "f3425f13", "metadata": { "execution": { "iopub.execute_input": "2024-05-17T16:27:27.523129Z", "iopub.status.busy": "2024-05-17T16:27:27.522848Z", "iopub.status.idle": 
"2024-05-17T19:10:49.796802Z", "shell.execute_reply": "2024-05-17T19:10:49.795676Z" }, "papermill": { "duration": 9802.280681, "end_time": "2024-05-17T19:10:49.798946", "exception": false, "start_time": "2024-05-17T16:27:27.518265", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Logging to ././tb/_0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/stable_baselines3/common/callbacks.py:403: UserWarning: Training and eval env are not of the same type != \n", " warnings.warn(\"Training and eval env are not of the same type\" f\"{self.training_env} != {self.eval_env}\")\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.63e+03 |\n", "| ep_rew_mean | 288 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7912 |\n", "| fps | 110 |\n", "| time_elapsed | 188 |\n", "| total_timesteps | 9020882 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.214 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.63e+03 |\n", "| ep_rew_mean | 294 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7916 |\n", "| fps | 110 |\n", "| time_elapsed | 229 |\n", "| total_timesteps | 9025408 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0459 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.64e+03 |\n", "| ep_rew_mean | 297 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7920 |\n", "| fps | 110 |\n", "| time_elapsed | 275 |\n", "| total_timesteps | 9030454 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0377 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 
1.64e+03 |\n", "| ep_rew_mean | 306 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7924 |\n", "| fps | 110 |\n", "| time_elapsed | 318 |\n", "| total_timesteps | 9035224 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0424 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.63e+03 |\n", "| ep_rew_mean | 307 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7928 |\n", "| fps | 110 |\n", "| time_elapsed | 352 |\n", "| total_timesteps | 9038958 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.12 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 315 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7932 |\n", "| fps | 110 |\n", "| time_elapsed | 397 |\n", "| total_timesteps | 9043812 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0281 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 319 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7936 |\n", "| fps | 110 |\n", "| time_elapsed | 437 |\n", "| total_timesteps | 9048240 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0598 |\n", "----------------------------------\n", "Eval num_timesteps=9050000, episode_reward=375.60 +/- 47.53\n", "Episode length: 1300.40 +/- 151.35\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.3e+03 |\n", "| mean_reward | 376 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9050000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0443 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | 
|\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 324 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7940 |\n", "| fps | 103 |\n", "| time_elapsed | 518 |\n", "| total_timesteps | 9053530 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.307 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 326 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7944 |\n", "| fps | 103 |\n", "| time_elapsed | 563 |\n", "| total_timesteps | 9058432 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0435 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 332 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7948 |\n", "| fps | 104 |\n", "| time_elapsed | 605 |\n", "| total_timesteps | 9063114 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0602 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 333 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7952 |\n", "| fps | 104 |\n", "| time_elapsed | 648 |\n", "| total_timesteps | 9067750 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0477 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 337 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7956 |\n", "| fps | 104 |\n", "| time_elapsed | 690 |\n", "| total_timesteps | 9072360 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0525 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 
344 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7960 |\n", "| fps | 105 |\n", "| time_elapsed | 734 |\n", "| total_timesteps | 9077210 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0312 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 352 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7964 |\n", "| fps | 105 |\n", "| time_elapsed | 780 |\n", "| total_timesteps | 9082204 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0759 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 351 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7968 |\n", "| fps | 105 |\n", "| time_elapsed | 821 |\n", "| total_timesteps | 9086754 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.372 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 353 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7972 |\n", "| fps | 105 |\n", "| time_elapsed | 863 |\n", "| total_timesteps | 9091330 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0544 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 356 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7976 |\n", "| fps | 105 |\n", "| time_elapsed | 906 |\n", "| total_timesteps | 9096020 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0821 |\n", "----------------------------------\n", "Eval num_timesteps=9100000, episode_reward=468.80 +/- 63.25\n", "Episode length: 1143.60 +/- 59.62\n", "----------------------------------\n", "| eval/ | |\n", "| 
mean_ep_length | 1.14e+03 |\n", "| mean_reward | 469 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9100000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.419 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 362 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7980 |\n", "| fps | 103 |\n", "| time_elapsed | 976 |\n", "| total_timesteps | 9100642 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0655 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.57e+03 |\n", "| ep_rew_mean | 362 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7984 |\n", "| fps | 104 |\n", "| time_elapsed | 1254 |\n", "| total_timesteps | 9130544 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0223 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.57e+03 |\n", "| ep_rew_mean | 365 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7988 |\n", "| fps | 104 |\n", "| time_elapsed | 1294 |\n", "| total_timesteps | 9134706 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0276 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.56e+03 |\n", "| ep_rew_mean | 360 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 7992 |\n", "| fps | 104 |\n", "| time_elapsed | 1333 |\n", "| total_timesteps | 9138902 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.334 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.57e+03 |\n", "| ep_rew_mean | 366 |\n", "| exploration_rate 
| 0.005 |\n", "| time/ | |\n", "| episodes | 7996 |\n", "| fps | 104 |\n", "| time_elapsed | 1378 |\n", "| total_timesteps | 9143596 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0229 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.57e+03 |\n", "| ep_rew_mean | 364 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8000 |\n", "| fps | 104 |\n", "| time_elapsed | 1421 |\n", "| total_timesteps | 9147938 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.092 |\n", "----------------------------------\n", "Eval num_timesteps=9150000, episode_reward=392.30 +/- 177.21\n", "Episode length: 990.40 +/- 219.68\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 990 |\n", "| mean_reward | 392 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9150000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0648 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.57e+03 |\n", "| ep_rew_mean | 372 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8004 |\n", "| fps | 102 |\n", "| time_elapsed | 1487 |\n", "| total_timesteps | 9152166 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0247 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.56e+03 |\n", "| ep_rew_mean | 366 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8008 |\n", "| fps | 102 |\n", "| time_elapsed | 1523 |\n", "| total_timesteps | 9155871 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0431 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 357 |\n", "| exploration_rate | 
0.005 |\n", "| time/ | |\n", "| episodes | 8012 |\n", "| fps | 102 |\n", "| time_elapsed | 1550 |\n", "| total_timesteps | 9158641 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0275 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 360 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8016 |\n", "| fps | 102 |\n", "| time_elapsed | 1595 |\n", "| total_timesteps | 9163281 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0376 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 356 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8020 |\n", "| fps | 102 |\n", "| time_elapsed | 1631 |\n", "| total_timesteps | 9167027 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.082 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 355 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8024 |\n", "| fps | 102 |\n", "| time_elapsed | 1671 |\n", "| total_timesteps | 9171117 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0486 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 359 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8028 |\n", "| fps | 102 |\n", "| time_elapsed | 1711 |\n", "| total_timesteps | 9175279 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0576 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 357 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8032 
|\n", "| fps | 102 |\n", "| time_elapsed | 1751 |\n", "| total_timesteps | 9179553 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0903 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 361 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8036 |\n", "| fps | 102 |\n", "| time_elapsed | 1796 |\n", "| total_timesteps | 9184255 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.047 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 364 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8040 |\n", "| fps | 102 |\n", "| time_elapsed | 1843 |\n", "| total_timesteps | 9189185 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.156 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 364 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8044 |\n", "| fps | 102 |\n", "| time_elapsed | 1890 |\n", "| total_timesteps | 9194215 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.123 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.35e+03 |\n", "| ep_rew_mean | 359 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8048 |\n", "| fps | 102 |\n", "| time_elapsed | 1931 |\n", "| total_timesteps | 9198489 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0186 |\n", "----------------------------------\n", "Eval num_timesteps=9200000, episode_reward=275.60 +/- 47.36\n", "Episode length: 1239.80 +/- 145.58\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.24e+03 |\n", "| mean_reward | 276 |\n", "| rollout/ | 
|\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9200000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.102 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 359 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8052 |\n", "| fps | 101 |\n", "| time_elapsed | 2017 |\n", "| total_timesteps | 9204181 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0426 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 357 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8056 |\n", "| fps | 101 |\n", "| time_elapsed | 2063 |\n", "| total_timesteps | 9209005 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0684 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 360 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8060 |\n", "| fps | 101 |\n", "| time_elapsed | 2111 |\n", "| total_timesteps | 9214075 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0929 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 365 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8064 |\n", "| fps | 101 |\n", "| time_elapsed | 2156 |\n", "| total_timesteps | 9218867 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0499 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 363 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8068 |\n", "| fps | 101 |\n", "| time_elapsed | 2204 
|\n", "| total_timesteps | 9223817 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0278 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 364 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8072 |\n", "| fps | 101 |\n", "| time_elapsed | 2248 |\n", "| total_timesteps | 9228423 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.115 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 361 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8076 |\n", "| fps | 101 |\n", "| time_elapsed | 2293 |\n", "| total_timesteps | 9233161 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0885 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 362 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8080 |\n", "| fps | 101 |\n", "| time_elapsed | 2344 |\n", "| total_timesteps | 9238203 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0899 |\n", "----------------------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/gymnasium/utils/passive_env_checker.py:335: UserWarning: \u001b[33mWARN: No render fps was declared in the environment (env.metadata['render_fps'] is None or not defined), rendering may occur at inconsistent fps.\u001b[0m\n", " logger.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 364 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8084 |\n", "| fps | 101 |\n", "| time_elapsed | 2399 |\n", "| total_timesteps | 9243589 
|\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0779 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 365 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8088 |\n", "| fps | 101 |\n", "| time_elapsed | 2447 |\n", "| total_timesteps | 9247825 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0362 |\n", "----------------------------------\n", "Eval num_timesteps=9250000, episode_reward=415.70 +/- 38.88\n", "Episode length: 1574.20 +/- 951.59\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.57e+03 |\n", "| mean_reward | 416 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9250000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0483 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 369 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8092 |\n", "| fps | 99 |\n", "| time_elapsed | 2530 |\n", "| total_timesteps | 9252443 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.141 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 366 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8096 |\n", "| fps | 99 |\n", "| time_elapsed | 2567 |\n", "| total_timesteps | 9256529 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0733 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 362 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8100 |\n", "| fps | 100 |\n", "| time_elapsed | 2605 |\n", "| total_timesteps | 9260719 
|\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0511 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 359 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8104 |\n", "| fps | 100 |\n", "| time_elapsed | 2655 |\n", "| total_timesteps | 9266139 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.663 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 365 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8108 |\n", "| fps | 100 |\n", "| time_elapsed | 2695 |\n", "| total_timesteps | 9270549 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0537 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 378 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8112 |\n", "| fps | 100 |\n", "| time_elapsed | 2730 |\n", "| total_timesteps | 9274449 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.62 |\n", "----------------------------------\n", "Eval num_timesteps=9300000, episode_reward=332.70 +/- 77.87\n", "Episode length: 1076.60 +/- 44.04\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.08e+03 |\n", "| mean_reward | 333 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9300000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0583 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 373 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8116 |\n", "| fps | 100 |\n", "| time_elapsed | 3032 |\n", "| total_timesteps | 9304557 
|\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.014 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 379 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8120 |\n", "| fps | 100 |\n", "| time_elapsed | 3071 |\n", "| total_timesteps | 9308865 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.52 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 374 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8124 |\n", "| fps | 100 |\n", "| time_elapsed | 3112 |\n", "| total_timesteps | 9313297 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0575 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 371 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8128 |\n", "| fps | 100 |\n", "| time_elapsed | 3147 |\n", "| total_timesteps | 9317125 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0665 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 375 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8132 |\n", "| fps | 100 |\n", "| time_elapsed | 3188 |\n", "| total_timesteps | 9321666 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0178 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 377 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8136 |\n", "| fps | 101 |\n", "| time_elapsed | 3235 |\n", "| total_timesteps | 9326840 |\n", "| train/ | |\n", "| learning_rate | 5e-05 
|\n", "| loss | 0.185 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 370 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8140 |\n", "| fps | 101 |\n", "| time_elapsed | 3265 |\n", "| total_timesteps | 9330148 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.313 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 370 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8144 |\n", "| fps | 101 |\n", "| time_elapsed | 3308 |\n", "| total_timesteps | 9334910 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.998 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 376 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8148 |\n", "| fps | 101 |\n", "| time_elapsed | 3353 |\n", "| total_timesteps | 9339886 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.069 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 377 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8152 |\n", "| fps | 101 |\n", "| time_elapsed | 3396 |\n", "| total_timesteps | 9344556 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0568 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 379 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8156 |\n", "| fps | 101 |\n", "| time_elapsed | 3436 |\n", "| total_timesteps | 9348972 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0579 |\n", 
"----------------------------------\n", "Eval num_timesteps=9350000, episode_reward=456.10 +/- 88.71\n", "Episode length: 1220.20 +/- 179.16\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.22e+03 |\n", "| mean_reward | 456 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9350000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.26 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 372 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8160 |\n", "| fps | 100 |\n", "| time_elapsed | 3518 |\n", "| total_timesteps | 9354304 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.1 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 370 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8164 |\n", "| fps | 100 |\n", "| time_elapsed | 3566 |\n", "| total_timesteps | 9359614 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0807 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 369 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8168 |\n", "| fps | 100 |\n", "| time_elapsed | 3609 |\n", "| total_timesteps | 9364356 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0844 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 368 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8172 |\n", "| fps | 101 |\n", "| time_elapsed | 3650 |\n", "| total_timesteps | 9368838 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0284 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 365 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8176 |\n", "| fps | 101 |\n", "| time_elapsed | 3690 |\n", "| total_timesteps | 9373214 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.55 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 361 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8180 |\n", "| fps | 101 |\n", "| time_elapsed | 3727 |\n", "| total_timesteps | 9377278 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0575 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 356 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8184 |\n", "| fps | 101 |\n", "| time_elapsed | 3765 |\n", "| total_timesteps | 9381440 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0637 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 351 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8188 |\n", "| fps | 101 |\n", "| time_elapsed | 3807 |\n", "| total_timesteps | 9386050 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.298 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 348 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8192 |\n", "| fps | 101 |\n", "| time_elapsed | 3849 |\n", "| total_timesteps | 9390710 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0968 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 344 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8196 |\n", "| fps | 101 |\n", "| time_elapsed | 3890 |\n", "| total_timesteps | 9395200 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0704 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 346 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8200 |\n", "| fps | 101 |\n", "| time_elapsed | 3930 |\n", "| total_timesteps | 9399588 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0352 |\n", "----------------------------------\n", "Eval num_timesteps=9400000, episode_reward=427.30 +/- 74.91\n", "Episode length: 1149.20 +/- 78.18\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.15e+03 |\n", "| mean_reward | 427 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9400000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.238 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 343 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8204 |\n", "| fps | 100 |\n", "| time_elapsed | 4002 |\n", "| total_timesteps | 9404140 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.24 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 341 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8208 |\n", "| fps | 101 |\n", "| time_elapsed | 4041 |\n", "| total_timesteps | 9408486 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.215 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 336 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8212 |\n", "| fps | 101 |\n", "| time_elapsed | 4078 |\n", "| total_timesteps | 9412852 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0525 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 334 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8216 |\n", "| fps | 101 |\n", "| time_elapsed | 4114 |\n", "| total_timesteps | 9416902 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0451 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 331 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8220 |\n", "| fps | 101 |\n", "| time_elapsed | 4151 |\n", "| total_timesteps | 9421320 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0428 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 334 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8224 |\n", "| fps | 101 |\n", "| time_elapsed | 4192 |\n", "| total_timesteps | 9425998 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0561 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 333 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8228 |\n", "| fps | 101 |\n", "| time_elapsed | 4233 |\n", "| total_timesteps | 9430548 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0753 |\n", "----------------------------------\n", "----------------------------------\n", "| 
rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 326 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8232 |\n", "| fps | 101 |\n", "| time_elapsed | 4270 |\n", "| total_timesteps | 9434634 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0415 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 317 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8236 |\n", "| fps | 101 |\n", "| time_elapsed | 4312 |\n", "| total_timesteps | 9439258 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0812 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 318 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8240 |\n", "| fps | 101 |\n", "| time_elapsed | 4356 |\n", "| total_timesteps | 9444128 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0259 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 314 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8244 |\n", "| fps | 102 |\n", "| time_elapsed | 4401 |\n", "| total_timesteps | 9448994 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0707 |\n", "----------------------------------\n", "Eval num_timesteps=9450000, episode_reward=407.40 +/- 49.29\n", "Episode length: 1169.40 +/- 114.01\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.17e+03 |\n", "| mean_reward | 407 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9450000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0482 |\n", "----------------------------------\n", "----------------------------------\n", 
"| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 310 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8248 |\n", "| fps | 101 |\n", "| time_elapsed | 4474 |\n", "| total_timesteps | 9453666 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0436 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 313 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8252 |\n", "| fps | 101 |\n", "| time_elapsed | 4518 |\n", "| total_timesteps | 9458496 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.089 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 315 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8256 |\n", "| fps | 101 |\n", "| time_elapsed | 4558 |\n", "| total_timesteps | 9462938 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0493 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 316 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8260 |\n", "| fps | 101 |\n", "| time_elapsed | 4600 |\n", "| total_timesteps | 9467644 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.104 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 317 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8264 |\n", "| fps | 101 |\n", "| time_elapsed | 4645 |\n", "| total_timesteps | 9472610 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0832 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", 
"| ep_rew_mean | 317 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8268 |\n", "| fps | 101 |\n", "| time_elapsed | 4687 |\n", "| total_timesteps | 9477306 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.106 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 317 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8272 |\n", "| fps | 101 |\n", "| time_elapsed | 4734 |\n", "| total_timesteps | 9482110 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.121 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 324 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8276 |\n", "| fps | 101 |\n", "| time_elapsed | 4783 |\n", "| total_timesteps | 9486870 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.57 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 327 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8280 |\n", "| fps | 101 |\n", "| time_elapsed | 4826 |\n", "| total_timesteps | 9491624 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0319 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 326 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8284 |\n", "| fps | 101 |\n", "| time_elapsed | 4861 |\n", "| total_timesteps | 9495488 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.41 |\n", "----------------------------------\n", "Eval num_timesteps=9500000, episode_reward=405.40 +/- 72.28\n", "Episode length: 1141.40 +/- 119.45\n", "----------------------------------\n", "| 
eval/ | |\n", "| mean_ep_length | 1.14e+03 |\n", "| mean_reward | 405 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9500000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0338 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 327 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8288 |\n", "| fps | 101 |\n", "| time_elapsed | 4935 |\n", "| total_timesteps | 9500196 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.059 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 323 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8292 |\n", "| fps | 101 |\n", "| time_elapsed | 4975 |\n", "| total_timesteps | 9504612 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0303 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 328 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8296 |\n", "| fps | 101 |\n", "| time_elapsed | 5020 |\n", "| total_timesteps | 9509514 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0721 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 334 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8300 |\n", "| fps | 101 |\n", "| time_elapsed | 5062 |\n", "| total_timesteps | 9514178 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.127 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 338 |\n", "| exploration_rate | 0.005 
|\n", "| time/ | |\n", "| episodes | 8304 |\n", "| fps | 101 |\n", "| time_elapsed | 5104 |\n", "| total_timesteps | 9518738 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0899 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 336 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8308 |\n", "| fps | 101 |\n", "| time_elapsed | 5140 |\n", "| total_timesteps | 9522738 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.08 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 338 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8312 |\n", "| fps | 101 |\n", "| time_elapsed | 5184 |\n", "| total_timesteps | 9527514 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0759 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 348 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8316 |\n", "| fps | 101 |\n", "| time_elapsed | 5230 |\n", "| total_timesteps | 9532626 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.104 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 343 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8320 |\n", "| fps | 101 |\n", "| time_elapsed | 5266 |\n", "| total_timesteps | 9536582 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.01 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 346 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8324 |\n", "| fps 
| 101 |\n", "| time_elapsed | 5307 |\n", "| total_timesteps | 9540980 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.53 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 350 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8328 |\n", "| fps | 101 |\n", "| time_elapsed | 5350 |\n", "| total_timesteps | 9545760 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.09 |\n", "----------------------------------\n", "Eval num_timesteps=9550000, episode_reward=396.20 +/- 77.46\n", "Episode length: 1201.40 +/- 84.67\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.2e+03 |\n", "| mean_reward | 396 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9550000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0615 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 355 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8332 |\n", "| fps | 101 |\n", "| time_elapsed | 5426 |\n", "| total_timesteps | 9550556 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.139 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 362 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8336 |\n", "| fps | 101 |\n", "| time_elapsed | 5470 |\n", "| total_timesteps | 9555372 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0503 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 367 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8340 |\n", "| fps | 
101 |\n", "| time_elapsed | 5515 |\n", "| total_timesteps | 9560258 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.057 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 373 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8344 |\n", "| fps | 101 |\n", "| time_elapsed | 5554 |\n", "| total_timesteps | 9564480 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0566 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 377 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8348 |\n", "| fps | 101 |\n", "| time_elapsed | 5598 |\n", "| total_timesteps | 9569374 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.11 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 373 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8352 |\n", "| fps | 101 |\n", "| time_elapsed | 5643 |\n", "| total_timesteps | 9574272 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.066 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 370 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8356 |\n", "| fps | 101 |\n", "| time_elapsed | 5684 |\n", "| total_timesteps | 9578770 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0565 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 374 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8360 |\n", "| fps | 101 |\n", "| time_elapsed | 5727 |\n", "| 
total_timesteps | 9583428 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0697 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 376 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8364 |\n", "| fps | 101 |\n", "| time_elapsed | 5772 |\n", "| total_timesteps | 9588386 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0795 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 378 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8368 |\n", "| fps | 101 |\n", "| time_elapsed | 5817 |\n", "| total_timesteps | 9593264 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0281 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 376 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8372 |\n", "| fps | 102 |\n", "| time_elapsed | 5856 |\n", "| total_timesteps | 9597618 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0521 |\n", "----------------------------------\n", "Eval num_timesteps=9600000, episode_reward=341.80 +/- 181.84\n", "Episode length: 1128.00 +/- 216.15\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.13e+03 |\n", "| mean_reward | 342 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9600000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0416 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 377 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8376 |\n", "| fps | 101 |\n", "| time_elapsed | 5928 |\n", 
"| total_timesteps | 9602186 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.696 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 381 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8380 |\n", "| fps | 101 |\n", "| time_elapsed | 5968 |\n", "| total_timesteps | 9606572 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0885 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 383 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8384 |\n", "| fps | 101 |\n", "| time_elapsed | 6009 |\n", "| total_timesteps | 9610996 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0942 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 383 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8388 |\n", "| fps | 101 |\n", "| time_elapsed | 6050 |\n", "| total_timesteps | 9615540 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.101 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 385 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8392 |\n", "| fps | 101 |\n", "| time_elapsed | 6100 |\n", "| total_timesteps | 9620962 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.486 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 390 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8396 |\n", "| fps | 101 |\n", "| time_elapsed | 6140 |\n", "| total_timesteps | 9625396 |\n", "| train/ | |\n", 
"| learning_rate | 5e-05 |\n", "| loss | 1.04 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 386 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8400 |\n", "| fps | 101 |\n", "| time_elapsed | 6186 |\n", "| total_timesteps | 9630478 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.19 |\n", "----------------------------------\n", "Eval num_timesteps=9650000, episode_reward=428.40 +/- 50.23\n", "Episode length: 1123.20 +/- 66.91\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.12e+03 |\n", "| mean_reward | 428 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9650000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0298 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 386 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8404 |\n", "| fps | 101 |\n", "| time_elapsed | 6494 |\n", "| total_timesteps | 9661102 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0309 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 387 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8408 |\n", "| fps | 101 |\n", "| time_elapsed | 6532 |\n", "| total_timesteps | 9665286 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0202 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 390 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8412 |\n", "| fps | 101 |\n", "| time_elapsed | 6576 |\n", "| total_timesteps | 9670028 |\n", "| train/ | |\n", "| 
learning_rate | 5e-05 |\n", "| loss | 0.022 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 384 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8416 |\n", "| fps | 101 |\n", "| time_elapsed | 6619 |\n", "| total_timesteps | 9674734 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0512 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 387 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8420 |\n", "| fps | 101 |\n", "| time_elapsed | 6659 |\n", "| total_timesteps | 9679180 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0287 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 379 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8424 |\n", "| fps | 102 |\n", "| time_elapsed | 6691 |\n", "| total_timesteps | 9682652 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0272 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 381 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8428 |\n", "| fps | 102 |\n", "| time_elapsed | 6737 |\n", "| total_timesteps | 9687710 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0299 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 376 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8432 |\n", "| fps | 102 |\n", "| time_elapsed | 6776 |\n", "| total_timesteps | 9692008 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.144 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 374 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8436 |\n", "| fps | 102 |\n", "| time_elapsed | 6824 |\n", "| total_timesteps | 9697244 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0303 |\n", "----------------------------------\n", "Eval num_timesteps=9700000, episode_reward=401.40 +/- 18.36\n", "Episode length: 1153.20 +/- 52.09\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.15e+03 |\n", "| mean_reward | 401 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9700000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.11 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 375 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8440 |\n", "| fps | 101 |\n", "| time_elapsed | 6903 |\n", "| total_timesteps | 9702496 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0234 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 372 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8444 |\n", "| fps | 101 |\n", "| time_elapsed | 6951 |\n", "| total_timesteps | 9707724 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.344 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 369 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8448 |\n", "| fps | 101 |\n", "| time_elapsed | 6999 |\n", "| total_timesteps | 9712984 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.126 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 370 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8452 |\n", "| fps | 101 |\n", "| time_elapsed | 7046 |\n", "| total_timesteps | 9718122 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.428 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 369 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8456 |\n", "| fps | 101 |\n", "| time_elapsed | 7097 |\n", "| total_timesteps | 9723266 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0966 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 366 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8460 |\n", "| fps | 101 |\n", "| time_elapsed | 7146 |\n", "| total_timesteps | 9727926 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.5 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 361 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8464 |\n", "| fps | 101 |\n", "| time_elapsed | 7191 |\n", "| total_timesteps | 9732784 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0392 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 359 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8468 |\n", "| fps | 101 |\n", "| time_elapsed | 7235 |\n", "| total_timesteps | 9737691 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.11 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.45e+03 |\n", "| ep_rew_mean | 361 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8472 |\n", "| fps | 102 |\n", "| time_elapsed | 7284 |\n", "| total_timesteps | 9743031 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.03 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.46e+03 |\n", "| ep_rew_mean | 358 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8476 |\n", "| fps | 102 |\n", "| time_elapsed | 7332 |\n", "| total_timesteps | 9748259 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.15 |\n", "----------------------------------\n", "Eval num_timesteps=9750000, episode_reward=385.80 +/- 91.62\n", "Episode length: 1233.30 +/- 201.85\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.23e+03 |\n", "| mean_reward | 386 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9750000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.307 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 352 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8480 |\n", "| fps | 101 |\n", "| time_elapsed | 7412 |\n", "| total_timesteps | 9753419 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.138 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 351 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8484 |\n", "| fps | 101 |\n", "| time_elapsed | 7458 |\n", "| total_timesteps | 9758463 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0556 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 351 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8488 |\n", "| fps | 101 |\n", "| time_elapsed | 7497 |\n", "| total_timesteps | 9762737 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.51 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 350 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8492 |\n", "| fps | 101 |\n", "| time_elapsed | 7549 |\n", "| total_timesteps | 9768361 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.934 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 345 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8496 |\n", "| fps | 101 |\n", "| time_elapsed | 7589 |\n", "| total_timesteps | 9772809 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0429 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.46e+03 |\n", "| ep_rew_mean | 340 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8500 |\n", "| fps | 101 |\n", "| time_elapsed | 7627 |\n", "| total_timesteps | 9776901 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0974 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 337 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8504 |\n", "| fps | 101 |\n", "| time_elapsed | 7670 |\n", "| total_timesteps | 9781633 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0967 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ 
| |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 341 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8508 |\n", "| fps | 101 |\n", "| time_elapsed | 7711 |\n", "| total_timesteps | 9786105 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0378 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 338 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8512 |\n", "| fps | 101 |\n", "| time_elapsed | 7755 |\n", "| total_timesteps | 9791013 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.192 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 340 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8516 |\n", "| fps | 102 |\n", "| time_elapsed | 7794 |\n", "| total_timesteps | 9795275 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.215 |\n", "----------------------------------\n", "Eval num_timesteps=9800000, episode_reward=202.50 +/- 123.13\n", "Episode length: 1101.00 +/- 191.17\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.1e+03 |\n", "| mean_reward | 202 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9800000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.134 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 345 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8520 |\n", "| fps | 101 |\n", "| time_elapsed | 7869 |\n", "| total_timesteps | 9800267 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0294 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ 
| |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 355 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8524 |\n", "| fps | 101 |\n", "| time_elapsed | 7912 |\n", "| total_timesteps | 9805033 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.65 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 353 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8528 |\n", "| fps | 101 |\n", "| time_elapsed | 7958 |\n", "| total_timesteps | 9810089 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0938 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 360 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8532 |\n", "| fps | 101 |\n", "| time_elapsed | 8002 |\n", "| total_timesteps | 9814893 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0681 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 362 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8536 |\n", "| fps | 101 |\n", "| time_elapsed | 8047 |\n", "| total_timesteps | 9819855 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0527 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 362 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8540 |\n", "| fps | 101 |\n", "| time_elapsed | 8088 |\n", "| total_timesteps | 9824375 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0506 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| 
ep_rew_mean | 365 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8544 |\n", "| fps | 101 |\n", "| time_elapsed | 8132 |\n", "| total_timesteps | 9829183 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.133 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 354 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8548 |\n", "| fps | 101 |\n", "| time_elapsed | 8169 |\n", "| total_timesteps | 9833209 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.15 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 354 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8552 |\n", "| fps | 102 |\n", "| time_elapsed | 8211 |\n", "| total_timesteps | 9837913 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.697 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 355 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8556 |\n", "| fps | 102 |\n", "| time_elapsed | 8254 |\n", "| total_timesteps | 9842659 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.124 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 355 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8560 |\n", "| fps | 102 |\n", "| time_elapsed | 8297 |\n", "| total_timesteps | 9847339 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0726 |\n", "----------------------------------\n", "Eval num_timesteps=9850000, episode_reward=331.30 +/- 74.27\n", "Episode length: 1130.20 +/- 84.57\n", "----------------------------------\n", "| 
eval/ | |\n", "| mean_ep_length | 1.13e+03 |\n", "| mean_reward | 331 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9850000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.137 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.45e+03 |\n", "| ep_rew_mean | 353 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8564 |\n", "| fps | 102 |\n", "| time_elapsed | 8599 |\n", "| total_timesteps | 9877405 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0504 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 350 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8568 |\n", "| fps | 102 |\n", "| time_elapsed | 8640 |\n", "| total_timesteps | 9881879 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0171 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 345 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8572 |\n", "| fps | 102 |\n", "| time_elapsed | 8681 |\n", "| total_timesteps | 9886377 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.034 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 346 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8576 |\n", "| fps | 102 |\n", "| time_elapsed | 8723 |\n", "| total_timesteps | 9890985 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0164 |\n", "----------------------------------\n", "Eval num_timesteps=9900000, episode_reward=255.70 +/- 113.39\n", "Episode length: 1180.80 +/- 110.72\n", "----------------------------------\n", 
"| eval/ | |\n", "| mean_ep_length | 1.18e+03 |\n", "| mean_reward | 256 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9900000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0346 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.67e+03 |\n", "| ep_rew_mean | 341 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8580 |\n", "| fps | 102 |\n", "| time_elapsed | 9025 |\n", "| total_timesteps | 9920767 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.034 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.67e+03 |\n", "| ep_rew_mean | 341 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8584 |\n", "| fps | 102 |\n", "| time_elapsed | 9068 |\n", "| total_timesteps | 9925489 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.00654 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.67e+03 |\n", "| ep_rew_mean | 336 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8588 |\n", "| fps | 102 |\n", "| time_elapsed | 9104 |\n", "| total_timesteps | 9929447 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0484 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.66e+03 |\n", "| ep_rew_mean | 336 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8592 |\n", "| fps | 102 |\n", "| time_elapsed | 9149 |\n", "| total_timesteps | 9934359 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.142 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.67e+03 |\n", "| ep_rew_mean | 335 |\n", "| exploration_rate | 
0.005 |\n", "| time/ | |\n", "| episodes | 8596 |\n", "| fps | 102 |\n", "| time_elapsed | 9195 |\n", "| total_timesteps | 9939493 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0551 |\n", "----------------------------------\n", "Eval num_timesteps=9950000, episode_reward=242.30 +/- 103.70\n", "Episode length: 1180.20 +/- 191.91\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.18e+03 |\n", "| mean_reward | 242 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 9950000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0486 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.93e+03 |\n", "| ep_rew_mean | 333 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8600 |\n", "| fps | 102 |\n", "| time_elapsed | 9496 |\n", "| total_timesteps | 9969649 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.148 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.92e+03 |\n", "| ep_rew_mean | 330 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8604 |\n", "| fps | 102 |\n", "| time_elapsed | 9539 |\n", "| total_timesteps | 9973931 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0871 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.93e+03 |\n", "| ep_rew_mean | 323 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8608 |\n", "| fps | 102 |\n", "| time_elapsed | 9584 |\n", "| total_timesteps | 9979213 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0482 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.93e+03 |\n", "| ep_rew_mean | 326 |\n", "| exploration_rate 
| 0.005 |\n", "| time/ | |\n", "| episodes | 8612 |\n", "| fps | 102 |\n", "| time_elapsed | 9625 |\n", "| total_timesteps | 9983891 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.039 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.94e+03 |\n", "| ep_rew_mean | 323 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8616 |\n", "| fps | 102 |\n", "| time_elapsed | 9669 |\n", "| total_timesteps | 9988869 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0397 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.93e+03 |\n", "| ep_rew_mean | 322 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8620 |\n", "| fps | 102 |\n", "| time_elapsed | 9711 |\n", "| total_timesteps | 9993591 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0503 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.93e+03 |\n", "| ep_rew_mean | 321 |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| episodes | 8624 |\n", "| fps | 102 |\n", "| time_elapsed | 9746 |\n", "| total_timesteps | 9997559 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.058 |\n", "----------------------------------\n", "Eval num_timesteps=10000000, episode_reward=486.70 +/- 88.09\n", "Episode length: 1257.20 +/- 110.33\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.26e+03 |\n", "| mean_reward | 487 |\n", "| rollout/ | |\n", "| exploration_rate | 0.005 |\n", "| time/ | |\n", "| total_timesteps | 10000000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0183 |\n", "----------------------------------\n", "New best mean reward!\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" 
} ], "source": [ "# Train the model\n", "model.learn(total_timesteps=NUM_TIMESTEPS, \n", " callback=callback_list, \n", " tb_log_name=\"./tb/\", \n", " reset_num_timesteps=False)" ] }, { "cell_type": "code", "execution_count": 9, "id": "ab23b2ba", "metadata": { "execution": { "iopub.execute_input": "2024-05-17T19:10:49.849747Z", "iopub.status.busy": "2024-05-17T19:10:49.849028Z", "iopub.status.idle": "2024-05-17T19:11:41.754166Z", "shell.execute_reply": "2024-05-17T19:11:41.752887Z" }, "papermill": { "duration": 51.934116, "end_time": "2024-05-17T19:11:41.757228", "exception": false, "start_time": "2024-05-17T19:10:49.823112", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Save the model, policy, and replay buffer for future loading and training\n", "model.save(MODEL_FILE_NAME)\n", "model.save_replay_buffer(BUFFER_FILE_NAME)\n", "model.policy.save(POLICY_FILE_NAME)" ] } ], "metadata": { "kaggle": { "accelerator": "nvidiaTeslaT4", "dataSources": [ { "sourceId": 178008473, "sourceType": "kernelVersion" } ], "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" }, "papermill": { "default_parameters": {}, "duration": 10027.263775, "end_time": "2024-05-17T19:11:44.715163", "environment_variables": {}, "exception": null, "input_path": "__notebook__.ipynb", "output_path": "__notebook__.ipynb", "parameters": {}, "start_time": "2024-05-17T16:24:37.451388", "version": "2.5.0" } }, "nbformat": 4, "nbformat_minor": 5 }