{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "bcbb5dc1", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T15:10:21.253933Z", "iopub.status.busy": "2024-05-10T15:10:21.253021Z", "iopub.status.idle": "2024-05-10T15:11:29.370571Z", "shell.execute_reply": "2024-05-10T15:11:29.369129Z" }, "papermill": { "duration": 68.126717, "end_time": "2024-05-10T15:11:29.373250", "exception": false, "start_time": "2024-05-10T15:10:21.246533", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "%%capture\n", "!pip install stable-baselines3[extra]\n", "!pip install moviepy" ] }, { "cell_type": "code", "execution_count": 2, "id": "85e8fda5", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T15:11:29.383384Z", "iopub.status.busy": "2024-05-10T15:11:29.382481Z", "iopub.status.idle": "2024-05-10T15:11:48.651152Z", "shell.execute_reply": "2024-05-10T15:11:48.650323Z" }, "papermill": { "duration": 19.27607, "end_time": "2024-05-10T15:11:48.653573", "exception": false, "start_time": "2024-05-10T15:11:29.377503", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-05-10 15:11:37.339761: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-05-10 15:11:37.339870: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-05-10 15:11:37.498809: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n" ] } ], "source": [ "from stable_baselines3 import DQN\n", "from stable_baselines3.common.monitor import Monitor\n", "from stable_baselines3.common.callbacks import BaseCallback, EvalCallback, CallbackList\n", "from stable_baselines3.common.logger import Video, HParam, TensorBoardOutputFormat\n", "from stable_baselines3.common.evaluation import evaluate_policy\n", "\n", "from typing import Any, Dict\n", "\n", "import gymnasium as gym\n", "import torch as th\n", "import numpy as np\n", "\n", "# =====File names=====\n", "MODEL_FILE_NAME = \"ALE-Pacman-v5\"\n", "BUFFER_FILE_NAME = \"dqn_replay_buffer_pacman_v2-1\"\n", "POLICY_FILE_NAME = \"dqn_policy_pacman_v2-1\"\n", "\n", "# =====Model Config=====\n", "# Evaluate in 20ths\n", "EVAL_CALLBACK_FREQ = 75_000\n", "# Record in quarters (the last one won't record, will have to do manually)\n", "VIDEO_CALLBACK_FREQ = 375_000\n", "FRAMESKIP = 4\n", "NUM_TIMESTEPS = 1_500_000\n", "\n", "# =====Hyperparams=====\n", "EXPLORATION_FRACTION = 0.3\n", "# Buffer size needs to be less than about 60k in order to save it in a Kaggle instance\n", "BUFFER_SIZE = 60_000\n", "BATCH_SIZE = 64\n", "LEARNING_STARTS = 50_000\n", "LEARNING_RATE = 0.0001\n", "GAMMA = 0.999\n", "FINAL_EPSILON = 0.1\n", "# Target Update Interval is set to 10k by default and looks like it is set to \n", "# 4 in the Nature paper. This is a large discrepency and makes me wonder if it \n", "# is something different or measured differently...\n", "TARGET_UPDATE_INTERVAL = 1_000\n", "\n", "# =====Custom objects for hyperparam modification=====\n", "CUSTOM_OBJECTS = {\n", " \"exploration_fraction\": EXPLORATION_FRACTION, \n", " \"buffer_size\": BUFFER_SIZE,\n", " \"batch_size\": BATCH_SIZE,\n", " \"learning_starts\": LEARNING_STARTS,\n", " \"learning_rate\": LEARNING_RATE,\n", " \"gamma\": GAMMA,\n", " \"target_update_interval\": TARGET_UPDATE_INTERVAL,\n", " \"exploration_final_eps\": FINAL_EPSILON,\n", " \"tensorboard_log\": \"./\",\n", " \"verbose\": 1}" ] }, { "cell_type": "code", "execution_count": 3, "id": "b6d4bc46", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T15:11:48.662719Z", "iopub.status.busy": "2024-05-10T15:11:48.662146Z", "iopub.status.idle": "2024-05-10T15:11:48.673142Z", "shell.execute_reply": "2024-05-10T15:11:48.672279Z" }, "papermill": { "duration": 0.017731, "end_time": "2024-05-10T15:11:48.675177", "exception": false, "start_time": "2024-05-10T15:11:48.657446", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# VideoRecorderCallback\n", "# The VideoRecorderCallback should record a video of the agent in the evaluation environment\n", "# every render_freq timesteps. It will record one episode. It will also record one episode when\n", "# the training has been completed\n", "\n", "class VideoRecorderCallback(BaseCallback):\n", " def __init__(self, eval_env: gym.Env, render_freq: int, n_eval_episodes: int = 1, deterministic: bool = True):\n", " \"\"\"\n", " Records a video of an agent's trajectory traversing ``eval_env`` and logs it to TensorBoard.\n", " :param eval_env: A gym environment from which the trajectory is recorded\n", " :param render_freq: Render the agent's trajectory every eval_freq call of the callback.\n", " :param n_eval_episodes: Number of episodes to render\n", " :param deterministic: Whether to use deterministic or stochastic policy\n", " \"\"\"\n", " super().__init__()\n", " self._eval_env = eval_env\n", " self._render_freq = render_freq\n", " self._n_eval_episodes = n_eval_episodes\n", " self._deterministic = deterministic\n", "\n", " def _on_step(self) -> bool:\n", " if self.n_calls % self._render_freq == 0:\n", " screens = []\n", "\n", " def grab_screens(_locals: Dict[str, Any], _globals: Dict[str, Any]) -> None:\n", " \"\"\"\n", " Renders the environment in its current state, recording the screen in the captured `screens` list\n", " :param _locals: A dictionary containing all local variables of the callback's scope\n", " :param _globals: A dictionary containing all global variables of the callback's scope\n", " \"\"\"\n", " screen = self._eval_env.render()\n", " # PyTorch uses CxHxW vs HxWxC gym (and tensorflow) image convention\n", " screens.append(screen.transpose(2, 0, 1))\n", "\n", " evaluate_policy(\n", " self.model,\n", " self._eval_env,\n", " callback=grab_screens,\n", " n_eval_episodes=self._n_eval_episodes,\n", " deterministic=self._deterministic,\n", " )\n", " self.logger.record(\n", " \"trajectory/video\",\n", " Video(th.from_numpy(np.array([screens])), fps=60),\n", " exclude=(\"stdout\", \"log\", \"json\", \"csv\"),\n", " )\n", " return True" ] }, { "cell_type": "code", "execution_count": 4, "id": "987c802a", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T15:11:48.683977Z", "iopub.status.busy": "2024-05-10T15:11:48.683174Z", "iopub.status.idle": "2024-05-10T15:11:48.695105Z", "shell.execute_reply": "2024-05-10T15:11:48.694277Z" }, "papermill": { "duration": 0.018306, "end_time": "2024-05-10T15:11:48.697069", "exception": false, "start_time": "2024-05-10T15:11:48.678763", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# HParamCallback\n", "# This should log the hyperparameters specified and map the metrics that are logged to \n", "# the appropriate run.\n", "class HParamCallback(BaseCallback):\n", " \"\"\"\n", " Saves the hyperparameters and metrics at the start of the training, and logs them to TensorBoard.\n", " \"\"\" \n", " def __init__(self):\n", " super().__init__()\n", " \n", "\n", " def _on_training_start(self) -> None:\n", " \n", " hparam_dict = {\n", " \"algorithm\": self.model.__class__.__name__,\n", " \"policy\": self.model.policy.__class__.__name__,\n", " \"environment\": self.model.env.__class__.__name__,\n", " \"buffer_size\": self.model.buffer_size,\n", " \"batch_size\": self.model.batch_size,\n", " \"tau\": self.model.tau,\n", " \"gradient_steps\": self.model.gradient_steps,\n", " \"target_update_interval\": self.model.target_update_interval,\n", " \"exploration_fraction\": self.model.exploration_fraction,\n", " \"exploration_initial_eps\": self.model.exploration_initial_eps,\n", " \"exploration_final_eps\": self.model.exploration_final_eps,\n", " \"max_grad_norm\": self.model.max_grad_norm,\n", " \"tensorboard_log\": self.model.tensorboard_log,\n", " \"seed\": self.model.seed, \n", " \"learning rate\": self.model.learning_rate,\n", " \"gamma\": self.model.gamma, \n", " }\n", " # define the metrics that will appear in the `HPARAMS` Tensorboard tab by referencing their tag\n", " # Tensorbaord will find & display metrics from the `SCALARS` tab\n", " metric_dict = {\n", " \"eval/mean_ep_length\": 0,\n", " \"eval/mean_reward\": 0,\n", " \"rollout/ep_len_mean\": 0,\n", " \"rollout/ep_rew_mean\": 0,\n", " \"rollout/exploration_rate\": 0,\n", " \"time/_episode_num\": 0,\n", " \"time/fps\": 0,\n", " \"time/total_timesteps\": 0,\n", " \"train/learning_rate\": 0.0,\n", " \"train/loss\": 0.0,\n", " \"train/n_updates\": 0.0,\n", " \"locals/rewards\": 0.0,\n", " \"locals/infos_0_lives\": 0.0,\n", " \"locals/num_collected_steps\": 0.0,\n", " \"locals/num_collected_episodes\": 0.0\n", " }\n", " \n", " self.logger.record(\n", " \"hparams\",\n", " HParam(hparam_dict, metric_dict),\n", " exclude=(\"stdout\", \"log\", \"json\", \"csv\"),\n", " )\n", " \n", " def _on_step(self) -> bool:\n", " return True" ] }, { "cell_type": "code", "execution_count": 5, "id": "f0f150d5", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T15:11:48.705452Z", "iopub.status.busy": "2024-05-10T15:11:48.705211Z", "iopub.status.idle": "2024-05-10T15:11:48.720403Z", "shell.execute_reply": "2024-05-10T15:11:48.719558Z" }, "papermill": { "duration": 0.021697, "end_time": "2024-05-10T15:11:48.722247", "exception": false, "start_time": "2024-05-10T15:11:48.700550", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# PlotTensorboardValuesCallback\n", "# This callback should log values to tensorboard on every step. \n", "# The self.logger class should plot a new scalar value when recording.\n", "\n", "class PlotTensorboardValuesCallback(BaseCallback):\n", " \"\"\"\n", " Custom callback for plotting additional values in tensorboard.\n", " \"\"\"\n", " def __init__(self, eval_env: gym.Env, train_env: gym.Env, model: DQN, verbose=0):\n", " super().__init__(verbose)\n", " self._eval_env = eval_env\n", " self._train_env = train_env\n", " self._model = model\n", "\n", " def _on_training_start(self) -> None:\n", " output_formats = self.logger.output_formats\n", " # Save reference to tensorboard formatter object\n", " # note: the failure case (not formatter found) is not handled here, should be done with try/except.\n", " try:\n", " self.tb_formatter = next(formatter for formatter in output_formats if isinstance(formatter, TensorBoardOutputFormat))\n", " except:\n", " print(\"Exception thrown in tb_formatter initialization.\") \n", " \n", " self.tb_formatter.writer.add_text(\"metadata/eval_env\", str(self._eval_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"metadata/train_env\", str(self._train_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net\", str(self._model.q_net), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net_target\", str(self._model.q_net_target), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", "\n", " def _on_step(self) -> bool:\n", " self.logger.record(\"time/_episode_num\", self.model._episode_num, exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"train/n_updates\", self.model._n_updates, exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/rewards\", self.locals[\"rewards\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/infos_0_lives\", self.locals[\"infos\"][0][\"lives\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/num_collected_steps\", self.locals[\"num_collected_steps\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/num_collected_episodes\", self.locals[\"num_collected_episodes\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " \n", " return True\n", " \n", " def _on_training_end(self) -> None:\n", " self.tb_formatter.writer.add_text(\"metadata/eval_env\", str(self._eval_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"metadata/train_env\", str(self._train_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net\", str(self._model.q_net), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net_target\", str(self._model.q_net_target), self.num_timesteps)\n", " self.tb_formatter.writer.flush()" ] }, { "cell_type": "code", "execution_count": 6, "id": "cd33d80b", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T15:11:48.730243Z", "iopub.status.busy": "2024-05-10T15:11:48.729955Z", "iopub.status.idle": "2024-05-10T15:13:06.576670Z", "shell.execute_reply": "2024-05-10T15:13:06.575799Z" }, "papermill": { "duration": 77.853531, "end_time": "2024-05-10T15:13:06.579274", "exception": false, "start_time": "2024-05-10T15:11:48.725743", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)\n", "[Powered by Stella]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Wrapping the env with a `Monitor` wrapper\n", "Wrapping the env in a DummyVecEnv.\n", "Wrapping the env in a VecTransposeImage.\n" ] } ], "source": [ "# make the training and evaluation environments\n", "eval_env = Monitor(gym.make(\"ALE/Pacman-v5\", render_mode=\"rgb_array\", frameskip=FRAMESKIP))\n", "train_env = gym.make(\"ALE/Pacman-v5\", render_mode=\"rgb_array\", frameskip=FRAMESKIP)\n", "\n", "# Make the model with specified hyperparams\n", "# load the model\n", "# load the buffer\n", "# The loaded model still needs the hyperparameters to be passed to it, and the replay buffer\n", "model = DQN.load(\"/kaggle/input/dqn-pacmanv5-run2/ALE-Pacman-v5.zip\", \n", " env=train_env, \n", " custom_objects=CUSTOM_OBJECTS)\n", "model.load_replay_buffer(\"/kaggle/input/dqn-pacmanv5-run2/dqn_replay_buffer_pacman_v2\")" ] }, { "cell_type": "code", "execution_count": 7, "id": "a1794659", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T15:13:06.588856Z", "iopub.status.busy": "2024-05-10T15:13:06.588479Z", "iopub.status.idle": "2024-05-10T15:13:06.594899Z", "shell.execute_reply": "2024-05-10T15:13:06.594143Z" }, "papermill": { "duration": 0.013233, "end_time": "2024-05-10T15:13:06.596818", "exception": false, "start_time": "2024-05-10T15:13:06.583585", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Define the callbacks and put them in a list\n", "eval_callback = EvalCallback(\n", " eval_env,\n", " best_model_save_path=\"./best_model/\",\n", " log_path=\"./evals/\",\n", " eval_freq=EVAL_CALLBACK_FREQ,\n", " n_eval_episodes=10,\n", " deterministic=True,\n", " render=False)\n", "\n", "tbplot_callback = PlotTensorboardValuesCallback(eval_env=eval_env, train_env=train_env, model=model)\n", "video_callback = VideoRecorderCallback(eval_env, render_freq=VIDEO_CALLBACK_FREQ)\n", "hparam_callback = HParamCallback()\n", "\n", "callback_list = CallbackList([hparam_callback, eval_callback, video_callback, tbplot_callback])" ] }, { "cell_type": "code", "execution_count": 8, "id": "445f4c1f", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T15:13:06.605773Z", "iopub.status.busy": "2024-05-10T15:13:06.605451Z", "iopub.status.idle": "2024-05-10T18:52:26.024853Z", "shell.execute_reply": "2024-05-10T18:52:26.023822Z" }, "papermill": { "duration": 13159.426534, "end_time": "2024-05-10T18:52:26.027095", "exception": false, "start_time": "2024-05-10T15:13:06.600561", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Logging to ././tb/_0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/stable_baselines3/common/callbacks.py:403: UserWarning: Training and eval env are not of the same type != \n", " warnings.warn(\"Training and eval env are not of the same type\" f\"{self.training_env} != {self.eval_env}\")\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 73.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2116 |\n", "| fps | 110 |\n", "| time_elapsed | 269 |\n", "| total_timesteps | 1529688 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0733 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 74.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2120 |\n", "| fps | 109 |\n", "| time_elapsed | 305 |\n", "| total_timesteps | 1533544 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.102 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 73.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2124 |\n", "| fps | 109 |\n", "| time_elapsed | 335 |\n", "| total_timesteps | 1536756 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.167 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 73.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2128 |\n", "| fps | 109 |\n", "| time_elapsed | 373 |\n", "| total_timesteps | 1540990 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.051 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 72.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2132 |\n", "| fps | 109 |\n", "| time_elapsed | 410 |\n", "| total_timesteps | 1545052 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.128 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 72.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2136 |\n", "| fps | 109 |\n", "| time_elapsed | 445 |\n", "| total_timesteps | 1548854 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.056 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 72.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2140 |\n", "| fps | 109 |\n", "| time_elapsed | 472 |\n", "| total_timesteps | 1551816 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0909 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 73.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2144 |\n", "| fps | 109 |\n", "| time_elapsed | 514 |\n", "| total_timesteps | 1556378 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.106 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 73.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2148 |\n", "| fps | 109 |\n", "| time_elapsed | 538 |\n", "| total_timesteps | 1558980 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.123 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 73.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2152 |\n", "| fps | 109 |\n", "| time_elapsed | 570 |\n", "| total_timesteps | 1562566 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.129 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 72.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2156 |\n", "| fps | 109 |\n", "| time_elapsed | 605 |\n", "| total_timesteps | 1566310 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0751 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 74.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2160 |\n", "| fps | 109 |\n", "| time_elapsed | 637 |\n", "| total_timesteps | 1569848 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.074 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 75.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2164 |\n", "| fps | 109 |\n", "| time_elapsed | 668 |\n", "| total_timesteps | 1573236 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.38 |\n", "----------------------------------\n", "Eval num_timesteps=1575000, episode_reward=27.70 +/- 4.00\n", "Episode length: 573.00 +/- 137.24\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 573 |\n", "| mean_reward | 27.7 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 1575000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0598 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 75.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2168 |\n", "| fps | 107 |\n", "| time_elapsed | 710 |\n", "| total_timesteps | 1576208 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.116 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 77.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2172 |\n", "| fps | 107 |\n", "| time_elapsed | 743 |\n", "| total_timesteps | 1579994 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.121 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 78.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2176 |\n", "| fps | 107 |\n", "| time_elapsed | 773 |\n", "| total_timesteps | 1583382 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0804 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 78.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2180 |\n", "| fps | 107 |\n", "| time_elapsed | 800 |\n", "| total_timesteps | 1586400 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.463 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 77.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2184 |\n", "| fps | 108 |\n", "| time_elapsed | 835 |\n", "| total_timesteps | 1590378 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.488 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 78.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2188 |\n", "| fps | 108 |\n", "| time_elapsed | 871 |\n", "| total_timesteps | 1594436 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.114 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 78.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2192 |\n", "| fps | 108 |\n", "| time_elapsed | 907 |\n", "| total_timesteps | 1598492 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.2 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 78.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2196 |\n", "| fps | 108 |\n", "| time_elapsed | 937 |\n", "| total_timesteps | 1601968 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.435 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 80.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2200 |\n", "| fps | 108 |\n", "| time_elapsed | 975 |\n", "| total_timesteps | 1606196 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.115 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 80 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2204 |\n", "| fps | 109 |\n", "| time_elapsed | 1000 |\n", "| total_timesteps | 1609113 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.1 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 81.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2208 |\n", "| fps | 109 |\n", "| time_elapsed | 1026 |\n", "| total_timesteps | 1612045 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.178 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 77.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2212 |\n", "| fps | 109 |\n", "| time_elapsed | 1054 |\n", "| total_timesteps | 1615244 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.173 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 889 |\n", "| ep_rew_mean | 76.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2216 |\n", "| fps | 109 |\n", "| time_elapsed | 1083 |\n", "| total_timesteps | 1618558 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0729 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 877 |\n", "| ep_rew_mean | 75.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2220 |\n", "| fps | 109 |\n", "| time_elapsed | 1107 |\n", "| total_timesteps | 1621258 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.163 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 873 |\n", "| ep_rew_mean | 74 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2224 |\n", "| fps | 109 |\n", "| time_elapsed | 1132 |\n", "| total_timesteps | 1624062 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.922 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 875 |\n", "| ep_rew_mean | 74.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2228 |\n", "| fps | 109 |\n", "| time_elapsed | 1170 |\n", "| total_timesteps | 1628452 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.259 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 871 |\n", "| ep_rew_mean | 75.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2232 |\n", "| fps | 109 |\n", "| time_elapsed | 1203 |\n", "| total_timesteps | 1632154 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0711 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 870 |\n", "| ep_rew_mean | 75.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2236 |\n", "| fps | 109 |\n", "| time_elapsed | 1235 |\n", "| total_timesteps | 1635840 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.33 |\n", "----------------------------------\n", "Eval num_timesteps=1650000, episode_reward=57.60 +/- 19.90\n", "Episode length: 982.80 +/- 159.44\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 983 |\n", "| mean_reward | 57.6 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 1650000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.07 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 79.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2240 |\n", "| fps | 108 |\n", "| time_elapsed | 1519 |\n", "| total_timesteps | 1665365 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.553 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 77.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2244 |\n", "| fps | 108 |\n", "| time_elapsed | 1545 |\n", "| total_timesteps | 1668277 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.139 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 78.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2248 |\n", "| fps | 108 |\n", "| time_elapsed | 1574 |\n", "| total_timesteps | 1671621 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.1 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 78.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2252 |\n", "| fps | 109 |\n", "| time_elapsed | 1604 |\n", "| total_timesteps | 1675063 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0825 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 79.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2256 |\n", "| fps | 109 |\n", "| time_elapsed | 1641 |\n", "| total_timesteps | 1679193 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.152 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 77.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2260 |\n", "| fps | 109 |\n", "| time_elapsed | 1675 |\n", "| total_timesteps | 1683035 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0639 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 79.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2264 |\n", "| fps | 109 |\n", "| time_elapsed | 1943 |\n", "| total_timesteps | 1713721 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.288 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 76.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2268 |\n", "| fps | 110 |\n", "| time_elapsed | 1972 |\n", "| total_timesteps | 1717088 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0813 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 74.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2272 |\n", "| fps | 110 |\n", "| time_elapsed | 1999 |\n", "| total_timesteps | 1720051 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0911 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 73.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2276 |\n", "| fps | 110 |\n", "| time_elapsed | 2031 |\n", "| total_timesteps | 1723715 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.04 |\n", "----------------------------------\n", "Eval num_timesteps=1725000, episode_reward=78.00 +/- 18.33\n", "Episode length: 660.20 +/- 18.21\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 660 |\n", "| mean_reward | 78 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 1725000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.052 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 73.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2280 |\n", "| fps | 109 |\n", "| time_elapsed | 2078 |\n", "| total_timesteps | 1726999 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0564 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 73.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2284 |\n", "| fps | 109 |\n", "| time_elapsed | 2108 |\n", "| total_timesteps | 1730283 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.14 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 72.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2288 |\n", "| fps | 109 |\n", "| time_elapsed | 2144 |\n", "| total_timesteps | 1734143 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.083 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 75.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2292 |\n", "| fps | 109 |\n", "| time_elapsed | 2180 |\n", "| total_timesteps | 1738201 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.254 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 76.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2296 |\n", "| fps | 109 |\n", "| time_elapsed | 2221 |\n", "| total_timesteps | 1742724 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.097 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 75.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2300 |\n", "| fps | 109 |\n", "| time_elapsed | 2260 |\n", "| total_timesteps | 1747084 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.114 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 77 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2304 |\n", "| fps | 109 |\n", "| time_elapsed | 2300 |\n", "| total_timesteps | 1751644 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.321 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 77.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2308 |\n", "| fps | 109 |\n", "| time_elapsed | 2333 |\n", "| total_timesteps | 1755426 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0561 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.65e+03 |\n", "| ep_rew_mean | 79 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2312 |\n", "| fps | 109 |\n", "| time_elapsed | 2546 |\n", "| total_timesteps | 1779780 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0872 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.66e+03 |\n", "| ep_rew_mean | 78.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2316 |\n", "| fps | 109 |\n", "| time_elapsed | 2585 |\n", "| total_timesteps | 1784278 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0963 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.68e+03 |\n", "| ep_rew_mean | 79.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2320 |\n", "| fps | 110 |\n", "| time_elapsed | 2630 |\n", "| total_timesteps | 1789448 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0745 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.71e+03 |\n", "| ep_rew_mean | 82.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2324 |\n", "| fps | 110 |\n", "| time_elapsed | 2682 |\n", "| total_timesteps | 1795342 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.294 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.71e+03 |\n", "| ep_rew_mean | 82.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2328 |\n", "| fps | 110 |\n", "| time_elapsed | 2719 |\n", "| total_timesteps | 1799538 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.137 |\n", "----------------------------------\n", "Eval num_timesteps=1800000, episode_reward=31.20 +/- 8.29\n", "Episode length: 647.30 +/- 68.78\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 647 |\n", "| mean_reward | 31.2 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 1800000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.584 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.71e+03 |\n", "| ep_rew_mean | 81.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2332 |\n", "| fps | 109 |\n", "| time_elapsed | 2769 |\n", "| total_timesteps | 1803396 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0925 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.72e+03 |\n", "| ep_rew_mean | 82 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2336 |\n", "| fps | 109 |\n", "| time_elapsed | 2811 |\n", "| total_timesteps | 1808158 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.204 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 80.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2340 |\n", "| fps | 109 |\n", "| time_elapsed | 2850 |\n", "| total_timesteps | 1812619 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.204 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.49e+03 |\n", "| ep_rew_mean | 81 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2344 |\n", "| fps | 109 |\n", "| time_elapsed | 2888 |\n", "| total_timesteps | 1816859 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.051 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.5e+03 |\n", "| ep_rew_mean | 80.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2348 |\n", "| fps | 109 |\n", "| time_elapsed | 2932 |\n", "| total_timesteps | 1821905 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.139 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.51e+03 |\n", "| ep_rew_mean | 82.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2352 |\n", "| fps | 109 |\n", "| time_elapsed | 2972 |\n", "| total_timesteps | 1826335 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.299 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.52e+03 |\n", "| ep_rew_mean | 83.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2356 |\n", "| fps | 109 |\n", "| time_elapsed | 3018 |\n", "| total_timesteps | 1831526 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.178 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.53e+03 |\n", "| ep_rew_mean | 85.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2360 |\n", "| fps | 109 |\n", "| time_elapsed | 3057 |\n", "| total_timesteps | 1835934 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0976 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 85.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2364 |\n", "| fps | 109 |\n", "| time_elapsed | 3088 |\n", "| total_timesteps | 1839527 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0894 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 86.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2368 |\n", "| fps | 109 |\n", "| time_elapsed | 3123 |\n", "| total_timesteps | 1843411 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.113 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 88 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2372 |\n", "| fps | 109 |\n", "| time_elapsed | 3159 |\n", "| total_timesteps | 1847485 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.246 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 88 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2376 |\n", "| fps | 110 |\n", "| time_elapsed | 3188 |\n", "| total_timesteps | 1850795 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.155 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 98.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2380 |\n", "| fps | 110 |\n", "| time_elapsed | 3228 |\n", "| total_timesteps | 1855249 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.264 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.29e+03 |\n", "| ep_rew_mean | 98.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2384 |\n", "| fps | 110 |\n", "| time_elapsed | 3261 |\n", "| total_timesteps | 1858928 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.219 |\n", "----------------------------------\n", "Eval num_timesteps=1875000, episode_reward=232.80 +/- 108.99\n", "Episode length: 1205.50 +/- 140.97\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.21e+03 |\n", "| mean_reward | 233 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 1875000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.385 |\n", "----------------------------------\n", "New best mean reward!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/gymnasium/utils/passive_env_checker.py:335: UserWarning: \u001b[33mWARN: No render fps was declared in the environment (env.metadata['render_fps'] is None or not defined), rendering may occur at inconsistent fps.\u001b[0m\n", " logger.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.54e+03 |\n", "| ep_rew_mean | 104 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2388 |\n", "| fps | 109 |\n", "| time_elapsed | 3539 |\n", "| total_timesteps | 1887868 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.405 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.54e+03 |\n", "| ep_rew_mean | 105 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2392 |\n", "| fps | 109 |\n", "| time_elapsed | 3584 |\n", "| total_timesteps | 1892284 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.149 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.53e+03 |\n", "| ep_rew_mean | 104 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2396 |\n", "| fps | 109 |\n", "| time_elapsed | 3610 |\n", "| total_timesteps | 1895576 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 1.34 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.51e+03 |\n", "| ep_rew_mean | 106 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2400 |\n", "| fps | 109 |\n", "| time_elapsed | 3635 |\n", "| total_timesteps | 1898576 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.183 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.5e+03 |\n", "| ep_rew_mean | 104 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2404 |\n", "| fps | 109 |\n", "| time_elapsed | 3660 |\n", "| total_timesteps | 1901698 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.116 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.5e+03 |\n", "| ep_rew_mean | 106 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2408 |\n", "| fps | 109 |\n", "| time_elapsed | 3695 |\n", "| total_timesteps | 1905910 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.142 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 107 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2412 |\n", "| fps | 109 |\n", "| time_elapsed | 3729 |\n", "| total_timesteps | 1910050 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0857 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.29e+03 |\n", "| ep_rew_mean | 108 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2416 |\n", "| fps | 110 |\n", "| time_elapsed | 3753 |\n", "| total_timesteps | 1913040 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.134 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 109 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2420 |\n", "| fps | 110 |\n", "| time_elapsed | 3786 |\n", "| total_timesteps | 1917033 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.202 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 111 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2424 |\n", "| fps | 110 |\n", "| time_elapsed | 3820 |\n", "| total_timesteps | 1921282 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0512 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 113 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2428 |\n", "| fps | 110 |\n", "| time_elapsed | 3847 |\n", "| total_timesteps | 1924556 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0878 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 112 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2432 |\n", "| fps | 110 |\n", "| time_elapsed | 3881 |\n", "| total_timesteps | 1928810 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.115 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 114 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2436 |\n", "| fps | 110 |\n", "| time_elapsed | 3916 |\n", "| total_timesteps | 1933102 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0718 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 112 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2440 |\n", "| fps | 110 |\n", "| time_elapsed | 3950 |\n", "| total_timesteps | 1937250 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.148 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 112 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2444 |\n", "| fps | 110 |\n", "| time_elapsed | 3979 |\n", "| total_timesteps | 1940735 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.148 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 114 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2448 |\n", "| fps | 110 |\n", "| time_elapsed | 4014 |\n", "| total_timesteps | 1945079 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.192 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 114 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2452 |\n", "| fps | 110 |\n", "| time_elapsed | 4046 |\n", "| total_timesteps | 1949001 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.101 |\n", "----------------------------------\n", "Eval num_timesteps=1950000, episode_reward=96.70 +/- 50.13\n", "Episode length: 1054.20 +/- 163.53\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.05e+03 |\n", "| mean_reward | 96.7 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 1950000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.179 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 115 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2456 |\n", "| fps | 110 |\n", "| time_elapsed | 4104 |\n", "| total_timesteps | 1952959 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.125 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 115 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2460 |\n", "| fps | 110 |\n", "| time_elapsed | 4140 |\n", "| total_timesteps | 1957355 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.119 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 116 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2464 |\n", "| fps | 110 |\n", "| time_elapsed | 4179 |\n", "| total_timesteps | 1962117 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.313 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 117 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2468 |\n", "| fps | 110 |\n", "| time_elapsed | 4214 |\n", "| total_timesteps | 1966521 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.201 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 116 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2472 |\n", "| fps | 110 |\n", "| time_elapsed | 4247 |\n", "| total_timesteps | 1970503 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.847 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 125 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2476 |\n", "| fps | 110 |\n", "| time_elapsed | 4280 |\n", "| total_timesteps | 1974628 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.26 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 115 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2480 |\n", "| fps | 110 |\n", "| time_elapsed | 4314 |\n", "| total_timesteps | 1978732 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.382 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 118 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2484 |\n", "| fps | 111 |\n", "| time_elapsed | 4346 |\n", "| total_timesteps | 1982742 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.166 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 987 |\n", "| ep_rew_mean | 115 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2488 |\n", "| fps | 111 |\n", "| time_elapsed | 4377 |\n", "| total_timesteps | 1986524 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.181 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 980 |\n", "| ep_rew_mean | 112 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2492 |\n", "| fps | 111 |\n", "| time_elapsed | 4407 |\n", "| total_timesteps | 1990239 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.213 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 986 |\n", "| ep_rew_mean | 114 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2496 |\n", "| fps | 111 |\n", "| time_elapsed | 4439 |\n", "| total_timesteps | 1994197 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.234 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 992 |\n", "| ep_rew_mean | 115 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2500 |\n", "| fps | 111 |\n", "| time_elapsed | 4469 |\n", "| total_timesteps | 1997819 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.144 |\n", "----------------------------------\n", "Eval num_timesteps=2025000, episode_reward=104.90 +/- 93.00\n", "Episode length: 3734.00 +/- 7755.05\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 3.73e+03 |\n", "| mean_reward | 105 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 2025000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0977 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 118 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2504 |\n", "| fps | 109 |\n", "| time_elapsed | 4797 |\n", "| total_timesteps | 2027263 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.23 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 118 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2508 |\n", "| fps | 109 |\n", "| time_elapsed | 4836 |\n", "| total_timesteps | 2032015 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.103 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 118 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2512 |\n", "| fps | 110 |\n", "| time_elapsed | 4874 |\n", "| total_timesteps | 2036533 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.166 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 117 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2516 |\n", "| fps | 110 |\n", "| time_elapsed | 4904 |\n", "| total_timesteps | 2040286 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.415 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.29e+03 |\n", "| ep_rew_mean | 116 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2520 |\n", "| fps | 110 |\n", "| time_elapsed | 4947 |\n", "| total_timesteps | 2045560 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.138 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.29e+03 |\n", "| ep_rew_mean | 113 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2524 |\n", "| fps | 110 |\n", "| time_elapsed | 4984 |\n", "| total_timesteps | 2049969 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.323 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 114 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2528 |\n", "| fps | 110 |\n", "| time_elapsed | 5134 |\n", "| total_timesteps | 2068479 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.492 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 115 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2532 |\n", "| fps | 110 |\n", "| time_elapsed | 5165 |\n", "| total_timesteps | 2072244 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.139 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 117 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2536 |\n", "| fps | 110 |\n", "| time_elapsed | 5199 |\n", "| total_timesteps | 2076360 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0936 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 116 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2540 |\n", "| fps | 110 |\n", "| time_elapsed | 5226 |\n", "| total_timesteps | 2079772 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.184 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 117 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2544 |\n", "| fps | 110 |\n", "| time_elapsed | 5260 |\n", "| total_timesteps | 2083933 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.141 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 116 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2548 |\n", "| fps | 111 |\n", "| time_elapsed | 5298 |\n", "| total_timesteps | 2088542 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.167 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 114 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2552 |\n", "| fps | 111 |\n", "| time_elapsed | 5330 |\n", "| total_timesteps | 2092578 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.122 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 114 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2556 |\n", "| fps | 111 |\n", "| time_elapsed | 5368 |\n", "| total_timesteps | 2097186 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.215 |\n", "----------------------------------\n", "Eval num_timesteps=2100000, episode_reward=65.90 +/- 23.57\n", "Episode length: 928.60 +/- 189.96\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 929 |\n", "| mean_reward | 65.9 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 2100000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.299 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 114 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2560 |\n", "| fps | 110 |\n", "| time_elapsed | 5428 |\n", "| total_timesteps | 2101664 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.179 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 115 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2564 |\n", "| fps | 110 |\n", "| time_elapsed | 5459 |\n", "| total_timesteps | 2105439 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.11 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 115 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2568 |\n", "| fps | 110 |\n", "| time_elapsed | 5493 |\n", "| total_timesteps | 2109551 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.111 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 116 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2572 |\n", "| fps | 111 |\n", "| time_elapsed | 5525 |\n", "| total_timesteps | 2113455 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.156 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 107 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2576 |\n", "| fps | 111 |\n", "| time_elapsed | 5560 |\n", "| total_timesteps | 2117757 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.163 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 107 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2580 |\n", "| fps | 111 |\n", "| time_elapsed | 5586 |\n", "| total_timesteps | 2120869 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.347 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 107 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2584 |\n", "| fps | 111 |\n", "| time_elapsed | 5622 |\n", "| total_timesteps | 2125261 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.16 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 106 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2588 |\n", "| fps | 111 |\n", "| time_elapsed | 5655 |\n", "| total_timesteps | 2129277 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.342 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 107 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2592 |\n", "| fps | 111 |\n", "| time_elapsed | 5686 |\n", "| total_timesteps | 2133049 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.182 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 107 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2596 |\n", "| fps | 111 |\n", "| time_elapsed | 5717 |\n", "| total_timesteps | 2136833 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.156 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.69e+03 |\n", "| ep_rew_mean | 105 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2600 |\n", "| fps | 111 |\n", "| time_elapsed | 5958 |\n", "| total_timesteps | 2166563 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.274 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 103 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2604 |\n", "| fps | 111 |\n", "| time_elapsed | 5991 |\n", "| total_timesteps | 2170589 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.152 |\n", "----------------------------------\n", "Eval num_timesteps=2175000, episode_reward=90.00 +/- 23.43\n", "Episode length: 971.10 +/- 96.26\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 971 |\n", "| mean_reward | 90 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 2175000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.716 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 103 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2608 |\n", "| fps | 111 |\n", "| time_elapsed | 6054 |\n", "| total_timesteps | 2175396 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.124 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 102 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2612 |\n", "| fps | 111 |\n", "| time_elapsed | 6088 |\n", "| total_timesteps | 2179507 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 1.33 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 102 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2616 |\n", "| fps | 111 |\n", "| time_elapsed | 6122 |\n", "| total_timesteps | 2183725 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.128 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 102 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2620 |\n", "| fps | 111 |\n", "| time_elapsed | 6158 |\n", "| total_timesteps | 2188091 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.211 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 101 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2624 |\n", "| fps | 111 |\n", "| time_elapsed | 6187 |\n", "| total_timesteps | 2191679 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.228 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 99.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2628 |\n", "| fps | 111 |\n", "| time_elapsed | 6223 |\n", "| total_timesteps | 2196108 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0756 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.29e+03 |\n", "| ep_rew_mean | 103 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2632 |\n", "| fps | 111 |\n", "| time_elapsed | 6262 |\n", "| total_timesteps | 2200873 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0813 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.29e+03 |\n", "| ep_rew_mean | 100 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2636 |\n", "| fps | 111 |\n", "| time_elapsed | 6300 |\n", "| total_timesteps | 2205555 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.122 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 101 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2640 |\n", "| fps | 112 |\n", "| time_elapsed | 6333 |\n", "| total_timesteps | 2209557 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.197 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 102 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2644 |\n", "| fps | 112 |\n", "| time_elapsed | 6374 |\n", "| total_timesteps | 2214573 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.116 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 105 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2648 |\n", "| fps | 112 |\n", "| time_elapsed | 6410 |\n", "| total_timesteps | 2218955 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 1.37 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 106 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2652 |\n", "| fps | 112 |\n", "| time_elapsed | 6444 |\n", "| total_timesteps | 2223189 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.236 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 103 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2656 |\n", "| fps | 112 |\n", "| time_elapsed | 6476 |\n", "| total_timesteps | 2226995 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.278 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.29e+03 |\n", "| ep_rew_mean | 105 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2660 |\n", "| fps | 112 |\n", "| time_elapsed | 6510 |\n", "| total_timesteps | 2231143 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.193 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 104 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2664 |\n", "| fps | 112 |\n", "| time_elapsed | 6548 |\n", "| total_timesteps | 2235685 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.331 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 104 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2668 |\n", "| fps | 112 |\n", "| time_elapsed | 6579 |\n", "| total_timesteps | 2239515 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0891 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 107 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2672 |\n", "| fps | 112 |\n", "| time_elapsed | 6609 |\n", "| total_timesteps | 2243143 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.438 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.29e+03 |\n", "| ep_rew_mean | 107 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2676 |\n", "| fps | 112 |\n", "| time_elapsed | 6636 |\n", "| total_timesteps | 2246528 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.161 |\n", "----------------------------------\n", "Eval num_timesteps=2250000, episode_reward=83.20 +/- 41.68\n", "Episode length: 934.70 +/- 360.78\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 935 |\n", "| mean_reward | 83.2 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 2250000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.415 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 108 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2680 |\n", "| fps | 112 |\n", "| time_elapsed | 6697 |\n", "| total_timesteps | 2250718 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.153 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.29e+03 |\n", "| ep_rew_mean | 110 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2684 |\n", "| fps | 112 |\n", "| time_elapsed | 6737 |\n", "| total_timesteps | 2254580 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.331 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 111 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2688 |\n", "| fps | 112 |\n", "| time_elapsed | 6776 |\n", "| total_timesteps | 2259250 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.381 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 110 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2692 |\n", "| fps | 112 |\n", "| time_elapsed | 6823 |\n", "| total_timesteps | 2264876 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.14 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 110 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2696 |\n", "| fps | 112 |\n", "| time_elapsed | 6851 |\n", "| total_timesteps | 2268225 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.135 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.06e+03 |\n", "| ep_rew_mean | 111 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2700 |\n", "| fps | 112 |\n", "| time_elapsed | 6885 |\n", "| total_timesteps | 2272286 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.292 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.05e+03 |\n", "| ep_rew_mean | 112 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2704 |\n", "| fps | 112 |\n", "| time_elapsed | 6915 |\n", "| total_timesteps | 2275906 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.141 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.05e+03 |\n", "| ep_rew_mean | 112 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2708 |\n", "| fps | 112 |\n", "| time_elapsed | 6950 |\n", "| total_timesteps | 2280108 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.307 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.06e+03 |\n", "| ep_rew_mean | 112 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2712 |\n", "| fps | 112 |\n", "| time_elapsed | 6992 |\n", "| total_timesteps | 2285296 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.168 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.06e+03 |\n", "| ep_rew_mean | 112 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2716 |\n", "| fps | 112 |\n", "| time_elapsed | 7026 |\n", "| total_timesteps | 2289326 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.132 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.05e+03 |\n", "| ep_rew_mean | 117 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2720 |\n", "| fps | 112 |\n", "| time_elapsed | 7055 |\n", "| total_timesteps | 2292879 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.168 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.05e+03 |\n", "| ep_rew_mean | 118 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2724 |\n", "| fps | 112 |\n", "| time_elapsed | 7083 |\n", "| total_timesteps | 2296303 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.218 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.04e+03 |\n", "| ep_rew_mean | 116 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2728 |\n", "| fps | 112 |\n", "| time_elapsed | 7113 |\n", "| total_timesteps | 2299873 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.256 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 117 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2732 |\n", "| fps | 112 |\n", "| time_elapsed | 7235 |\n", "| total_timesteps | 2314830 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.126 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 119 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2736 |\n", "| fps | 112 |\n", "| time_elapsed | 7268 |\n", "| total_timesteps | 2318840 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.139 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 118 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2740 |\n", "| fps | 112 |\n", "| time_elapsed | 7298 |\n", "| total_timesteps | 2322482 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.445 |\n", "----------------------------------\n", "Eval num_timesteps=2325000, episode_reward=305.30 +/- 99.30\n", "Episode length: 1262.70 +/- 327.18\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.26e+03 |\n", "| mean_reward | 305 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 2325000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.175 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 122 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2744 |\n", "| fps | 112 |\n", "| time_elapsed | 7360 |\n", "| total_timesteps | 2326028 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.531 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 124 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2748 |\n", "| fps | 112 |\n", "| time_elapsed | 7398 |\n", "| total_timesteps | 2330636 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.294 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 126 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2752 |\n", "| fps | 112 |\n", "| time_elapsed | 7429 |\n", "| total_timesteps | 2334398 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.461 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 128 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2756 |\n", "| fps | 112 |\n", "| time_elapsed | 7475 |\n", "| total_timesteps | 2340018 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.191 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 128 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2760 |\n", "| fps | 112 |\n", "| time_elapsed | 7502 |\n", "| total_timesteps | 2343234 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.221 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 132 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2764 |\n", "| fps | 112 |\n", "| time_elapsed | 7538 |\n", "| total_timesteps | 2347684 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.208 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 133 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2768 |\n", "| fps | 112 |\n", "| time_elapsed | 7564 |\n", "| total_timesteps | 2350882 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.165 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 134 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2772 |\n", "| fps | 112 |\n", "| time_elapsed | 7598 |\n", "| total_timesteps | 2354930 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.235 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 136 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2776 |\n", "| fps | 112 |\n", "| time_elapsed | 7631 |\n", "| total_timesteps | 2358924 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.417 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 140 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2780 |\n", "| fps | 112 |\n", "| time_elapsed | 7658 |\n", "| total_timesteps | 2362218 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.527 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 138 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2784 |\n", "| fps | 112 |\n", "| time_elapsed | 7910 |\n", "| total_timesteps | 2392620 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.22 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 136 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2788 |\n", "| fps | 112 |\n", "| time_elapsed | 7941 |\n", "| total_timesteps | 2396434 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.083 |\n", "----------------------------------\n", "Eval num_timesteps=2400000, episode_reward=54.20 +/- 12.11\n", "Episode length: 990.80 +/- 137.43\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 991 |\n", "| mean_reward | 54.2 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 2400000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.106 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 135 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2792 |\n", "| fps | 112 |\n", "| time_elapsed | 8051 |\n", "| total_timesteps | 2406608 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.175 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 134 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2796 |\n", "| fps | 112 |\n", "| time_elapsed | 8077 |\n", "| total_timesteps | 2409630 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.102 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 133 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2800 |\n", "| fps | 112 |\n", "| time_elapsed | 8102 |\n", "| total_timesteps | 2412736 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.143 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.67e+03 |\n", "| ep_rew_mean | 130 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2804 |\n", "| fps | 112 |\n", "| time_elapsed | 8351 |\n", "| total_timesteps | 2442776 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.209 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.66e+03 |\n", "| ep_rew_mean | 129 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2808 |\n", "| fps | 112 |\n", "| time_elapsed | 8377 |\n", "| total_timesteps | 2445921 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.254 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.64e+03 |\n", "| ep_rew_mean | 129 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2812 |\n", "| fps | 112 |\n", "| time_elapsed | 8407 |\n", "| total_timesteps | 2449477 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0954 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.63e+03 |\n", "| ep_rew_mean | 131 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2816 |\n", "| fps | 112 |\n", "| time_elapsed | 8431 |\n", "| total_timesteps | 2452503 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0922 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.63e+03 |\n", "| ep_rew_mean | 126 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2820 |\n", "| fps | 112 |\n", "| time_elapsed | 8457 |\n", "| total_timesteps | 2455555 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.107 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.63e+03 |\n", "| ep_rew_mean | 125 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2824 |\n", "| fps | 113 |\n", "| time_elapsed | 8490 |\n", "| total_timesteps | 2459550 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.487 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.63e+03 |\n", "| ep_rew_mean | 125 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2828 |\n", "| fps | 113 |\n", "| time_elapsed | 8518 |\n", "| total_timesteps | 2462816 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.644 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.52e+03 |\n", "| ep_rew_mean | 121 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2832 |\n", "| fps | 113 |\n", "| time_elapsed | 8548 |\n", "| total_timesteps | 2466472 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0898 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.51e+03 |\n", "| ep_rew_mean | 119 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2836 |\n", "| fps | 113 |\n", "| time_elapsed | 8579 |\n", "| total_timesteps | 2470208 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.124 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.52e+03 |\n", "| ep_rew_mean | 120 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2840 |\n", "| fps | 113 |\n", "| time_elapsed | 8615 |\n", "| total_timesteps | 2474488 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.22 |\n", "----------------------------------\n", "Eval num_timesteps=2475000, episode_reward=47.70 +/- 11.45\n", "Episode length: 891.10 +/- 179.00\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 891 |\n", "| mean_reward | 47.7 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 2475000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.62 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.52e+03 |\n", "| ep_rew_mean | 118 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2844 |\n", "| fps | 112 |\n", "| time_elapsed | 8665 |\n", "| total_timesteps | 2477706 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.322 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.51e+03 |\n", "| ep_rew_mean | 112 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2848 |\n", "| fps | 112 |\n", "| time_elapsed | 8701 |\n", "| total_timesteps | 2482046 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.438 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.52e+03 |\n", "| ep_rew_mean | 117 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2852 |\n", "| fps | 112 |\n", "| time_elapsed | 8736 |\n", "| total_timesteps | 2486256 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.264 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.5e+03 |\n", "| ep_rew_mean | 113 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2856 |\n", "| fps | 112 |\n", "| time_elapsed | 8767 |\n", "| total_timesteps | 2489904 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.603 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.51e+03 |\n", "| ep_rew_mean | 111 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2860 |\n", "| fps | 112 |\n", "| time_elapsed | 8800 |\n", "| total_timesteps | 2493842 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.318 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.5e+03 |\n", "| ep_rew_mean | 109 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2864 |\n", "| fps | 112 |\n", "| time_elapsed | 8830 |\n", "| total_timesteps | 2497532 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.385 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.51e+03 |\n", "| ep_rew_mean | 106 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2868 |\n", "| fps | 112 |\n", "| time_elapsed | 8864 |\n", "| total_timesteps | 2501608 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.209 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.5e+03 |\n", "| ep_rew_mean | 102 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2872 |\n", "| fps | 113 |\n", "| time_elapsed | 8892 |\n", "| total_timesteps | 2505026 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.163 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.5e+03 |\n", "| ep_rew_mean | 100 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2876 |\n", "| fps | 113 |\n", "| time_elapsed | 8921 |\n", "| total_timesteps | 2508534 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.173 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.51e+03 |\n", "| ep_rew_mean | 96.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2880 |\n", "| fps | 113 |\n", "| time_elapsed | 8962 |\n", "| total_timesteps | 2513468 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.283 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 97.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2884 |\n", "| fps | 113 |\n", "| time_elapsed | 8993 |\n", "| total_timesteps | 2517212 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.209 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 101 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2888 |\n", "| fps | 113 |\n", "| time_elapsed | 9023 |\n", "| total_timesteps | 2520891 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.286 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 104 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2892 |\n", "| fps | 113 |\n", "| time_elapsed | 9060 |\n", "| total_timesteps | 2525311 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.327 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 107 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2896 |\n", "| fps | 113 |\n", "| time_elapsed | 9092 |\n", "| total_timesteps | 2529133 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.889 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 110 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2900 |\n", "| fps | 113 |\n", "| time_elapsed | 9125 |\n", "| total_timesteps | 2533129 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.194 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 938 |\n", "| ep_rew_mean | 109 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2904 |\n", "| fps | 113 |\n", "| time_elapsed | 9153 |\n", "| total_timesteps | 2536545 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.829 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 939 |\n", "| ep_rew_mean | 110 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2908 |\n", "| fps | 113 |\n", "| time_elapsed | 9180 |\n", "| total_timesteps | 2539847 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.263 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 941 |\n", "| ep_rew_mean | 113 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2912 |\n", "| fps | 113 |\n", "| time_elapsed | 9211 |\n", "| total_timesteps | 2543617 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.201 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 947 |\n", "| ep_rew_mean | 111 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2916 |\n", "| fps | 113 |\n", "| time_elapsed | 9241 |\n", "| total_timesteps | 2547170 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 1.05 |\n", "----------------------------------\n", "Eval num_timesteps=2550000, episode_reward=106.70 +/- 33.63\n", "Episode length: 1119.60 +/- 192.95\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.12e+03 |\n", "| mean_reward | 107 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 2550000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.136 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 957 |\n", "| ep_rew_mean | 114 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2920 |\n", "| fps | 112 |\n", "| time_elapsed | 9304 |\n", "| total_timesteps | 2551267 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0906 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 955 |\n", "| ep_rew_mean | 116 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2924 |\n", "| fps | 113 |\n", "| time_elapsed | 9335 |\n", "| total_timesteps | 2555003 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0936 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 966 |\n", "| ep_rew_mean | 120 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2928 |\n", "| fps | 113 |\n", "| time_elapsed | 9372 |\n", "| total_timesteps | 2559413 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.387 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 122 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2932 |\n", "| fps | 113 |\n", "| time_elapsed | 9621 |\n", "| total_timesteps | 2589673 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.522 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 124 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2936 |\n", "| fps | 113 |\n", "| time_elapsed | 9660 |\n", "| total_timesteps | 2594413 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.106 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 125 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2940 |\n", "| fps | 113 |\n", "| time_elapsed | 9694 |\n", "| total_timesteps | 2598507 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.544 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 126 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2944 |\n", "| fps | 113 |\n", "| time_elapsed | 9728 |\n", "| total_timesteps | 2602619 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.194 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 132 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2948 |\n", "| fps | 113 |\n", "| time_elapsed | 9765 |\n", "| total_timesteps | 2606993 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0889 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 126 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2952 |\n", "| fps | 113 |\n", "| time_elapsed | 9801 |\n", "| total_timesteps | 2611385 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.576 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 128 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2956 |\n", "| fps | 113 |\n", "| time_elapsed | 9831 |\n", "| total_timesteps | 2614981 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.154 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 132 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2960 |\n", "| fps | 113 |\n", "| time_elapsed | 9873 |\n", "| total_timesteps | 2620021 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0988 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 129 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2964 |\n", "| fps | 113 |\n", "| time_elapsed | 9911 |\n", "| total_timesteps | 2624588 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.156 |\n", "----------------------------------\n", "Eval num_timesteps=2625000, episode_reward=93.50 +/- 35.58\n", "Episode length: 940.10 +/- 318.75\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 940 |\n", "| mean_reward | 93.5 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 2625000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.105 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 134 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2968 |\n", "| fps | 113 |\n", "| time_elapsed | 9971 |\n", "| total_timesteps | 2628692 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0889 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 135 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2972 |\n", "| fps | 113 |\n", "| time_elapsed | 10005 |\n", "| total_timesteps | 2632422 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.207 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 140 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2976 |\n", "| fps | 113 |\n", "| time_elapsed | 10039 |\n", "| total_timesteps | 2636522 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.21 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 142 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2980 |\n", "| fps | 113 |\n", "| time_elapsed | 10066 |\n", "| total_timesteps | 2639897 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.171 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 141 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2984 |\n", "| fps | 113 |\n", "| time_elapsed | 10097 |\n", "| total_timesteps | 2643595 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.242 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 140 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2988 |\n", "| fps | 113 |\n", "| time_elapsed | 10129 |\n", "| total_timesteps | 2647413 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.122 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 137 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2992 |\n", "| fps | 113 |\n", "| time_elapsed | 10161 |\n", "| total_timesteps | 2651363 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.168 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 137 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2996 |\n", "| fps | 113 |\n", "| time_elapsed | 10193 |\n", "| total_timesteps | 2655145 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.398 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 136 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3000 |\n", "| fps | 113 |\n", "| time_elapsed | 10229 |\n", "| total_timesteps | 2659445 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.197 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 136 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3004 |\n", "| fps | 113 |\n", "| time_elapsed | 10255 |\n", "| total_timesteps | 2662645 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.177 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 138 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3008 |\n", "| fps | 113 |\n", "| time_elapsed | 10296 |\n", "| total_timesteps | 2667499 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.451 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 131 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3012 |\n", "| fps | 113 |\n", "| time_elapsed | 10323 |\n", "| total_timesteps | 2670713 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.236 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 130 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3016 |\n", "| fps | 113 |\n", "| time_elapsed | 10355 |\n", "| total_timesteps | 2674674 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.403 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 128 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3020 |\n", "| fps | 113 |\n", "| time_elapsed | 10388 |\n", "| total_timesteps | 2678622 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.23 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 126 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3024 |\n", "| fps | 113 |\n", "| time_elapsed | 10420 |\n", "| total_timesteps | 2682496 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.152 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 122 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3028 |\n", "| fps | 113 |\n", "| time_elapsed | 10449 |\n", "| total_timesteps | 2686093 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.477 |\n", "----------------------------------\n", "Eval num_timesteps=2700000, episode_reward=53.20 +/- 16.85\n", "Episode length: 870.60 +/- 140.11\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 871 |\n", "| mean_reward | 53.2 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 2700000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0819 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 120 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3032 |\n", "| fps | 113 |\n", "| time_elapsed | 10721 |\n", "| total_timesteps | 2715823 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.138 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 117 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3036 |\n", "| fps | 113 |\n", "| time_elapsed | 10754 |\n", "| total_timesteps | 2719685 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0805 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 114 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3040 |\n", "| fps | 113 |\n", "| time_elapsed | 10789 |\n", "| total_timesteps | 2723889 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0945 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 110 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3044 |\n", "| fps | 113 |\n", "| time_elapsed | 10820 |\n", "| total_timesteps | 2727574 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.15 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 107 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3048 |\n", "| fps | 113 |\n", "| time_elapsed | 10853 |\n", "| total_timesteps | 2731522 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0568 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 107 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3052 |\n", "| fps | 113 |\n", "| time_elapsed | 10892 |\n", "| total_timesteps | 2736178 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.114 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 105 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3056 |\n", "| fps | 113 |\n", "| time_elapsed | 10919 |\n", "| total_timesteps | 2739339 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.507 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 99.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3060 |\n", "| fps | 113 |\n", "| time_elapsed | 10945 |\n", "| total_timesteps | 2742445 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.117 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 96.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3064 |\n", "| fps | 113 |\n", "| time_elapsed | 10970 |\n", "| total_timesteps | 2745405 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.134 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 89.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3068 |\n", "| fps | 113 |\n", "| time_elapsed | 11000 |\n", "| total_timesteps | 2748926 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0796 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 89.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3072 |\n", "| fps | 113 |\n", "| time_elapsed | 11035 |\n", "| total_timesteps | 2753169 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.139 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 83.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3076 |\n", "| fps | 113 |\n", "| time_elapsed | 11068 |\n", "| total_timesteps | 2757015 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.114 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 81.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3080 |\n", "| fps | 113 |\n", "| time_elapsed | 11105 |\n", "| total_timesteps | 2761456 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.48 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 80.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3084 |\n", "| fps | 113 |\n", "| time_elapsed | 11138 |\n", "| total_timesteps | 2765362 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.365 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 77.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3088 |\n", "| fps | 113 |\n", "| time_elapsed | 11172 |\n", "| total_timesteps | 2769450 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.491 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 77.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3092 |\n", "| fps | 113 |\n", "| time_elapsed | 11203 |\n", "| total_timesteps | 2773110 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.227 |\n", "----------------------------------\n", "Eval num_timesteps=2775000, episode_reward=166.40 +/- 98.03\n", "Episode length: 954.80 +/- 101.49\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 955 |\n", "| mean_reward | 166 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 2775000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.311 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 74.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3096 |\n", "| fps | 113 |\n", "| time_elapsed | 11258 |\n", "| total_timesteps | 2776836 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.734 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 74.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3100 |\n", "| fps | 113 |\n", "| time_elapsed | 11299 |\n", "| total_timesteps | 2781839 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.168 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 73.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3104 |\n", "| fps | 113 |\n", "| time_elapsed | 11332 |\n", "| total_timesteps | 2785753 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.187 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 72.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3108 |\n", "| fps | 113 |\n", "| time_elapsed | 11368 |\n", "| total_timesteps | 2790158 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.544 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 74.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3112 |\n", "| fps | 113 |\n", "| time_elapsed | 11405 |\n", "| total_timesteps | 2794607 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.273 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 74.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3116 |\n", "| fps | 113 |\n", "| time_elapsed | 11436 |\n", "| total_timesteps | 2798435 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.122 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 74.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3120 |\n", "| fps | 113 |\n", "| time_elapsed | 11469 |\n", "| total_timesteps | 2802475 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.136 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 73.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3124 |\n", "| fps | 113 |\n", "| time_elapsed | 11499 |\n", "| total_timesteps | 2806043 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.109 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 72.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3128 |\n", "| fps | 113 |\n", "| time_elapsed | 11529 |\n", "| total_timesteps | 2809677 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.103 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 977 |\n", "| ep_rew_mean | 75.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3132 |\n", "| fps | 113 |\n", "| time_elapsed | 11561 |\n", "| total_timesteps | 2813563 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.378 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 977 |\n", "| ep_rew_mean | 76.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3136 |\n", "| fps | 113 |\n", "| time_elapsed | 11592 |\n", "| total_timesteps | 2817366 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.228 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 967 |\n", "| ep_rew_mean | 75.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3140 |\n", "| fps | 113 |\n", "| time_elapsed | 11619 |\n", "| total_timesteps | 2820589 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.235 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 970 |\n", "| ep_rew_mean | 75.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3144 |\n", "| fps | 113 |\n", "| time_elapsed | 11652 |\n", "| total_timesteps | 2824613 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.215 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 968 |\n", "| ep_rew_mean | 73.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3148 |\n", "| fps | 113 |\n", "| time_elapsed | 11682 |\n", "| total_timesteps | 2828304 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.156 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 951 |\n", "| ep_rew_mean | 70.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3152 |\n", "| fps | 113 |\n", "| time_elapsed | 11707 |\n", "| total_timesteps | 2831326 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.261 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 955 |\n", "| ep_rew_mean | 70.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3156 |\n", "| fps | 113 |\n", "| time_elapsed | 11736 |\n", "| total_timesteps | 2834863 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.298 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 958 |\n", "| ep_rew_mean | 71.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3160 |\n", "| fps | 113 |\n", "| time_elapsed | 11764 |\n", "| total_timesteps | 2838264 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.129 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 968 |\n", "| ep_rew_mean | 73.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3164 |\n", "| fps | 113 |\n", "| time_elapsed | 11796 |\n", "| total_timesteps | 2842160 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.212 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 969 |\n", "| ep_rew_mean | 74.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3168 |\n", "| fps | 113 |\n", "| time_elapsed | 11826 |\n", "| total_timesteps | 2845852 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.18 |\n", "----------------------------------\n", "Eval num_timesteps=2850000, episode_reward=61.40 +/- 33.57\n", "Episode length: 886.50 +/- 195.07\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 886 |\n", "| mean_reward | 61.4 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 2850000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.162 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 74.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3172 |\n", "| fps | 113 |\n", "| time_elapsed | 12091 |\n", "| total_timesteps | 2875768 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.142 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 78.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3176 |\n", "| fps | 113 |\n", "| time_elapsed | 12125 |\n", "| total_timesteps | 2879882 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.18 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 75.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3180 |\n", "| fps | 113 |\n", "| time_elapsed | 12144 |\n", "| total_timesteps | 2882270 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.123 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 76.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3184 |\n", "| fps | 113 |\n", "| time_elapsed | 12169 |\n", "| total_timesteps | 2885334 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0779 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 75.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3188 |\n", "| fps | 113 |\n", "| time_elapsed | 12244 |\n", "| total_timesteps | 2894455 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0587 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 77.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3192 |\n", "| fps | 113 |\n", "| time_elapsed | 12274 |\n", "| total_timesteps | 2898145 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.126 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 77.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3196 |\n", "| fps | 113 |\n", "| time_elapsed | 12300 |\n", "| total_timesteps | 2901307 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.24 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 78.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3200 |\n", "| fps | 113 |\n", "| time_elapsed | 12328 |\n", "| total_timesteps | 2904815 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0545 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 80.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3204 |\n", "| fps | 113 |\n", "| time_elapsed | 12356 |\n", "| total_timesteps | 2908201 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0942 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 80.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3208 |\n", "| fps | 113 |\n", "| time_elapsed | 12384 |\n", "| total_timesteps | 2911640 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.116 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 80.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3212 |\n", "| fps | 113 |\n", "| time_elapsed | 12413 |\n", "| total_timesteps | 2915151 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.0818 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 80.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3216 |\n", "| fps | 114 |\n", "| time_elapsed | 12447 |\n", "| total_timesteps | 2919088 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.143 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 81.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3220 |\n", "| fps | 114 |\n", "| time_elapsed | 12475 |\n", "| total_timesteps | 2922622 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.125 |\n", "----------------------------------\n", "Eval num_timesteps=2925000, episode_reward=52.70 +/- 22.35\n", "Episode length: 965.20 +/- 126.16\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 965 |\n", "| mean_reward | 52.7 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 2925000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.112 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 82.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3224 |\n", "| fps | 113 |\n", "| time_elapsed | 12537 |\n", "| total_timesteps | 2927304 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.42 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 82.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3228 |\n", "| fps | 113 |\n", "| time_elapsed | 12571 |\n", "| total_timesteps | 2931388 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.243 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 81.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3232 |\n", "| fps | 113 |\n", "| time_elapsed | 12607 |\n", "| total_timesteps | 2935878 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.408 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 80.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3236 |\n", "| fps | 113 |\n", "| time_elapsed | 12638 |\n", "| total_timesteps | 2939570 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.194 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 83.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3240 |\n", "| fps | 113 |\n", "| time_elapsed | 12672 |\n", "| total_timesteps | 2943847 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.125 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 84.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3244 |\n", "| fps | 113 |\n", "| time_elapsed | 12705 |\n", "| total_timesteps | 2947835 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.8 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 83.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3248 |\n", "| fps | 113 |\n", "| time_elapsed | 12734 |\n", "| total_timesteps | 2951383 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.122 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 84.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3252 |\n", "| fps | 113 |\n", "| time_elapsed | 12763 |\n", "| total_timesteps | 2954993 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.786 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 84.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3256 |\n", "| fps | 114 |\n", "| time_elapsed | 12787 |\n", "| total_timesteps | 2957887 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.12 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 85 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3260 |\n", "| fps | 114 |\n", "| time_elapsed | 12816 |\n", "| total_timesteps | 2961391 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.134 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 84.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3264 |\n", "| fps | 114 |\n", "| time_elapsed | 12848 |\n", "| total_timesteps | 2965368 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.168 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 86.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3268 |\n", "| fps | 114 |\n", "| time_elapsed | 12880 |\n", "| total_timesteps | 2969330 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.653 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 971 |\n", "| ep_rew_mean | 88.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3272 |\n", "| fps | 114 |\n", "| time_elapsed | 12909 |\n", "| total_timesteps | 2972870 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.176 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 969 |\n", "| ep_rew_mean | 85.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3276 |\n", "| fps | 114 |\n", "| time_elapsed | 12942 |\n", "| total_timesteps | 2976784 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.149 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 974 |\n", "| ep_rew_mean | 85.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3280 |\n", "| fps | 114 |\n", "| time_elapsed | 12965 |\n", "| total_timesteps | 2979685 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.223 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 983 |\n", "| ep_rew_mean | 85.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3284 |\n", "| fps | 114 |\n", "| time_elapsed | 12997 |\n", "| total_timesteps | 2983596 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.204 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 933 |\n", "| ep_rew_mean | 88.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3288 |\n", "| fps | 114 |\n", "| time_elapsed | 13031 |\n", "| total_timesteps | 2987713 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.241 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 938 |\n", "| ep_rew_mean | 85.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3292 |\n", "| fps | 114 |\n", "| time_elapsed | 13066 |\n", "| total_timesteps | 2991957 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.352 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 947 |\n", "| ep_rew_mean | 87.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 3296 |\n", "| fps | 114 |\n", "| time_elapsed | 13098 |\n", "| total_timesteps | 2995982 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.422 |\n", "----------------------------------\n", "Eval num_timesteps=3000000, episode_reward=235.00 +/- 103.35\n", "Episode length: 1029.00 +/- 168.50\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.03e+03 |\n", "| mean_reward | 235 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 3000000 |\n", "| train/ | |\n", "| learning_rate | 0.0001 |\n", "| loss | 0.116 |\n", "----------------------------------\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Train the model\n", "model.learn(total_timesteps=NUM_TIMESTEPS, \n", " callback=callback_list, \n", " tb_log_name=\"./tb/\", \n", " reset_num_timesteps=False)" ] }, { "cell_type": "code", "execution_count": 9, "id": "bb1daca3", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T18:52:26.095386Z", "iopub.status.busy": "2024-05-10T18:52:26.094653Z", "iopub.status.idle": "2024-05-10T18:53:16.608221Z", "shell.execute_reply": "2024-05-10T18:53:16.607133Z" }, "papermill": { "duration": 50.550861, "end_time": "2024-05-10T18:53:16.610738", "exception": false, "start_time": "2024-05-10T18:52:26.059877", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Save the model, policy, and replay buffer for future loading and training\n", "model.save(MODEL_FILE_NAME)\n", "model.save_replay_buffer(BUFFER_FILE_NAME)\n", "model.policy.save(POLICY_FILE_NAME)" ] } ], "metadata": { "kaggle": { "accelerator": "nvidiaTeslaT4", "dataSources": [ { "sourceId": 176691538, "sourceType": "kernelVersion" } ], "dockerImageVersionId": 30699, "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" }, "papermill": { "default_parameters": {}, "duration": 13381.269941, "end_time": "2024-05-10T18:53:19.407294", "environment_variables": {}, "exception": null, "input_path": "__notebook__.ipynb", "output_path": "__notebook__.ipynb", "parameters": {}, "start_time": "2024-05-10T15:10:18.137353", "version": "2.5.0" } }, "nbformat": 4, "nbformat_minor": 5 }