{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "88002031", "metadata": { "execution": { "iopub.execute_input": "2024-05-16T13:07:31.296183Z", "iopub.status.busy": "2024-05-16T13:07:31.295584Z", "iopub.status.idle": "2024-05-16T13:08:28.408034Z", "shell.execute_reply": "2024-05-16T13:08:28.406865Z" }, "papermill": { "duration": 57.120021, "end_time": "2024-05-16T13:08:28.410346", "exception": false, "start_time": "2024-05-16T13:07:31.290325", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "%%capture\n", "!pip install stable-baselines3[extra]\n", "!pip install moviepy" ] }, { "cell_type": "code", "execution_count": 2, "id": "5adfa6cd", "metadata": { "execution": { "iopub.execute_input": "2024-05-16T13:08:28.419616Z", "iopub.status.busy": "2024-05-16T13:08:28.418885Z", "iopub.status.idle": "2024-05-16T13:08:43.252623Z", "shell.execute_reply": "2024-05-16T13:08:43.251775Z" }, "papermill": { "duration": 14.840675, "end_time": "2024-05-16T13:08:43.254956", "exception": false, "start_time": "2024-05-16T13:08:28.414281", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-05-16 13:08:34.276877: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-05-16 13:08:34.276975: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-05-16 13:08:34.377172: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n" ] } ], "source": [ "from stable_baselines3 import DQN\n", "from stable_baselines3.common.monitor import Monitor\n", "from stable_baselines3.common.callbacks import BaseCallback, 
EvalCallback, CallbackList\n", "from stable_baselines3.common.logger import Video, HParam, TensorBoardOutputFormat\n", "from stable_baselines3.common.evaluation import evaluate_policy\n", "\n", "from typing import Any, Dict\n", "\n", "import gymnasium as gym\n", "import torch as th\n", "import numpy as np\n", "\n", "# =====File names=====\n", "MODEL_FILE_NAME = \"ALE-Pacman-v5\"\n", "BUFFER_FILE_NAME = \"dqn_replay_buffer_pacman_v2-7\"\n", "POLICY_FILE_NAME = \"dqn_policy_pacman_v2-7\"\n", "\n", "# =====Model Config=====\n", "# Evaluate in 20ths\n", "EVAL_CALLBACK_FREQ = 50_000\n", "# Record in approximate quarters\n", "# Using an endpoint about 5% less than the total timesteps will trigger the last video call.\n", "# This doesn't coincide exactly with the end, but gets close.\n", "VIDEO_CALLBACK_FREQ = 350_000\n", "FRAMESKIP = 4\n", "NUM_TIMESTEPS = 1_500_000\n", "\n", "# =====Hyperparams=====\n", "EXPLORATION_FRACTION = 0.3\n", "# Buffer size needs to be less than about 60k in order to save it in a Kaggle instance\n", "# Increasing buffer size to 70K, should be able to store it.\n", "BUFFER_SIZE = 70_000\n", "BATCH_SIZE = 64\n", "LEARNING_STARTS = 100_000\n", "LEARNING_RATE = 0.00005\n", "GAMMA = 0.999\n", "FINAL_EPSILON = 0.01\n", "# Target Update Interval is set to 10k by default and looks like it is set to \n", "# 4 in the Nature paper. 
This is a large discrepency and makes me wonder if it \n", "# is something different or measured differently...\n", "TARGET_UPDATE_INTERVAL = 1_000\n", "\n", "# =====Custom objects for hyperparam modification=====\n", "CUSTOM_OBJECTS = {\n", " \"exploration_fraction\": EXPLORATION_FRACTION, \n", " \"buffer_size\": BUFFER_SIZE,\n", " \"batch_size\": BATCH_SIZE,\n", " \"learning_starts\": LEARNING_STARTS,\n", " \"learning_rate\": LEARNING_RATE,\n", " \"gamma\": GAMMA,\n", " \"target_update_interval\": TARGET_UPDATE_INTERVAL,\n", " \"exploration_final_eps\": FINAL_EPSILON,\n", " \"tensorboard_log\": \"./\",\n", " \"verbose\": 1}" ] }, { "cell_type": "code", "execution_count": 3, "id": "2568baeb", "metadata": { "execution": { "iopub.execute_input": "2024-05-16T13:08:43.272988Z", "iopub.status.busy": "2024-05-16T13:08:43.272367Z", "iopub.status.idle": "2024-05-16T13:08:43.284759Z", "shell.execute_reply": "2024-05-16T13:08:43.283877Z" }, "jupyter": { "source_hidden": true }, "papermill": { "duration": 0.021569, "end_time": "2024-05-16T13:08:43.286758", "exception": false, "start_time": "2024-05-16T13:08:43.265189", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# VideoRecorderCallback\n", "# The VideoRecorderCallback should record a video of the agent in the evaluation environment\n", "# every render_freq timesteps. It will record one episode. 
# VideoRecorderCallback
# Records one (or more) evaluation episodes as a video every `render_freq`
# callback invocations and logs it to TensorBoard.
# NOTE(review): nothing here fires at training end — the final recording
# happens on the last multiple of render_freq, not at completion.

class VideoRecorderCallback(BaseCallback):
    def __init__(self, eval_env: gym.Env, render_freq: int, n_eval_episodes: int = 1, deterministic: bool = True):
        """
        Records a video of an agent's trajectory traversing ``eval_env`` and logs it to TensorBoard.

        :param eval_env: A gym environment from which the trajectory is recorded
        :param render_freq: Render the agent's trajectory every eval_freq call of the callback.
        :param n_eval_episodes: Number of episodes to render
        :param deterministic: Whether to use deterministic or stochastic policy
        """
        super().__init__()
        self._eval_env = eval_env
        self._render_freq = render_freq
        self._n_eval_episodes = n_eval_episodes
        self._deterministic = deterministic

    def _on_step(self) -> bool:
        # Guard clause: skip everything except every `_render_freq`-th call.
        if self.n_calls % self._render_freq != 0:
            return True

        frames = []

        def capture_frame(_locals: Dict[str, Any], _globals: Dict[str, Any]) -> None:
            """
            Render the eval env in its current state and stash the frame.

            :param _locals: A dictionary containing all local variables of the callback's scope
            :param _globals: A dictionary containing all global variables of the callback's scope
            """
            frame = self._eval_env.render()
            # PyTorch wants CxHxW; gym renders HxWxC, so transpose axes.
            frames.append(frame.transpose(2, 0, 1))

        evaluate_policy(
            self.model,
            self._eval_env,
            callback=capture_frame,
            n_eval_episodes=self._n_eval_episodes,
            deterministic=self._deterministic,
        )
        # Extra list nesting gives the (batch, time, C, H, W) layout Video expects.
        self.logger.record(
            "trajectory/video",
            Video(th.from_numpy(np.array([frames])), fps=60),
            exclude=("stdout", "log", "json", "csv"),
        )
        return True
# HParamCallback
# Logs the run's hyperparameters at training start and declares which scalar
# metrics TensorBoard's HPARAMS tab should associate with this run.
class HParamCallback(BaseCallback):
    """
    Saves the hyperparameters and metrics at the start of the training, and logs them to TensorBoard.
    """
    # Fix: removed a redundant empty __init__ (it only called super());
    # BaseCallback's constructor is inherited unchanged.

    def _on_training_start(self) -> None:
        # Snapshot of the hyperparameters actually in effect on the model.
        # Fix: "learning rate" key renamed to "learning_rate" for consistency
        # with every other snake_case key (these keys are TB display labels).
        hparam_dict = {
            "algorithm": self.model.__class__.__name__,
            "policy": self.model.policy.__class__.__name__,
            "environment": self.model.env.__class__.__name__,
            "buffer_size": self.model.buffer_size,
            "batch_size": self.model.batch_size,
            "tau": self.model.tau,
            "gradient_steps": self.model.gradient_steps,
            "target_update_interval": self.model.target_update_interval,
            "exploration_fraction": self.model.exploration_fraction,
            "exploration_initial_eps": self.model.exploration_initial_eps,
            "exploration_final_eps": self.model.exploration_final_eps,
            "max_grad_norm": self.model.max_grad_norm,
            "tensorboard_log": self.model.tensorboard_log,
            "seed": self.model.seed,
            "learning_rate": self.model.learning_rate,
            "gamma": self.model.gamma,
        }
        # define the metrics that will appear in the `HPARAMS` Tensorboard tab by referencing their tag
        # TensorBoard will find & display metrics from the `SCALARS` tab
        metric_dict = {
            "eval/mean_ep_length": 0,
            "eval/mean_reward": 0,
            "rollout/ep_len_mean": 0,
            "rollout/ep_rew_mean": 0,
            "rollout/exploration_rate": 0,
            "time/_episode_num": 0,
            "time/fps": 0,
            "time/total_timesteps": 0,
            "train/learning_rate": 0.0,
            "train/loss": 0.0,
            "train/n_updates": 0.0,
            "locals/rewards": 0.0,
            "locals/infos_0_lives": 0.0,
            "locals/num_collected_steps": 0.0,
            "locals/num_collected_episodes": 0.0
        }

        self.logger.record(
            "hparams",
            HParam(hparam_dict, metric_dict),
            exclude=("stdout", "log", "json", "csv"),
        )

    def _on_step(self) -> bool:
        return True
# PlotTensorboardValuesCallback
# Logs extra scalar values to TensorBoard on every step, plus env metadata and
# network summaries as text at training start and end.

class PlotTensorboardValuesCallback(BaseCallback):
    """
    Custom callback for plotting additional values in tensorboard.
    """
    def __init__(self, eval_env: gym.Env, train_env: gym.Env, model: DQN, verbose=0):
        super().__init__(verbose)
        self._eval_env = eval_env
        self._train_env = train_env
        self._model = model
        # Set in _on_training_start; None means no TensorBoard output format
        # was found and text logging is skipped.
        self.tb_formatter = None

    def _write_text_snapshots(self) -> None:
        # Helper: write env metadata and the Q-network summaries to the TB
        # text tab (deduplicates the four copy-pasted add_text/flush stanzas).
        writer = self.tb_formatter.writer
        for tag, value in (
            ("metadata/eval_env", self._eval_env.metadata),
            ("metadata/train_env", self._train_env.metadata),
            ("model/q_net", self._model.q_net),
            ("model/q_net_target", self._model.q_net_target),
        ):
            writer.add_text(tag, str(value), self.num_timesteps)
            writer.flush()

    def _on_training_start(self) -> None:
        output_formats = self.logger.output_formats
        # Save reference to the tensorboard formatter object.
        # Bug fix: the original wrapped this in a bare `except:` that printed a
        # message and carried on, then dereferenced the never-assigned
        # attribute anyway (AttributeError). Use next()'s default instead and
        # skip text logging when no TensorBoard formatter is configured.
        self.tb_formatter = next(
            (formatter for formatter in output_formats
             if isinstance(formatter, TensorBoardOutputFormat)),
            None,
        )
        if self.tb_formatter is None:
            print("Exception thrown in tb_formatter initialization.")
            return
        self._write_text_snapshots()

    def _on_step(self) -> bool:
        # Scalars excluded from stdout/log/json/csv so they only reach TB.
        self.logger.record("time/_episode_num", self.model._episode_num, exclude=("stdout", "log", "json", "csv"))
        self.logger.record("train/n_updates", self.model._n_updates, exclude=("stdout", "log", "json", "csv"))
        self.logger.record("locals/rewards", self.locals["rewards"], exclude=("stdout", "log", "json", "csv"))
        self.logger.record("locals/infos_0_lives", self.locals["infos"][0]["lives"], exclude=("stdout", "log", "json", "csv"))
        self.logger.record("locals/num_collected_steps", self.locals["num_collected_steps"], exclude=("stdout", "log", "json", "csv"))
        self.logger.record("locals/num_collected_episodes", self.locals["num_collected_episodes"], exclude=("stdout", "log", "json", "csv"))

        return True

    def _on_training_end(self) -> None:
        # Final snapshot mirrors the one taken at training start.
        if self.tb_formatter is not None:
            self._write_text_snapshots()
# make the training and evaluation environments
eval_env = Monitor(gym.make("ALE/Pacman-v5", render_mode="rgb_array", frameskip=FRAMESKIP))
train_env = gym.make("ALE/Pacman-v5", render_mode="rgb_array", frameskip=FRAMESKIP)

# Resume training: load the previous checkpoint and its replay buffer.
# The loaded model still needs the hyperparameters passed via CUSTOM_OBJECTS.
# NOTE(review): absolute Kaggle input paths (v2-6 artifacts) — update these
# when running outside this Kaggle instance.
model = DQN.load(
    "/kaggle/input/dqn-pacmanv5-run2v6/ALE-Pacman-v5.zip",
    env=train_env,
    custom_objects=CUSTOM_OBJECTS,
)
model.load_replay_buffer("/kaggle/input/dqn-pacmanv5-run2v6/dqn_replay_buffer_pacman_v2-6")

# Define the callbacks and put them in a list
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path="./best_model/",
    log_path="./evals/",
    eval_freq=EVAL_CALLBACK_FREQ,
    n_eval_episodes=10,
    deterministic=True,
    render=False,
)

tbplot_callback = PlotTensorboardValuesCallback(eval_env=eval_env, train_env=train_env, model=model)
video_callback = VideoRecorderCallback(eval_env, render_freq=VIDEO_CALLBACK_FREQ)
hparam_callback = HParamCallback()

callback_list = CallbackList([hparam_callback, eval_callback, video_callback, tbplot_callback])
"2024-05-16T16:51:37.358311Z", "shell.execute_reply": "2024-05-16T16:51:37.357379Z" }, "papermill": { "duration": 13291.173132, "end_time": "2024-05-16T16:51:37.360364", "exception": false, "start_time": "2024-05-16T13:10:06.187232", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Logging to ././tb/_0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/stable_baselines3/common/callbacks.py:403: UserWarning: Training and eval env are not of the same type != \n", " warnings.warn(\"Training and eval env are not of the same type\" f\"{self.training_env} != {self.eval_env}\")\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 308 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6880 |\n", "| fps | 110 |\n", "| time_elapsed | 44 |\n", "| total_timesteps | 7504846 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0931 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 312 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6884 |\n", "| fps | 111 |\n", "| time_elapsed | 95 |\n", "| total_timesteps | 7510594 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0725 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 312 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6888 |\n", "| fps | 111 |\n", "| time_elapsed | 138 |\n", "| total_timesteps | 7515488 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0945 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 
1.22e+03 |\n", "| ep_rew_mean | 308 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6892 |\n", "| fps | 112 |\n", "| time_elapsed | 187 |\n", "| total_timesteps | 7521004 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0718 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 309 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6896 |\n", "| fps | 112 |\n", "| time_elapsed | 228 |\n", "| total_timesteps | 7525608 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.104 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 313 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6900 |\n", "| fps | 112 |\n", "| time_elapsed | 274 |\n", "| total_timesteps | 7530836 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0725 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 311 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6904 |\n", "| fps | 112 |\n", "| time_elapsed | 323 |\n", "| total_timesteps | 7536354 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0484 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 306 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6908 |\n", "| fps | 112 |\n", "| time_elapsed | 364 |\n", "| total_timesteps | 7540970 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0399 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 305 |\n", "| 
exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6912 |\n", "| fps | 112 |\n", "| time_elapsed | 412 |\n", "| total_timesteps | 7546292 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0335 |\n", "----------------------------------\n", "Eval num_timesteps=7550000, episode_reward=344.90 +/- 73.31\n", "Episode length: 1237.60 +/- 125.11\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.24e+03 |\n", "| mean_reward | 345 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 7550000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0948 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 303 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6916 |\n", "| fps | 105 |\n", "| time_elapsed | 486 |\n", "| total_timesteps | 7551422 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0426 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 302 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6920 |\n", "| fps | 106 |\n", "| time_elapsed | 531 |\n", "| total_timesteps | 7556514 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.059 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 298 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6924 |\n", "| fps | 106 |\n", "| time_elapsed | 569 |\n", "| total_timesteps | 7560806 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.034 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| 
ep_rew_mean | 300 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6928 |\n", "| fps | 107 |\n", "| time_elapsed | 614 |\n", "| total_timesteps | 7565811 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.05 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.5e+03 |\n", "| ep_rew_mean | 306 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6932 |\n", "| fps | 109 |\n", "| time_elapsed | 887 |\n", "| total_timesteps | 7596749 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0547 |\n", "----------------------------------\n", "Eval num_timesteps=7600000, episode_reward=210.00 +/- 143.68\n", "Episode length: 1053.50 +/- 235.76\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.05e+03 |\n", "| mean_reward | 210 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 7600000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0304 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.5e+03 |\n", "| ep_rew_mean | 309 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6936 |\n", "| fps | 106 |\n", "| time_elapsed | 957 |\n", "| total_timesteps | 7601919 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0308 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.51e+03 |\n", "| ep_rew_mean | 315 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6940 |\n", "| fps | 106 |\n", "| time_elapsed | 1000 |\n", "| total_timesteps | 7606775 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0293 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.51e+03 |\n", "| 
ep_rew_mean | 313 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6944 |\n", "| fps | 106 |\n", "| time_elapsed | 1041 |\n", "| total_timesteps | 7611421 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0622 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.51e+03 |\n", "| ep_rew_mean | 312 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6948 |\n", "| fps | 107 |\n", "| time_elapsed | 1083 |\n", "| total_timesteps | 7616208 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0673 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.52e+03 |\n", "| ep_rew_mean | 307 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6952 |\n", "| fps | 107 |\n", "| time_elapsed | 1129 |\n", "| total_timesteps | 7621418 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0259 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.75e+03 |\n", "| ep_rew_mean | 304 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6956 |\n", "| fps | 108 |\n", "| time_elapsed | 1371 |\n", "| total_timesteps | 7648710 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0173 |\n", "----------------------------------\n", "Eval num_timesteps=7650000, episode_reward=408.70 +/- 77.43\n", "Episode length: 1297.60 +/- 62.91\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.3e+03 |\n", "| mean_reward | 409 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 7650000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0538 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| 
ep_len_mean | 1.75e+03 |\n", "| ep_rew_mean | 307 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6960 |\n", "| fps | 106 |\n", "| time_elapsed | 1444 |\n", "| total_timesteps | 7653506 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0138 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.74e+03 |\n", "| ep_rew_mean | 309 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6964 |\n", "| fps | 106 |\n", "| time_elapsed | 1487 |\n", "| total_timesteps | 7658369 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.039 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.73e+03 |\n", "| ep_rew_mean | 311 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6968 |\n", "| fps | 106 |\n", "| time_elapsed | 1527 |\n", "| total_timesteps | 7662957 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0376 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.73e+03 |\n", "| ep_rew_mean | 311 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6972 |\n", "| fps | 106 |\n", "| time_elapsed | 1569 |\n", "| total_timesteps | 7667747 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.205 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.73e+03 |\n", "| ep_rew_mean | 313 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6976 |\n", "| fps | 107 |\n", "| time_elapsed | 1618 |\n", "| total_timesteps | 7673159 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0276 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.73e+03 |\n", "| ep_rew_mean | 311 |\n", 
"| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6980 |\n", "| fps | 107 |\n", "| time_elapsed | 1658 |\n", "| total_timesteps | 7677666 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0116 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.72e+03 |\n", "| ep_rew_mean | 310 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6984 |\n", "| fps | 107 |\n", "| time_elapsed | 1703 |\n", "| total_timesteps | 7682819 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0328 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.72e+03 |\n", "| ep_rew_mean | 312 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6988 |\n", "| fps | 107 |\n", "| time_elapsed | 1741 |\n", "| total_timesteps | 7687069 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.337 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.71e+03 |\n", "| ep_rew_mean | 314 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6992 |\n", "| fps | 107 |\n", "| time_elapsed | 1783 |\n", "| total_timesteps | 7691853 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0481 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.7e+03 |\n", "| ep_rew_mean | 314 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 6996 |\n", "| fps | 107 |\n", "| time_elapsed | 1820 |\n", "| total_timesteps | 7696033 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0723 |\n", "----------------------------------\n", "Eval num_timesteps=7700000, episode_reward=401.60 +/- 70.85\n", "Episode length: 1312.80 +/- 104.40\n", "----------------------------------\n", "| eval/ | |\n", "| 
mean_ep_length | 1.31e+03 |\n", "| mean_reward | 402 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 7700000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.145 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.7e+03 |\n", "| ep_rew_mean | 315 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7000 |\n", "| fps | 106 |\n", "| time_elapsed | 1893 |\n", "| total_timesteps | 7700917 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0464 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.69e+03 |\n", "| ep_rew_mean | 318 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7004 |\n", "| fps | 106 |\n", "| time_elapsed | 1936 |\n", "| total_timesteps | 7705695 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.209 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.7e+03 |\n", "| ep_rew_mean | 326 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7008 |\n", "| fps | 106 |\n", "| time_elapsed | 1978 |\n", "| total_timesteps | 7710511 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.105 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.69e+03 |\n", "| ep_rew_mean | 329 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7012 |\n", "| fps | 106 |\n", "| time_elapsed | 2023 |\n", "| total_timesteps | 7715547 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0559 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.69e+03 |\n", "| ep_rew_mean | 331 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| 
episodes | 7016 |\n", "| fps | 106 |\n", "| time_elapsed | 2065 |\n", "| total_timesteps | 7720269 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.146 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.69e+03 |\n", "| ep_rew_mean | 338 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7020 |\n", "| fps | 106 |\n", "| time_elapsed | 2108 |\n", "| total_timesteps | 7725127 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0581 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.69e+03 |\n", "| ep_rew_mean | 344 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7024 |\n", "| fps | 106 |\n", "| time_elapsed | 2153 |\n", "| total_timesteps | 7730249 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.631 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.69e+03 |\n", "| ep_rew_mean | 344 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7028 |\n", "| fps | 107 |\n", "| time_elapsed | 2192 |\n", "| total_timesteps | 7734575 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 3.65 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 343 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7032 |\n", "| fps | 107 |\n", "| time_elapsed | 2230 |\n", "| total_timesteps | 7738885 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0793 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 341 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7036 |\n", "| fps | 107 |\n", "| time_elapsed | 
2272 |\n", "| total_timesteps | 7743579 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0382 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 344 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7040 |\n", "| fps | 107 |\n", "| time_elapsed | 2314 |\n", "| total_timesteps | 7748317 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0374 |\n", "----------------------------------\n", "Eval num_timesteps=7750000, episode_reward=271.60 +/- 39.08\n", "Episode length: 1230.60 +/- 113.93\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.23e+03 |\n", "| mean_reward | 272 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 7750000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.104 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 355 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7044 |\n", "| fps | 106 |\n", "| time_elapsed | 2388 |\n", "| total_timesteps | 7753573 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0198 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 357 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7048 |\n", "| fps | 106 |\n", "| time_elapsed | 2429 |\n", "| total_timesteps | 7758229 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0483 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 363 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7052 |\n", "| fps | 106 |\n", "| time_elapsed | 
2470 |\n", "| total_timesteps | 7762815 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0991 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 368 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7056 |\n", "| fps | 106 |\n", "| time_elapsed | 2511 |\n", "| total_timesteps | 7767473 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.13 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 373 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7060 |\n", "| fps | 106 |\n", "| time_elapsed | 2554 |\n", "| total_timesteps | 7772285 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0834 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 368 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7064 |\n", "| fps | 106 |\n", "| time_elapsed | 2595 |\n", "| total_timesteps | 7776877 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.16 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 372 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7068 |\n", "| fps | 106 |\n", "| time_elapsed | 2639 |\n", "| total_timesteps | 7781841 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.313 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 376 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7072 |\n", "| fps | 106 |\n", "| time_elapsed | 2683 |\n", "| total_timesteps | 7786869 |\n", "| train/ | 
|\n", "| learning_rate | 5e-05 |\n", "| loss | 0.325 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 377 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7076 |\n", "| fps | 106 |\n", "| time_elapsed | 2726 |\n", "| total_timesteps | 7791669 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0721 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 380 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7080 |\n", "| fps | 107 |\n", "| time_elapsed | 2764 |\n", "| total_timesteps | 7796031 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0705 |\n", "----------------------------------\n", "Eval num_timesteps=7800000, episode_reward=338.80 +/- 118.77\n", "Episode length: 3697.80 +/- 7767.17\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 3.7e+03 |\n", "| mean_reward | 339 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 7800000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0526 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 385 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7084 |\n", "| fps | 104 |\n", "| time_elapsed | 2887 |\n", "| total_timesteps | 7800535 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0602 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 387 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7088 |\n", "| fps | 104 |\n", "| time_elapsed | 2931 |\n", "| total_timesteps | 7805459 |\n", "| train/ | 
|\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0573 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 388 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7092 |\n", "| fps | 104 |\n", "| time_elapsed | 2971 |\n", "| total_timesteps | 7810001 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0365 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 391 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7096 |\n", "| fps | 104 |\n", "| time_elapsed | 3013 |\n", "| total_timesteps | 7814725 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.52 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 385 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7100 |\n", "| fps | 104 |\n", "| time_elapsed | 3049 |\n", "| total_timesteps | 7818765 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0572 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 382 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7104 |\n", "| fps | 104 |\n", "| time_elapsed | 3091 |\n", "| total_timesteps | 7823587 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.067 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 381 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7108 |\n", "| fps | 104 |\n", "| time_elapsed | 3133 |\n", "| total_timesteps | 7828277 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0625 
|\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 375 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7112 |\n", "| fps | 104 |\n", "| time_elapsed | 3172 |\n", "| total_timesteps | 7832641 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.21 |\n", "----------------------------------\n", "Eval num_timesteps=7850000, episode_reward=408.90 +/- 55.09\n", "Episode length: 1149.20 +/- 184.42\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.15e+03 |\n", "| mean_reward | 409 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 7850000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0118 |\n", "----------------------------------\n", "New best mean reward!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/gymnasium/utils/passive_env_checker.py:335: UserWarning: \u001b[33mWARN: No render fps was declared in the environment (env.metadata['render_fps'] is None or not defined), rendering may occur at inconsistent fps.\u001b[0m\n", " logger.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 374 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7116 |\n", "| fps | 104 |\n", "| time_elapsed | 3472 |\n", "| total_timesteps | 7863417 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.32 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 368 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7120 |\n", "| fps | 104 |\n", "| time_elapsed | 3511 |\n", "| total_timesteps | 7867344 |\n", "| train/ | |\n", "| 
learning_rate | 5e-05 |\n", "| loss | 0.18 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 358 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7124 |\n", "| fps | 104 |\n", "| time_elapsed | 3542 |\n", "| total_timesteps | 7871173 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0378 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 355 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7128 |\n", "| fps | 105 |\n", "| time_elapsed | 3582 |\n", "| total_timesteps | 7876183 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.137 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 354 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7132 |\n", "| fps | 105 |\n", "| time_elapsed | 3618 |\n", "| total_timesteps | 7880578 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0197 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 352 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7136 |\n", "| fps | 105 |\n", "| time_elapsed | 3654 |\n", "| total_timesteps | 7885068 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0266 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 346 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7140 |\n", "| fps | 105 |\n", "| time_elapsed | 3687 |\n", "| total_timesteps | 7889120 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0462 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 338 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7144 |\n", "| fps | 105 |\n", "| time_elapsed | 3725 |\n", "| total_timesteps | 7893889 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0287 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 337 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7148 |\n", "| fps | 105 |\n", "| time_elapsed | 3767 |\n", "| total_timesteps | 7899089 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.397 |\n", "----------------------------------\n", "Eval num_timesteps=7900000, episode_reward=287.40 +/- 50.41\n", "Episode length: 1175.40 +/- 95.80\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.18e+03 |\n", "| mean_reward | 287 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 7900000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0322 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 333 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7152 |\n", "| fps | 105 |\n", "| time_elapsed | 3831 |\n", "| total_timesteps | 7903653 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0466 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 329 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7156 |\n", "| fps | 105 |\n", "| time_elapsed | 3866 |\n", "| total_timesteps | 7907969 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.145 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 317 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7160 |\n", "| fps | 105 |\n", "| time_elapsed | 3903 |\n", "| total_timesteps | 7912545 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0879 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 324 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7164 |\n", "| fps | 105 |\n", "| time_elapsed | 3945 |\n", "| total_timesteps | 7917817 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0553 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 312 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7168 |\n", "| fps | 106 |\n", "| time_elapsed | 3977 |\n", "| total_timesteps | 7921707 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.27 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 308 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7172 |\n", "| fps | 106 |\n", "| time_elapsed | 4011 |\n", "| total_timesteps | 7925925 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0229 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 299 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7176 |\n", "| fps | 106 |\n", "| time_elapsed | 4046 |\n", "| total_timesteps | 7930135 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.061 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 288 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7180 |\n", "| fps | 106 |\n", "| time_elapsed | 4086 |\n", "| total_timesteps | 7935139 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.141 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 281 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7184 |\n", "| fps | 106 |\n", "| time_elapsed | 4121 |\n", "| total_timesteps | 7939447 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0443 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 284 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7188 |\n", "| fps | 106 |\n", "| time_elapsed | 4158 |\n", "| total_timesteps | 7944083 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0297 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 285 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7192 |\n", "| fps | 106 |\n", "| time_elapsed | 4196 |\n", "| total_timesteps | 7948729 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0524 |\n", "----------------------------------\n", "Eval num_timesteps=7950000, episode_reward=281.10 +/- 25.12\n", "Episode length: 1178.40 +/- 67.74\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.18e+03 |\n", "| mean_reward | 281 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 7950000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0555 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 284 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7196 |\n", "| fps | 106 |\n", "| time_elapsed | 4264 |\n", "| total_timesteps | 7953767 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.195 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 288 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7200 |\n", "| fps | 106 |\n", "| time_elapsed | 4301 |\n", "| total_timesteps | 7958395 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.208 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 287 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7204 |\n", "| fps | 106 |\n", "| time_elapsed | 4339 |\n", "| total_timesteps | 7963063 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.721 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 279 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7208 |\n", "| fps | 106 |\n", "| time_elapsed | 4376 |\n", "| total_timesteps | 7967615 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0668 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 280 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7212 |\n", "| fps | 106 |\n", "| time_elapsed | 4413 |\n", "| total_timesteps | 7972275 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0291 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", 
"| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 276 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7216 |\n", "| fps | 107 |\n", "| time_elapsed | 4451 |\n", "| total_timesteps | 7976975 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.932 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 279 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7220 |\n", "| fps | 107 |\n", "| time_elapsed | 4493 |\n", "| total_timesteps | 7982171 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0214 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 282 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7224 |\n", "| fps | 107 |\n", "| time_elapsed | 4529 |\n", "| total_timesteps | 7986609 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.239 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 288 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7228 |\n", "| fps | 107 |\n", "| time_elapsed | 4571 |\n", "| total_timesteps | 7991841 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.047 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 288 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7232 |\n", "| fps | 107 |\n", "| time_elapsed | 4615 |\n", "| total_timesteps | 7997277 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0422 |\n", "----------------------------------\n", "Eval num_timesteps=8000000, episode_reward=301.10 +/- 183.73\n", "Episode length: 1115.00 +/- 231.42\n", 
"----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.12e+03 |\n", "| mean_reward | 301 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8000000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0397 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 288 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7236 |\n", "| fps | 107 |\n", "| time_elapsed | 4676 |\n", "| total_timesteps | 8001647 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.296 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 288 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7240 |\n", "| fps | 107 |\n", "| time_elapsed | 4712 |\n", "| total_timesteps | 8006127 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0497 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 290 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7244 |\n", "| fps | 107 |\n", "| time_elapsed | 4751 |\n", "| total_timesteps | 8010971 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.197 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 294 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7248 |\n", "| fps | 107 |\n", "| time_elapsed | 4792 |\n", "| total_timesteps | 8015963 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0586 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 
295 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7252 |\n", "| fps | 107 |\n", "| time_elapsed | 4830 |\n", "| total_timesteps | 8020725 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0477 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 296 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7256 |\n", "| fps | 107 |\n", "| time_elapsed | 4867 |\n", "| total_timesteps | 8025279 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.375 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 299 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7260 |\n", "| fps | 108 |\n", "| time_elapsed | 4906 |\n", "| total_timesteps | 8030063 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.411 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 295 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7264 |\n", "| fps | 108 |\n", "| time_elapsed | 4937 |\n", "| total_timesteps | 8034011 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0187 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 304 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7268 |\n", "| fps | 108 |\n", "| time_elapsed | 4976 |\n", "| total_timesteps | 8038821 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0401 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 308 |\n", "| exploration_rate | 0.01 |\n", "| time/ | 
|\n", "| episodes | 7272 |\n", "| fps | 108 |\n", "| time_elapsed | 5014 |\n", "| total_timesteps | 8043451 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 3.58 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 312 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7276 |\n", "| fps | 108 |\n", "| time_elapsed | 5055 |\n", "| total_timesteps | 8048525 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0581 |\n", "----------------------------------\n", "Eval num_timesteps=8050000, episode_reward=377.70 +/- 51.33\n", "Episode length: 1222.00 +/- 158.14\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.22e+03 |\n", "| mean_reward | 378 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8050000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0809 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 316 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7280 |\n", "| fps | 108 |\n", "| time_elapsed | 5121 |\n", "| total_timesteps | 8053303 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0402 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 321 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7284 |\n", "| fps | 108 |\n", "| time_elapsed | 5158 |\n", "| total_timesteps | 8057865 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0876 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 316 |\n", "| exploration_rate | 0.01 |\n", "| time/ | 
|\n", "| episodes | 7288 |\n", "| fps | 108 |\n", "| time_elapsed | 5197 |\n", "| total_timesteps | 8062709 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0707 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 312 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7292 |\n", "| fps | 108 |\n", "| time_elapsed | 5236 |\n", "| total_timesteps | 8067453 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.165 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 313 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7296 |\n", "| fps | 108 |\n", "| time_elapsed | 5272 |\n", "| total_timesteps | 8071963 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0324 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 313 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7300 |\n", "| fps | 108 |\n", "| time_elapsed | 5312 |\n", "| total_timesteps | 8076893 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0221 |\n", "----------------------------------\n", "Eval num_timesteps=8100000, episode_reward=322.90 +/- 126.07\n", "Episode length: 1055.00 +/- 197.25\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.06e+03 |\n", "| mean_reward | 323 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8100000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0336 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 308 |\n", "| exploration_rate | 0.01 |\n", "| time/ | 
|\n", "| episodes | 7304 |\n", "| fps | 108 |\n", "| time_elapsed | 5576 |\n", "| total_timesteps | 8106787 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0155 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 314 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7308 |\n", "| fps | 108 |\n", "| time_elapsed | 5612 |\n", "| total_timesteps | 8111179 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0105 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 316 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7312 |\n", "| fps | 108 |\n", "| time_elapsed | 5650 |\n", "| total_timesteps | 8115809 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0205 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.69e+03 |\n", "| ep_rew_mean | 317 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7316 |\n", "| fps | 109 |\n", "| time_elapsed | 5893 |\n", "| total_timesteps | 8146081 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0355 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.68e+03 |\n", "| ep_rew_mean | 311 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7320 |\n", "| fps | 109 |\n", "| time_elapsed | 5924 |\n", "| total_timesteps | 8149921 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0366 |\n", "----------------------------------\n", "Eval num_timesteps=8150000, episode_reward=156.60 +/- 140.50\n", "Episode length: 830.80 +/- 145.39\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 831 |\n", "| mean_reward | 157 
|\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8150000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0228 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.68e+03 |\n", "| ep_rew_mean | 310 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7324 |\n", "| fps | 109 |\n", "| time_elapsed | 5978 |\n", "| total_timesteps | 8154145 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0339 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.66e+03 |\n", "| ep_rew_mean | 307 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7328 |\n", "| fps | 109 |\n", "| time_elapsed | 6008 |\n", "| total_timesteps | 8157969 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0528 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.92e+03 |\n", "| ep_rew_mean | 301 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7332 |\n", "| fps | 110 |\n", "| time_elapsed | 6257 |\n", "| total_timesteps | 8188793 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0424 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.91e+03 |\n", "| ep_rew_mean | 295 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7336 |\n", "| fps | 110 |\n", "| time_elapsed | 6292 |\n", "| total_timesteps | 8193139 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0289 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.92e+03 |\n", "| ep_rew_mean | 297 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7340 |\n", "| fps | 110 |\n", "| 
time_elapsed | 6334 |\n", "| total_timesteps | 8198231 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.167 |\n", "----------------------------------\n", "Eval num_timesteps=8200000, episode_reward=418.60 +/- 25.35\n", "Episode length: 1142.20 +/- 125.55\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.14e+03 |\n", "| mean_reward | 419 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8200000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0348 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.91e+03 |\n", "| ep_rew_mean | 292 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7344 |\n", "| fps | 109 |\n", "| time_elapsed | 6399 |\n", "| total_timesteps | 8202395 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0856 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.91e+03 |\n", "| ep_rew_mean | 285 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7348 |\n", "| fps | 109 |\n", "| time_elapsed | 6444 |\n", "| total_timesteps | 8207155 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0524 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.9e+03 |\n", "| ep_rew_mean | 277 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7352 |\n", "| fps | 109 |\n", "| time_elapsed | 6478 |\n", "| total_timesteps | 8211225 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0261 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.91e+03 |\n", "| ep_rew_mean | 275 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7356 |\n", 
"| fps | 109 |\n", "| time_elapsed | 6519 |\n", "| total_timesteps | 8216277 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0744 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.91e+03 |\n", "| ep_rew_mean | 271 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7360 |\n", "| fps | 109 |\n", "| time_elapsed | 6558 |\n", "| total_timesteps | 8221131 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0332 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.91e+03 |\n", "| ep_rew_mean | 269 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7364 |\n", "| fps | 110 |\n", "| time_elapsed | 6593 |\n", "| total_timesteps | 8225463 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0529 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.92e+03 |\n", "| ep_rew_mean | 264 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7368 |\n", "| fps | 110 |\n", "| time_elapsed | 6636 |\n", "| total_timesteps | 8230719 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.31 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.92e+03 |\n", "| ep_rew_mean | 260 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7372 |\n", "| fps | 110 |\n", "| time_elapsed | 6678 |\n", "| total_timesteps | 8235951 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.04 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.94e+03 |\n", "| ep_rew_mean | 259 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7376 |\n", "| fps | 110 |\n", "| time_elapsed | 6734 |\n", "| 
total_timesteps | 8242809 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.131 |\n", "----------------------------------\n", "Eval num_timesteps=8250000, episode_reward=262.90 +/- 28.10\n", "Episode length: 1231.80 +/- 89.09\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.23e+03 |\n", "| mean_reward | 263 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8250000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0527 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 2.2e+03 |\n", "| ep_rew_mean | 258 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7380 |\n", "| fps | 110 |\n", "| time_elapsed | 7007 |\n", "| total_timesteps | 8273391 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0451 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 2.2e+03 |\n", "| ep_rew_mean | 250 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7384 |\n", "| fps | 110 |\n", "| time_elapsed | 7047 |\n", "| total_timesteps | 8278291 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0758 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 2.21e+03 |\n", "| ep_rew_mean | 246 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7388 |\n", "| fps | 110 |\n", "| time_elapsed | 7087 |\n", "| total_timesteps | 8283239 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0552 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 2.2e+03 |\n", "| ep_rew_mean | 249 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7392 |\n", "| fps | 110 |\n", "| time_elapsed | 7124 |\n", "| 
total_timesteps | 8287845 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0286 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 2.2e+03 |\n", "| ep_rew_mean | 246 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7396 |\n", "| fps | 110 |\n", "| time_elapsed | 7161 |\n", "| total_timesteps | 8292413 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0321 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 2.2e+03 |\n", "| ep_rew_mean | 242 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7400 |\n", "| fps | 110 |\n", "| time_elapsed | 7196 |\n", "| total_timesteps | 8296768 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0228 |\n", "----------------------------------\n", "Eval num_timesteps=8300000, episode_reward=390.70 +/- 73.87\n", "Episode length: 1193.60 +/- 96.99\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.19e+03 |\n", "| mean_reward | 391 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8300000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.169 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.94e+03 |\n", "| ep_rew_mean | 244 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7404 |\n", "| fps | 110 |\n", "| time_elapsed | 7256 |\n", "| total_timesteps | 8300876 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0338 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 2.21e+03 |\n", "| ep_rew_mean | 244 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7408 |\n", "| fps | 110 |\n", "| time_elapsed | 7504 |\n", "| 
total_timesteps | 8331704 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.61 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 2.2e+03 |\n", "| ep_rew_mean | 242 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7412 |\n", "| fps | 110 |\n", "| time_elapsed | 7538 |\n", "| total_timesteps | 8335824 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.5 |\n", "----------------------------------\n", "Eval num_timesteps=8350000, episode_reward=206.00 +/- 111.95\n", "Episode length: 1062.20 +/- 161.56\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.06e+03 |\n", "| mean_reward | 206 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8350000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0742 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 2.1e+03 |\n", "| ep_rew_mean | 242 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7416 |\n", "| fps | 110 |\n", "| time_elapsed | 7728 |\n", "| total_timesteps | 8356392 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0138 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 2.11e+03 |\n", "| ep_rew_mean | 249 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7420 |\n", "| fps | 110 |\n", "| time_elapsed | 7765 |\n", "| total_timesteps | 8361048 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.03 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 2.12e+03 |\n", "| ep_rew_mean | 252 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7424 |\n", "| fps | 110 |\n", "| time_elapsed | 7804 |\n", "| 
total_timesteps | 8365864 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.024 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 2.13e+03 |\n", "| ep_rew_mean | 248 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7428 |\n", "| fps | 111 |\n", "| time_elapsed | 7843 |\n", "| total_timesteps | 8370620 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0186 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.87e+03 |\n", "| ep_rew_mean | 253 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7432 |\n", "| fps | 111 |\n", "| time_elapsed | 7883 |\n", "| total_timesteps | 8375596 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0651 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.87e+03 |\n", "| ep_rew_mean | 263 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7436 |\n", "| fps | 111 |\n", "| time_elapsed | 7922 |\n", "| total_timesteps | 8380426 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0196 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.87e+03 |\n", "| ep_rew_mean | 260 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7440 |\n", "| fps | 111 |\n", "| time_elapsed | 7959 |\n", "| total_timesteps | 8385048 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.57 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.87e+03 |\n", "| ep_rew_mean | 259 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7444 |\n", "| fps | 111 |\n", "| time_elapsed | 7997 |\n", "| total_timesteps | 8389680 |\n", "| train/ | |\n", "| 
learning_rate | 5e-05 |\n", "| loss | 0.051 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.87e+03 |\n", "| ep_rew_mean | 264 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7448 |\n", "| fps | 111 |\n", "| time_elapsed | 8033 |\n", "| total_timesteps | 8394156 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0193 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.87e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7452 |\n", "| fps | 111 |\n", "| time_elapsed | 8068 |\n", "| total_timesteps | 8398488 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0632 |\n", "----------------------------------\n", "Eval num_timesteps=8400000, episode_reward=350.90 +/- 78.48\n", "Episode length: 1211.80 +/- 67.36\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.21e+03 |\n", "| mean_reward | 351 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8400000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0244 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.87e+03 |\n", "| ep_rew_mean | 278 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7456 |\n", "| fps | 111 |\n", "| time_elapsed | 8133 |\n", "| total_timesteps | 8403182 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.035 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.87e+03 |\n", "| ep_rew_mean | 287 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7460 |\n", "| fps | 111 |\n", "| time_elapsed | 8170 |\n", "| total_timesteps | 8407660 |\n", "| train/ | |\n", "| 
learning_rate | 5e-05 |\n", "| loss | 0.044 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.87e+03 |\n", "| ep_rew_mean | 293 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7464 |\n", "| fps | 111 |\n", "| time_elapsed | 8206 |\n", "| total_timesteps | 8412214 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0437 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.86e+03 |\n", "| ep_rew_mean | 294 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7468 |\n", "| fps | 111 |\n", "| time_elapsed | 8246 |\n", "| total_timesteps | 8417062 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0838 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.86e+03 |\n", "| ep_rew_mean | 299 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7472 |\n", "| fps | 111 |\n", "| time_elapsed | 8285 |\n", "| total_timesteps | 8421956 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.633 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.83e+03 |\n", "| ep_rew_mean | 299 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7476 |\n", "| fps | 111 |\n", "| time_elapsed | 8320 |\n", "| total_timesteps | 8426302 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.033 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.58e+03 |\n", "| ep_rew_mean | 300 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7480 |\n", "| fps | 111 |\n", "| time_elapsed | 8358 |\n", "| total_timesteps | 8430952 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0843 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.57e+03 |\n", "| ep_rew_mean | 306 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7484 |\n", "| fps | 111 |\n", "| time_elapsed | 8394 |\n", "| total_timesteps | 8435448 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0494 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.57e+03 |\n", "| ep_rew_mean | 311 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7488 |\n", "| fps | 111 |\n", "| time_elapsed | 8430 |\n", "| total_timesteps | 8439884 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0271 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.56e+03 |\n", "| ep_rew_mean | 310 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7492 |\n", "| fps | 111 |\n", "| time_elapsed | 8466 |\n", "| total_timesteps | 8444306 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.207 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.56e+03 |\n", "| ep_rew_mean | 314 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7496 |\n", "| fps | 111 |\n", "| time_elapsed | 8500 |\n", "| total_timesteps | 8448548 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0843 |\n", "----------------------------------\n", "Eval num_timesteps=8450000, episode_reward=307.80 +/- 113.18\n", "Episode length: 1146.20 +/- 107.03\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.15e+03 |\n", "| mean_reward | 308 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8450000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0442 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.56e+03 |\n", "| ep_rew_mean | 318 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7500 |\n", "| fps | 111 |\n", "| time_elapsed | 8564 |\n", "| total_timesteps | 8453180 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0661 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.57e+03 |\n", "| ep_rew_mean | 325 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7504 |\n", "| fps | 111 |\n", "| time_elapsed | 8600 |\n", "| total_timesteps | 8457626 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.06 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 328 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7508 |\n", "| fps | 111 |\n", "| time_elapsed | 8640 |\n", "| total_timesteps | 8462632 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.116 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.57e+03 |\n", "| ep_rew_mean | 328 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7512 |\n", "| fps | 111 |\n", "| time_elapsed | 8883 |\n", "| total_timesteps | 8492916 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.96 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 329 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7516 |\n", "| fps | 111 |\n", "| time_elapsed | 8915 |\n", "| total_timesteps | 8496806 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0515 |\n", "----------------------------------\n", "Eval 
num_timesteps=8500000, episode_reward=430.00 +/- 68.03\n", "Episode length: 1164.80 +/- 70.45\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.16e+03 |\n", "| mean_reward | 430 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8500000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0184 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 324 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7520 |\n", "| fps | 111 |\n", "| time_elapsed | 8979 |\n", "| total_timesteps | 8501346 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.369 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 328 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7524 |\n", "| fps | 111 |\n", "| time_elapsed | 9016 |\n", "| total_timesteps | 8505914 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0219 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 330 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7528 |\n", "| fps | 111 |\n", "| time_elapsed | 9049 |\n", "| total_timesteps | 8510058 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0403 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 326 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7532 |\n", "| fps | 111 |\n", "| time_elapsed | 9088 |\n", "| total_timesteps | 8514902 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0532 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 323 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7536 |\n", "| fps | 111 |\n", "| time_elapsed | 9126 |\n", "| total_timesteps | 8519534 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.024 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 329 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7540 |\n", "| fps | 111 |\n", "| time_elapsed | 9161 |\n", "| total_timesteps | 8523950 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0946 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 326 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7544 |\n", "| fps | 111 |\n", "| time_elapsed | 9198 |\n", "| total_timesteps | 8528436 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0747 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 324 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7548 |\n", "| fps | 111 |\n", "| time_elapsed | 9232 |\n", "| total_timesteps | 8532722 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.09 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 324 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7552 |\n", "| fps | 111 |\n", "| time_elapsed | 9268 |\n", "| total_timesteps | 8537138 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.112 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 321 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7556 |\n", "| fps | 111 |\n", "| time_elapsed | 9300 |\n", "| total_timesteps | 8541112 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0523 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 320 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7560 |\n", "| fps | 111 |\n", "| time_elapsed | 9337 |\n", "| total_timesteps | 8545646 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0775 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 316 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7564 |\n", "| fps | 112 |\n", "| time_elapsed | 9372 |\n", "| total_timesteps | 8549944 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.344 |\n", "----------------------------------\n", "Eval num_timesteps=8550000, episode_reward=386.60 +/- 67.10\n", "Episode length: 1212.60 +/- 127.84\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.21e+03 |\n", "| mean_reward | 387 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8550000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0775 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 317 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7568 |\n", "| fps | 111 |\n", "| time_elapsed | 9436 |\n", "| total_timesteps | 8554030 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0424 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 315 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7572 |\n", "| fps | 111 |\n", "| time_elapsed | 9481 |\n", "| total_timesteps | 8558818 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0355 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 312 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7576 |\n", "| fps | 111 |\n", "| time_elapsed | 9512 |\n", "| total_timesteps | 8562684 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0318 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 314 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7580 |\n", "| fps | 111 |\n", "| time_elapsed | 9549 |\n", "| total_timesteps | 8567264 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.55 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 308 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7584 |\n", "| fps | 111 |\n", "| time_elapsed | 9587 |\n", "| total_timesteps | 8571900 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0352 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 312 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7588 |\n", "| fps | 111 |\n", "| time_elapsed | 9618 |\n", "| total_timesteps | 8575752 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.131 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | 
|\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 310 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7592 |\n", "| fps | 111 |\n", "| time_elapsed | 9655 |\n", "| total_timesteps | 8580426 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0809 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 307 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7596 |\n", "| fps | 111 |\n", "| time_elapsed | 9692 |\n", "| total_timesteps | 8584986 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.07 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 308 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7600 |\n", "| fps | 111 |\n", "| time_elapsed | 9733 |\n", "| total_timesteps | 8590056 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0541 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 306 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7604 |\n", "| fps | 112 |\n", "| time_elapsed | 9769 |\n", "| total_timesteps | 8594510 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.062 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 303 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7608 |\n", "| fps | 112 |\n", "| time_elapsed | 9808 |\n", "| total_timesteps | 8599314 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.039 |\n", "----------------------------------\n", "Eval num_timesteps=8600000, episode_reward=409.00 +/- 70.57\n", "Episode length: 1177.60 +/- 130.74\n", 
"----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.18e+03 |\n", "| mean_reward | 409 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8600000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0515 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 306 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7612 |\n", "| fps | 111 |\n", "| time_elapsed | 9873 |\n", "| total_timesteps | 8604116 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0537 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 309 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7616 |\n", "| fps | 111 |\n", "| time_elapsed | 9913 |\n", "| total_timesteps | 8609022 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0268 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 309 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7620 |\n", "| fps | 111 |\n", "| time_elapsed | 9949 |\n", "| total_timesteps | 8613448 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.598 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 305 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7624 |\n", "| fps | 111 |\n", "| time_elapsed | 9988 |\n", "| total_timesteps | 8618319 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0279 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 
313 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7628 |\n", "| fps | 112 |\n", "| time_elapsed | 10026 |\n", "| total_timesteps | 8622955 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.038 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 317 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7632 |\n", "| fps | 112 |\n", "| time_elapsed | 10065 |\n", "| total_timesteps | 8627825 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0255 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 318 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7636 |\n", "| fps | 112 |\n", "| time_elapsed | 10102 |\n", "| total_timesteps | 8632433 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0422 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 310 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7640 |\n", "| fps | 112 |\n", "| time_elapsed | 10136 |\n", "| total_timesteps | 8636577 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0388 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 312 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7644 |\n", "| fps | 112 |\n", "| time_elapsed | 10170 |\n", "| total_timesteps | 8640869 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.038 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 309 |\n", "| exploration_rate | 0.01 |\n", "| time/ 
| |\n", "| episodes | 7648 |\n", "| fps | 112 |\n", "| time_elapsed | 10208 |\n", "| total_timesteps | 8645561 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0515 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 308 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7652 |\n", "| fps | 112 |\n", "| time_elapsed | 10244 |\n", "| total_timesteps | 8649961 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0365 |\n", "----------------------------------\n", "Eval num_timesteps=8650000, episode_reward=403.80 +/- 10.90\n", "Episode length: 1221.60 +/- 108.23\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.22e+03 |\n", "| mean_reward | 404 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8650000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0721 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 312 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7656 |\n", "| fps | 111 |\n", "| time_elapsed | 10311 |\n", "| total_timesteps | 8654873 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0382 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 308 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7660 |\n", "| fps | 112 |\n", "| time_elapsed | 10349 |\n", "| total_timesteps | 8659523 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.165 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 307 |\n", "| exploration_rate | 0.01 |\n", "| time/ 
| |\n", "| episodes | 7664 |\n", "| fps | 112 |\n", "| time_elapsed | 10384 |\n", "| total_timesteps | 8663877 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0214 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 304 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7668 |\n", "| fps | 112 |\n", "| time_elapsed | 10419 |\n", "| total_timesteps | 8668227 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0519 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 307 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7672 |\n", "| fps | 112 |\n", "| time_elapsed | 10458 |\n", "| total_timesteps | 8673061 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0428 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 310 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7676 |\n", "| fps | 112 |\n", "| time_elapsed | 10492 |\n", "| total_timesteps | 8677185 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 3.55 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 311 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7680 |\n", "| fps | 112 |\n", "| time_elapsed | 10531 |\n", "| total_timesteps | 8682059 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.111 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 317 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7684 |\n", "| fps | 112 |\n", 
"| time_elapsed | 10568 |\n", "| total_timesteps | 8686659 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0225 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 313 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7688 |\n", "| fps | 112 |\n", "| time_elapsed | 10597 |\n", "| total_timesteps | 8690183 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.029 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 318 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7692 |\n", "| fps | 112 |\n", "| time_elapsed | 10635 |\n", "| total_timesteps | 8694931 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0495 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 314 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7696 |\n", "| fps | 112 |\n", "| time_elapsed | 10669 |\n", "| total_timesteps | 8699151 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0797 |\n", "----------------------------------\n", "Eval num_timesteps=8700000, episode_reward=259.80 +/- 51.19\n", "Episode length: 1091.40 +/- 99.55\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.09e+03 |\n", "| mean_reward | 260 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8700000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.58 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 312 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7700 |\n", "| fps | 112 |\n", "| 
time_elapsed | 10734 |\n", "| total_timesteps | 8704197 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0409 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 317 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7704 |\n", "| fps | 112 |\n", "| time_elapsed | 10775 |\n", "| total_timesteps | 8709251 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.111 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 314 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7708 |\n", "| fps | 112 |\n", "| time_elapsed | 10809 |\n", "| total_timesteps | 8713437 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0294 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 310 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7712 |\n", "| fps | 112 |\n", "| time_elapsed | 10844 |\n", "| total_timesteps | 8717725 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.133 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 307 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7716 |\n", "| fps | 112 |\n", "| time_elapsed | 10875 |\n", "| total_timesteps | 8721597 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0347 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 305 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7720 |\n", "| fps | 112 |\n", "| time_elapsed | 10908 |\n", "| total_timesteps | 
8725703 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0246 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 300 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7724 |\n", "| fps | 112 |\n", "| time_elapsed | 10946 |\n", "| total_timesteps | 8730415 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0359 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 298 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7728 |\n", "| fps | 112 |\n", "| time_elapsed | 10983 |\n", "| total_timesteps | 8734949 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.122 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 294 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7732 |\n", "| fps | 112 |\n", "| time_elapsed | 11020 |\n", "| total_timesteps | 8739557 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0351 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 293 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7736 |\n", "| fps | 112 |\n", "| time_elapsed | 11053 |\n", "| total_timesteps | 8743677 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0473 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 297 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7740 |\n", "| fps | 112 |\n", "| time_elapsed | 11089 |\n", "| total_timesteps | 8748169 |\n", "| train/ | |\n", "| learning_rate | 
5e-05 |\n", "| loss | 0.0356 |\n", "----------------------------------\n", "Eval num_timesteps=8750000, episode_reward=363.20 +/- 91.43\n", "Episode length: 1201.60 +/- 166.26\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.2e+03 |\n", "| mean_reward | 363 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8750000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0331 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 299 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7744 |\n", "| fps | 112 |\n", "| time_elapsed | 11154 |\n", "| total_timesteps | 8752785 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0463 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 304 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7748 |\n", "| fps | 112 |\n", "| time_elapsed | 11189 |\n", "| total_timesteps | 8757191 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.116 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 301 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7752 |\n", "| fps | 112 |\n", "| time_elapsed | 11226 |\n", "| total_timesteps | 8761733 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0401 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 299 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7756 |\n", "| fps | 112 |\n", "| time_elapsed | 11263 |\n", "| total_timesteps | 8766263 |\n", "| train/ | |\n", "| learning_rate | 
5e-05 |\n", "| loss | 0.137 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 296 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7760 |\n", "| fps | 112 |\n", "| time_elapsed | 11298 |\n", "| total_timesteps | 8770661 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0461 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 295 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7764 |\n", "| fps | 112 |\n", "| time_elapsed | 11332 |\n", "| total_timesteps | 8774851 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0622 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 300 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7768 |\n", "| fps | 112 |\n", "| time_elapsed | 11376 |\n", "| total_timesteps | 8780213 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.5 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 299 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7772 |\n", "| fps | 112 |\n", "| time_elapsed | 11419 |\n", "| total_timesteps | 8785621 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0506 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 300 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7776 |\n", "| fps | 112 |\n", "| time_elapsed | 11452 |\n", "| total_timesteps | 8789709 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0574 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 293 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7780 |\n", "| fps | 112 |\n", "| time_elapsed | 11484 |\n", "| total_timesteps | 8793587 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0284 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 293 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7784 |\n", "| fps | 112 |\n", "| time_elapsed | 11525 |\n", "| total_timesteps | 8798699 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.112 |\n", "----------------------------------\n", "Eval num_timesteps=8800000, episode_reward=246.80 +/- 39.77\n", "Episode length: 1178.20 +/- 172.92\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.18e+03 |\n", "| mean_reward | 247 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8800000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0384 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 295 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7788 |\n", "| fps | 112 |\n", "| time_elapsed | 11595 |\n", "| total_timesteps | 8804015 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.214 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 295 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7792 |\n", "| fps | 112 |\n", "| time_elapsed | 11637 |\n", "| total_timesteps | 8809273 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0272 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 296 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7796 |\n", "| fps | 112 |\n", "| time_elapsed | 11677 |\n", "| total_timesteps | 8814263 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0627 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 296 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7800 |\n", "| fps | 112 |\n", "| time_elapsed | 11920 |\n", "| total_timesteps | 8844499 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0257 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 286 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7804 |\n", "| fps | 112 |\n", "| time_elapsed | 11956 |\n", "| total_timesteps | 8849009 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0447 |\n", "----------------------------------\n", "Eval num_timesteps=8850000, episode_reward=296.50 +/- 114.74\n", "Episode length: 1208.40 +/- 141.33\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.21e+03 |\n", "| mean_reward | 296 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8850000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0296 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 287 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7808 |\n", "| fps | 112 |\n", "| time_elapsed | 12026 |\n", "| total_timesteps | 8854173 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.24 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 283 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7812 |\n", "| fps | 112 |\n", "| time_elapsed | 12059 |\n", "| total_timesteps | 8858303 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0346 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 279 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7816 |\n", "| fps | 112 |\n", "| time_elapsed | 12093 |\n", "| total_timesteps | 8862505 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0325 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 282 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7820 |\n", "| fps | 112 |\n", "| time_elapsed | 12129 |\n", "| total_timesteps | 8866845 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0253 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 280 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7824 |\n", "| fps | 112 |\n", "| time_elapsed | 12161 |\n", "| total_timesteps | 8870891 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0342 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 278 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7828 |\n", "| fps | 112 |\n", "| time_elapsed | 12199 |\n", "| total_timesteps | 8875607 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.052 |\n", "----------------------------------\n", "Eval 
num_timesteps=8900000, episode_reward=338.00 +/- 75.89\n", "Episode length: 1136.20 +/- 128.68\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.14e+03 |\n", "| mean_reward | 338 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8900000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.137 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.66e+03 |\n", "| ep_rew_mean | 276 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7832 |\n", "| fps | 112 |\n", "| time_elapsed | 12473 |\n", "| total_timesteps | 8905949 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.116 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.67e+03 |\n", "| ep_rew_mean | 275 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7836 |\n", "| fps | 112 |\n", "| time_elapsed | 12516 |\n", "| total_timesteps | 8910405 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0522 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.67e+03 |\n", "| ep_rew_mean | 271 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7840 |\n", "| fps | 112 |\n", "| time_elapsed | 12551 |\n", "| total_timesteps | 8914829 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.054 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.67e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7844 |\n", "| fps | 112 |\n", "| time_elapsed | 12593 |\n", "| total_timesteps | 8919983 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0485 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.67e+03 |\n", "| ep_rew_mean | 269 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7848 |\n", "| fps | 112 |\n", "| time_elapsed | 12627 |\n", "| total_timesteps | 8924217 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0237 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.67e+03 |\n", "| ep_rew_mean | 272 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7852 |\n", "| fps | 112 |\n", "| time_elapsed | 12665 |\n", "| total_timesteps | 8928941 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.051 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.67e+03 |\n", "| ep_rew_mean | 269 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7856 |\n", "| fps | 112 |\n", "| time_elapsed | 12701 |\n", "| total_timesteps | 8933341 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.53 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.66e+03 |\n", "| ep_rew_mean | 270 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7860 |\n", "| fps | 112 |\n", "| time_elapsed | 12728 |\n", "| total_timesteps | 8936750 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0428 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.65e+03 |\n", "| ep_rew_mean | 266 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7864 |\n", "| fps | 112 |\n", "| time_elapsed | 12755 |\n", "| total_timesteps | 8940080 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0516 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ 
| |\n", "| ep_len_mean | 1.65e+03 |\n", "| ep_rew_mean | 269 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7868 |\n", "| fps | 112 |\n", "| time_elapsed | 12795 |\n", "| total_timesteps | 8945044 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0834 |\n", "----------------------------------\n", "Eval num_timesteps=8950000, episode_reward=379.20 +/- 24.23\n", "Episode length: 1163.80 +/- 224.56\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.16e+03 |\n", "| mean_reward | 379 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 8950000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.169 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.64e+03 |\n", "| ep_rew_mean | 268 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7872 |\n", "| fps | 112 |\n", "| time_elapsed | 12863 |\n", "| total_timesteps | 8950102 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.107 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.75e+03 |\n", "| ep_rew_mean | 268 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7876 |\n", "| fps | 112 |\n", "| time_elapsed | 12984 |\n", "| total_timesteps | 8965110 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.127 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.75e+03 |\n", "| ep_rew_mean | 270 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7880 |\n", "| fps | 112 |\n", "| time_elapsed | 13013 |\n", "| total_timesteps | 8968715 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0423 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | 
|\n", "| ep_len_mean | 1.74e+03 |\n", "| ep_rew_mean | 269 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7884 |\n", "| fps | 112 |\n", "| time_elapsed | 13049 |\n", "| total_timesteps | 8973181 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0454 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.74e+03 |\n", "| ep_rew_mean | 268 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7888 |\n", "| fps | 112 |\n", "| time_elapsed | 13085 |\n", "| total_timesteps | 8977679 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.118 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.73e+03 |\n", "| ep_rew_mean | 269 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7892 |\n", "| fps | 112 |\n", "| time_elapsed | 13125 |\n", "| total_timesteps | 8982555 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0287 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.72e+03 |\n", "| ep_rew_mean | 270 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7896 |\n", "| fps | 112 |\n", "| time_elapsed | 13159 |\n", "| total_timesteps | 8986741 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0536 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7900 |\n", "| fps | 113 |\n", "| time_elapsed | 13196 |\n", "| total_timesteps | 8991397 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.67 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.46e+03 |\n", "| ep_rew_mean 
| 277 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7904 |\n", "| fps | 113 |\n", "| time_elapsed | 13228 |\n", "| total_timesteps | 8995301 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.056 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.46e+03 |\n", "| ep_rew_mean | 282 |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| episodes | 7908 |\n", "| fps | 113 |\n", "| time_elapsed | 13266 |\n", "| total_timesteps | 8999987 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.57 |\n", "----------------------------------\n", "Eval num_timesteps=9000000, episode_reward=372.30 +/- 8.97\n", "Episode length: 1087.00 +/- 84.00\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.09e+03 |\n", "| mean_reward | 372 |\n", "| rollout/ | |\n", "| exploration_rate | 0.01 |\n", "| time/ | |\n", "| total_timesteps | 9000000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0733 |\n", "----------------------------------\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Train the model\n", "model.learn(total_timesteps=NUM_TIMESTEPS, \n", " callback=callback_list, \n", " tb_log_name=\"./tb/\", \n", " reset_num_timesteps=False)" ] }, { "cell_type": "code", "execution_count": 9, "id": "0b7118b0", "metadata": { "execution": { "iopub.execute_input": "2024-05-16T16:51:37.422141Z", "iopub.status.busy": "2024-05-16T16:51:37.421476Z", "iopub.status.idle": "2024-05-16T16:52:28.989274Z", "shell.execute_reply": "2024-05-16T16:52:28.988245Z" }, "papermill": { "duration": 51.602337, "end_time": "2024-05-16T16:52:28.993033", "exception": false, "start_time": "2024-05-16T16:51:37.390696", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Save the model, policy, and replay buffer for future loading and 
training\n", "model.save(MODEL_FILE_NAME)\n", "model.save_replay_buffer(BUFFER_FILE_NAME)\n", "model.policy.save(POLICY_FILE_NAME)" ] } ], "metadata": { "kaggle": { "accelerator": "nvidiaTeslaT4", "dataSources": [ { "sourceId": 177842018, "sourceType": "kernelVersion" } ], "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" }, "papermill": { "default_parameters": {}, "duration": 13504.046841, "end_time": "2024-05-16T16:52:32.750081", "environment_variables": {}, "exception": null, "input_path": "__notebook__.ipynb", "output_path": "__notebook__.ipynb", "parameters": {}, "start_time": "2024-05-16T13:07:28.703240", "version": "2.5.0" } }, "nbformat": 4, "nbformat_minor": 5 }