diff --git "a/notebooks/dqn_pacmanv5_run2v3.ipynb" "b/notebooks/dqn_pacmanv5_run2v3.ipynb" new file mode 100644--- /dev/null +++ "b/notebooks/dqn_pacmanv5_run2v3.ipynb" @@ -0,0 +1,5169 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "536a0475", + "metadata": { + "execution": { + "iopub.execute_input": "2024-05-10T21:56:02.774182Z", + "iopub.status.busy": "2024-05-10T21:56:02.773894Z", + "iopub.status.idle": "2024-05-10T21:57:24.594567Z", + "shell.execute_reply": "2024-05-10T21:57:24.593319Z" + }, + "papermill": { + "duration": 81.828617, + "end_time": "2024-05-10T21:57:24.597342", + "exception": false, + "start_time": "2024-05-10T21:56:02.768725", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%%capture\n", + "!pip install stable-baselines3[extra]\n", + "!pip install moviepy" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "21e673cc", + "metadata": { + "execution": { + "iopub.execute_input": "2024-05-10T21:57:24.606314Z", + "iopub.status.busy": "2024-05-10T21:57:24.606004Z", + "iopub.status.idle": "2024-05-10T21:57:49.638696Z", + "shell.execute_reply": "2024-05-10T21:57:49.637818Z" + }, + "papermill": { + "duration": 25.039828, + "end_time": "2024-05-10T21:57:49.641081", + "exception": false, + "start_time": "2024-05-10T21:57:24.601253", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-10 21:57:35.588946: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-05-10 21:57:35.589049: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-05-10 21:57:35.863074: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n" + ] + } + ], + "source": [ + "from stable_baselines3 import DQN\n", + "from stable_baselines3.common.monitor import Monitor\n", + "from stable_baselines3.common.callbacks import BaseCallback, EvalCallback, CallbackList\n", + "from stable_baselines3.common.logger import Video, HParam, TensorBoardOutputFormat\n", + "from stable_baselines3.common.evaluation import evaluate_policy\n", + "\n", + "from typing import Any, Dict\n", + "\n", + "import gymnasium as gym\n", + "import torch as th\n", + "import numpy as np\n", + "\n", + "# =====File names=====\n", + "MODEL_FILE_NAME = \"ALE-Pacman-v5\"\n", + "BUFFER_FILE_NAME = \"dqn_replay_buffer_pacman_v2-2\"\n", + "POLICY_FILE_NAME = \"dqn_policy_pacman_v2-2\"\n", + "\n", + "# =====Model Config=====\n", + "# Evaluate in 20ths\n", + "EVAL_CALLBACK_FREQ = 75_000\n", + "# Record in quarters (the last one won't record, will have to do manually)\n", + "VIDEO_CALLBACK_FREQ = 375_000\n", + "FRAMESKIP = 4\n", + "NUM_TIMESTEPS = 1_500_000\n", + "\n", + "# =====Hyperparams=====\n", + "EXPLORATION_FRACTION = 0.3\n", + "# Buffer size needs to be less than about 60k in order to save it in a Kaggle instance\n", + "BUFFER_SIZE = 60_000\n", + "BATCH_SIZE = 64\n", + "LEARNING_STARTS = 50_000\n", + "LEARNING_RATE = 0.00005\n", + "GAMMA = 0.999\n", + "FINAL_EPSILON = 0.05\n", + "# Target Update Interval is set to 10k by default and looks like it is set to \n", + "# 4 in the Nature paper. This is a large discrepency and makes me wonder if it \n", + "# is something different or measured differently...\n", + "TARGET_UPDATE_INTERVAL = 1_000\n", + "\n", + "# =====Custom objects for hyperparam modification=====\n", + "CUSTOM_OBJECTS = {\n", + " \"exploration_fraction\": EXPLORATION_FRACTION, \n", + " \"buffer_size\": BUFFER_SIZE,\n", + " \"batch_size\": BATCH_SIZE,\n", + " \"learning_starts\": LEARNING_STARTS,\n", + " \"learning_rate\": LEARNING_RATE,\n", + " \"gamma\": GAMMA,\n", + " \"target_update_interval\": TARGET_UPDATE_INTERVAL,\n", + " \"exploration_final_eps\": FINAL_EPSILON,\n", + " \"tensorboard_log\": \"./\",\n", + " \"verbose\": 1}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "3ceb5255", + "metadata": { + "execution": { + "iopub.execute_input": "2024-05-10T21:57:49.650876Z", + "iopub.status.busy": "2024-05-10T21:57:49.649910Z", + "iopub.status.idle": "2024-05-10T21:57:49.660823Z", + "shell.execute_reply": "2024-05-10T21:57:49.659990Z" + }, + "papermill": { + "duration": 0.017558, + "end_time": "2024-05-10T21:57:49.662716", + "exception": false, + "start_time": "2024-05-10T21:57:49.645158", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# VideoRecorderCallback\n", + "# The VideoRecorderCallback should record a video of the agent in the evaluation environment\n", + "# every render_freq timesteps. It will record one episode. It will also record one episode when\n", + "# the training has been completed\n", + "\n", + "class VideoRecorderCallback(BaseCallback):\n", + " def __init__(self, eval_env: gym.Env, render_freq: int, n_eval_episodes: int = 1, deterministic: bool = True):\n", + " \"\"\"\n", + " Records a video of an agent's trajectory traversing ``eval_env`` and logs it to TensorBoard.\n", + " :param eval_env: A gym environment from which the trajectory is recorded\n", + " :param render_freq: Render the agent's trajectory every eval_freq call of the callback.\n", + " :param n_eval_episodes: Number of episodes to render\n", + " :param deterministic: Whether to use deterministic or stochastic policy\n", + " \"\"\"\n", + " super().__init__()\n", + " self._eval_env = eval_env\n", + " self._render_freq = render_freq\n", + " self._n_eval_episodes = n_eval_episodes\n", + " self._deterministic = deterministic\n", + "\n", + " def _on_step(self) -> bool:\n", + " if self.n_calls % self._render_freq == 0:\n", + " screens = []\n", + "\n", + " def grab_screens(_locals: Dict[str, Any], _globals: Dict[str, Any]) -> None:\n", + " \"\"\"\n", + " Renders the environment in its current state, recording the screen in the captured `screens` list\n", + " :param _locals: A dictionary containing all local variables of the callback's scope\n", + " :param _globals: A dictionary containing all global variables of the callback's scope\n", + " \"\"\"\n", + " screen = self._eval_env.render()\n", + " # PyTorch uses CxHxW vs HxWxC gym (and tensorflow) image convention\n", + " screens.append(screen.transpose(2, 0, 1))\n", + "\n", + " evaluate_policy(\n", + " self.model,\n", + " self._eval_env,\n", + " callback=grab_screens,\n", + " n_eval_episodes=self._n_eval_episodes,\n", + " deterministic=self._deterministic,\n", + " )\n", + " self.logger.record(\n", + " \"trajectory/video\",\n", + " Video(th.from_numpy(np.array([screens])), fps=60),\n", + " exclude=(\"stdout\", \"log\", \"json\", \"csv\"),\n", + " )\n", + " return True" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "026f9ded", + "metadata": { + "execution": { + "iopub.execute_input": "2024-05-10T21:57:49.670923Z", + "iopub.status.busy": "2024-05-10T21:57:49.670483Z", + "iopub.status.idle": "2024-05-10T21:57:49.681645Z", + "shell.execute_reply": "2024-05-10T21:57:49.680856Z" + }, + "papermill": { + "duration": 0.017255, + "end_time": "2024-05-10T21:57:49.683446", + "exception": false, + "start_time": "2024-05-10T21:57:49.666191", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# HParamCallback\n", + "# This should log the hyperparameters specified and map the metrics that are logged to \n", + "# the appropriate run.\n", + "class HParamCallback(BaseCallback):\n", + " \"\"\"\n", + " Saves the hyperparameters and metrics at the start of the training, and logs them to TensorBoard.\n", + " \"\"\" \n", + " def __init__(self):\n", + " super().__init__()\n", + " \n", + "\n", + " def _on_training_start(self) -> None:\n", + " \n", + " hparam_dict = {\n", + " \"algorithm\": self.model.__class__.__name__,\n", + " \"policy\": self.model.policy.__class__.__name__,\n", + " \"environment\": self.model.env.__class__.__name__,\n", + " \"buffer_size\": self.model.buffer_size,\n", + " \"batch_size\": self.model.batch_size,\n", + " \"tau\": self.model.tau,\n", + " \"gradient_steps\": self.model.gradient_steps,\n", + " \"target_update_interval\": self.model.target_update_interval,\n", + " \"exploration_fraction\": self.model.exploration_fraction,\n", + " \"exploration_initial_eps\": self.model.exploration_initial_eps,\n", + " \"exploration_final_eps\": self.model.exploration_final_eps,\n", + " \"max_grad_norm\": self.model.max_grad_norm,\n", + " \"tensorboard_log\": self.model.tensorboard_log,\n", + " \"seed\": self.model.seed, \n", + " \"learning rate\": self.model.learning_rate,\n", + " \"gamma\": self.model.gamma, \n", + " }\n", + " # define the metrics that will appear in the `HPARAMS` Tensorboard tab by referencing their tag\n", + " # Tensorbaord will find & display metrics from the `SCALARS` tab\n", + " metric_dict = {\n", + " \"eval/mean_ep_length\": 0,\n", + " \"eval/mean_reward\": 0,\n", + " \"rollout/ep_len_mean\": 0,\n", + " \"rollout/ep_rew_mean\": 0,\n", + " \"rollout/exploration_rate\": 0,\n", + " \"time/_episode_num\": 0,\n", + " \"time/fps\": 0,\n", + " \"time/total_timesteps\": 0,\n", + " \"train/learning_rate\": 0.0,\n", + " \"train/loss\": 0.0,\n", + " \"train/n_updates\": 0.0,\n", + " \"locals/rewards\": 0.0,\n", + " \"locals/infos_0_lives\": 0.0,\n", + " \"locals/num_collected_steps\": 0.0,\n", + " \"locals/num_collected_episodes\": 0.0\n", + " }\n", + " \n", + " self.logger.record(\n", + " \"hparams\",\n", + " HParam(hparam_dict, metric_dict),\n", + " exclude=(\"stdout\", \"log\", \"json\", \"csv\"),\n", + " )\n", + " \n", + " def _on_step(self) -> bool:\n", + " return True" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8f76e49b", + "metadata": { + "execution": { + "iopub.execute_input": "2024-05-10T21:57:49.691426Z", + "iopub.status.busy": "2024-05-10T21:57:49.690953Z", + "iopub.status.idle": "2024-05-10T21:57:49.705966Z", + "shell.execute_reply": "2024-05-10T21:57:49.705058Z" + }, + "papermill": { + "duration": 0.021092, + "end_time": "2024-05-10T21:57:49.707904", + "exception": false, + "start_time": "2024-05-10T21:57:49.686812", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# PlotTensorboardValuesCallback\n", + "# This callback should log values to tensorboard on every step. \n", + "# The self.logger class should plot a new scalar value when recording.\n", + "\n", + "class PlotTensorboardValuesCallback(BaseCallback):\n", + " \"\"\"\n", + " Custom callback for plotting additional values in tensorboard.\n", + " \"\"\"\n", + " def __init__(self, eval_env: gym.Env, train_env: gym.Env, model: DQN, verbose=0):\n", + " super().__init__(verbose)\n", + " self._eval_env = eval_env\n", + " self._train_env = train_env\n", + " self._model = model\n", + "\n", + " def _on_training_start(self) -> None:\n", + " output_formats = self.logger.output_formats\n", + " # Save reference to tensorboard formatter object\n", + " # note: the failure case (not formatter found) is not handled here, should be done with try/except.\n", + " try:\n", + " self.tb_formatter = next(formatter for formatter in output_formats if isinstance(formatter, TensorBoardOutputFormat))\n", + " except:\n", + " print(\"Exception thrown in tb_formatter initialization.\") \n", + " \n", + " self.tb_formatter.writer.add_text(\"metadata/eval_env\", str(self._eval_env.metadata), self.num_timesteps)\n", + " self.tb_formatter.writer.flush()\n", + " self.tb_formatter.writer.add_text(\"metadata/train_env\", str(self._train_env.metadata), self.num_timesteps)\n", + " self.tb_formatter.writer.flush()\n", + " self.tb_formatter.writer.add_text(\"model/q_net\", str(self._model.q_net), self.num_timesteps)\n", + " self.tb_formatter.writer.flush()\n", + " self.tb_formatter.writer.add_text(\"model/q_net_target\", str(self._model.q_net_target), self.num_timesteps)\n", + " self.tb_formatter.writer.flush()\n", + "\n", + " def _on_step(self) -> bool:\n", + " self.logger.record(\"time/_episode_num\", self.model._episode_num, exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", + " self.logger.record(\"train/n_updates\", self.model._n_updates, exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", + " self.logger.record(\"locals/rewards\", self.locals[\"rewards\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", + " self.logger.record(\"locals/infos_0_lives\", self.locals[\"infos\"][0][\"lives\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", + " self.logger.record(\"locals/num_collected_steps\", self.locals[\"num_collected_steps\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", + " self.logger.record(\"locals/num_collected_episodes\", self.locals[\"num_collected_episodes\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", + " \n", + " return True\n", + " \n", + " def _on_training_end(self) -> None:\n", + " self.tb_formatter.writer.add_text(\"metadata/eval_env\", str(self._eval_env.metadata), self.num_timesteps)\n", + " self.tb_formatter.writer.flush()\n", + " self.tb_formatter.writer.add_text(\"metadata/train_env\", str(self._train_env.metadata), self.num_timesteps)\n", + " self.tb_formatter.writer.flush()\n", + " self.tb_formatter.writer.add_text(\"model/q_net\", str(self._model.q_net), self.num_timesteps)\n", + " self.tb_formatter.writer.flush()\n", + " self.tb_formatter.writer.add_text(\"model/q_net_target\", str(self._model.q_net_target), self.num_timesteps)\n", + " self.tb_formatter.writer.flush()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "640c058f", + "metadata": { + "execution": { + "iopub.execute_input": "2024-05-10T21:57:49.715734Z", + "iopub.status.busy": "2024-05-10T21:57:49.715409Z", + "iopub.status.idle": "2024-05-10T21:59:28.705015Z", + "shell.execute_reply": "2024-05-10T21:59:28.704068Z" + }, + "papermill": { + "duration": 98.996212, + "end_time": "2024-05-10T21:59:28.707551", + "exception": false, + "start_time": "2024-05-10T21:57:49.711339", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)\n", + "[Powered by Stella]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Wrapping the env with a `Monitor` wrapper\n", + "Wrapping the env in a DummyVecEnv.\n", + "Wrapping the env in a VecTransposeImage.\n" + ] + } + ], + "source": [ + "# make the training and evaluation environments\n", + "eval_env = Monitor(gym.make(\"ALE/Pacman-v5\", render_mode=\"rgb_array\", frameskip=FRAMESKIP))\n", + "train_env = gym.make(\"ALE/Pacman-v5\", render_mode=\"rgb_array\", frameskip=FRAMESKIP)\n", + "\n", + "# Make the model with specified hyperparams\n", + "# load the model\n", + "# load the buffer\n", + "# The loaded model still needs the hyperparameters to be passed to it, and the replay buffer\n", + "model = DQN.load(\"/kaggle/input/dqn-pacman-run2v2/ALE-Pacman-v5.zip\", \n", + " env=train_env, \n", + " custom_objects=CUSTOM_OBJECTS)\n", + "model.load_replay_buffer(\"/kaggle/input/dqn-pacman-run2v2/dqn_replay_buffer_pacman_v2-1\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "22ca5762", + "metadata": { + "execution": { + "iopub.execute_input": "2024-05-10T21:59:28.721521Z", + "iopub.status.busy": "2024-05-10T21:59:28.721238Z", + "iopub.status.idle": "2024-05-10T21:59:28.727816Z", + "shell.execute_reply": "2024-05-10T21:59:28.726805Z" + }, + "papermill": { + "duration": 0.013606, + "end_time": "2024-05-10T21:59:28.729986", + "exception": false, + "start_time": "2024-05-10T21:59:28.716380", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Define the callbacks and put them in a list\n", + "eval_callback = EvalCallback(\n", + " eval_env,\n", + " best_model_save_path=\"./best_model/\",\n", + " log_path=\"./evals/\",\n", + " eval_freq=EVAL_CALLBACK_FREQ,\n", + " n_eval_episodes=10,\n", + " deterministic=True,\n", + " render=False)\n", + "\n", + "tbplot_callback = PlotTensorboardValuesCallback(eval_env=eval_env, train_env=train_env, model=model)\n", + "video_callback = VideoRecorderCallback(eval_env, render_freq=VIDEO_CALLBACK_FREQ)\n", + "hparam_callback = HParamCallback()\n", + "\n", + "callback_list = CallbackList([hparam_callback, eval_callback, video_callback, tbplot_callback])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "648057ac", + "metadata": { + "execution": { + "iopub.execute_input": "2024-05-10T21:59:28.738728Z", + "iopub.status.busy": "2024-05-10T21:59:28.738413Z", + "iopub.status.idle": "2024-05-11T01:41:04.585763Z", + "shell.execute_reply": "2024-05-11T01:41:04.584612Z" + }, + "papermill": { + "duration": 13295.854278, + "end_time": "2024-05-11T01:41:04.588098", + "exception": false, + "start_time": "2024-05-10T21:59:28.733820", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logging to ././tb/_0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.10/site-packages/stable_baselines3/common/callbacks.py:403: UserWarning: Training and eval env are not of the same type != \n", + " warnings.warn(\"Training and eval env are not of the same type\" f\"{self.training_env} != {self.eval_env}\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 953 |\n", + "| ep_rew_mean | 88 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3300 |\n", + "| fps | 87 |\n", + "| time_elapsed | 11 |\n", + "| total_timesteps | 3001016 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.469 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 962 |\n", + "| ep_rew_mean | 90.3 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3304 |\n", + "| fps | 105 |\n", + "| time_elapsed | 50 |\n", + "| total_timesteps | 3005376 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.183 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 965 |\n", + "| ep_rew_mean | 93.6 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3308 |\n", + "| fps | 108 |\n", + "| time_elapsed | 84 |\n", + "| total_timesteps | 3009124 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 1.25 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 974 |\n", + "| ep_rew_mean | 97.8 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3312 |\n", + "| fps | 109 |\n", + "| time_elapsed | 123 |\n", + "| total_timesteps | 3013473 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.145 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 974 |\n", + "| ep_rew_mean | 98.7 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3316 |\n", + "| fps | 109 |\n", + "| time_elapsed | 158 |\n", + "| total_timesteps | 3017417 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.765 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 985 |\n", + "| ep_rew_mean | 102 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3320 |\n", + "| fps | 110 |\n", + "| time_elapsed | 199 |\n", + "| total_timesteps | 3022035 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.163 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 975 |\n", + "| ep_rew_mean | 107 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3324 |\n", + "| fps | 110 |\n", + "| time_elapsed | 233 |\n", + "| total_timesteps | 3025723 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.525 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 976 |\n", + "| ep_rew_mean | 108 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3328 |\n", + "| fps | 110 |\n", + "| time_elapsed | 270 |\n", + "| total_timesteps | 3029961 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.123 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 974 |\n", + "| ep_rew_mean | 111 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3332 |\n", + "| fps | 110 |\n", + "| time_elapsed | 308 |\n", + "| total_timesteps | 3034213 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.18 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 978 |\n", + "| ep_rew_mean | 115 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3336 |\n", + "| fps | 110 |\n", + "| time_elapsed | 345 |\n", + "| total_timesteps | 3038285 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.159 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 972 |\n", + "| ep_rew_mean | 116 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3340 |\n", + "| fps | 110 |\n", + "| time_elapsed | 379 |\n", + "| total_timesteps | 3042023 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0668 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.24e+03 |\n", + "| ep_rew_mean | 122 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3344 |\n", + "| fps | 111 |\n", + "| time_elapsed | 649 |\n", + "| total_timesteps | 3072342 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0839 |\n", + "----------------------------------\n", + "Eval num_timesteps=3075000, episode_reward=208.00 +/- 104.39\n", + "Episode length: 931.00 +/- 86.74\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 931 |\n", + "| mean_reward | 208 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 3075000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0993 |\n", + "----------------------------------\n", + "New best mean reward!\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.24e+03 |\n", + "| ep_rew_mean | 126 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3348 |\n", + "| fps | 107 |\n", + "| time_elapsed | 706 |\n", + "| total_timesteps | 3076077 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0991 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.25e+03 |\n", + "| ep_rew_mean | 131 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3352 |\n", + "| fps | 107 |\n", + "| time_elapsed | 749 |\n", + "| total_timesteps | 3080943 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.109 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.26e+03 |\n", + "| ep_rew_mean | 134 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3356 |\n", + "| fps | 108 |\n", + "| time_elapsed | 785 |\n", + "| total_timesteps | 3084943 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0644 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.28e+03 |\n", + "| ep_rew_mean | 139 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3360 |\n", + "| fps | 108 |\n", + "| time_elapsed | 831 |\n", + "| total_timesteps | 3089999 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.041 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.28e+03 |\n", + "| ep_rew_mean | 143 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3364 |\n", + "| fps | 108 |\n", + "| time_elapsed | 873 |\n", + "| total_timesteps | 3094685 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0551 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.29e+03 |\n", + "| ep_rew_mean | 144 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3368 |\n", + "| fps | 108 |\n", + "| time_elapsed | 910 |\n", + "| total_timesteps | 3098783 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.322 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.29e+03 |\n", + "| ep_rew_mean | 142 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3372 |\n", + "| fps | 108 |\n", + "| time_elapsed | 944 |\n", + "| total_timesteps | 3102588 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0419 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.3e+03 |\n", + "| ep_rew_mean | 146 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3376 |\n", + "| fps | 108 |\n", + "| time_elapsed | 988 |\n", + "| total_timesteps | 3107492 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.123 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.31e+03 |\n", + "| ep_rew_mean | 152 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3380 |\n", + "| fps | 108 |\n", + "| time_elapsed | 1027 |\n", + "| total_timesteps | 3111815 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.404 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.31e+03 |\n", + "| ep_rew_mean | 157 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3384 |\n", + "| fps | 108 |\n", + "| time_elapsed | 1060 |\n", + "| total_timesteps | 3115521 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.118 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.31e+03 |\n", + "| ep_rew_mean | 156 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3388 |\n", + "| fps | 109 |\n", + "| time_elapsed | 1099 |\n", + "| total_timesteps | 3119903 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0927 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.32e+03 |\n", + "| ep_rew_mean | 159 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3392 |\n", + "| fps | 109 |\n", + "| time_elapsed | 1141 |\n", + "| total_timesteps | 3124629 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.149 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.32e+03 |\n", + "| ep_rew_mean | 159 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3396 |\n", + "| fps | 109 |\n", + "| time_elapsed | 1180 |\n", + "| total_timesteps | 3128877 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.205 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.32e+03 |\n", + "| ep_rew_mean | 160 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3400 |\n", + "| fps | 109 |\n", + "| time_elapsed | 1219 |\n", + "| total_timesteps | 3133205 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.328 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.33e+03 |\n", + "| ep_rew_mean | 158 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3404 |\n", + "| fps | 109 |\n", + "| time_elapsed | 1261 |\n", + "| total_timesteps | 3137927 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.2 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.33e+03 |\n", + "| ep_rew_mean | 159 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3408 |\n", + "| fps | 109 |\n", + "| time_elapsed | 1300 |\n", + "| total_timesteps | 3142273 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0668 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.33e+03 |\n", + "| ep_rew_mean | 155 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3412 |\n", + "| fps | 109 |\n", + "| time_elapsed | 1334 |\n", + "| total_timesteps | 3146099 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.699 |\n", + "----------------------------------\n", + "Eval num_timesteps=3150000, episode_reward=186.10 +/- 72.06\n", + "Episode length: 4162.20 +/- 7682.33\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 4.16e+03 |\n", + "| mean_reward | 186 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 3150000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.111 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.33e+03 |\n", + "| ep_rew_mean | 160 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3416 |\n", + "| fps | 102 |\n", + "| time_elapsed | 1471 |\n", + "| total_timesteps | 3150231 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.176 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.33e+03 |\n", + "| ep_rew_mean | 166 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3420 |\n", + "| fps | 102 |\n", + "| time_elapsed | 1515 |\n", + "| total_timesteps | 3155147 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.163 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.34e+03 |\n", + "| ep_rew_mean | 167 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3424 |\n", + "| fps | 102 |\n", + "| time_elapsed | 1558 |\n", + "| total_timesteps | 3159929 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.113 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.34e+03 |\n", + "| ep_rew_mean | 172 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3428 |\n", + "| fps | 102 |\n", + "| time_elapsed | 1596 |\n", + "| total_timesteps | 3164208 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.447 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.35e+03 |\n", + "| ep_rew_mean | 170 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3432 |\n", + "| fps | 103 |\n", + "| time_elapsed | 1637 |\n", + "| total_timesteps | 3168767 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.385 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.35e+03 |\n", + "| ep_rew_mean | 168 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3436 |\n", + "| fps | 103 |\n", + "| time_elapsed | 1679 |\n", + "| total_timesteps | 3173425 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.474 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.36e+03 |\n", + "| ep_rew_mean | 169 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3440 |\n", + "| fps | 103 |\n", + "| time_elapsed | 1717 |\n", + "| total_timesteps | 3177653 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.419 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.09e+03 |\n", + "| ep_rew_mean | 170 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3444 |\n", + "| fps | 103 |\n", + "| time_elapsed | 1753 |\n", + "| total_timesteps | 3181742 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0663 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.1e+03 |\n", + "| ep_rew_mean | 171 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3448 |\n", + "| fps | 103 |\n", + "| time_elapsed | 1789 |\n", + "| total_timesteps | 3185646 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0768 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.09e+03 |\n", + "| ep_rew_mean | 174 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3452 |\n", + "| fps | 103 |\n", + "| time_elapsed | 1829 |\n", + "| total_timesteps | 3190098 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.327 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.1e+03 |\n", + "| ep_rew_mean | 175 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3456 |\n", + "| fps | 104 |\n", + "| time_elapsed | 1871 |\n", + "| total_timesteps | 3194868 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0996 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.09e+03 |\n", + "| ep_rew_mean | 172 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3460 |\n", + "| fps | 104 |\n", + "| time_elapsed | 1911 |\n", + "| total_timesteps | 3199365 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.149 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.1e+03 |\n", + "| ep_rew_mean | 174 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3464 |\n", + "| fps | 104 |\n", + "| time_elapsed | 1956 |\n", + "| total_timesteps | 3204383 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.315 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.1e+03 |\n", + "| ep_rew_mean | 175 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3468 |\n", + "| fps | 104 |\n", + "| time_elapsed | 2000 |\n", + "| total_timesteps | 3209264 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0518 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.11e+03 |\n", + "| ep_rew_mean | 179 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3472 |\n", + "| fps | 104 |\n", + "| time_elapsed | 2041 |\n", + "| total_timesteps | 3213887 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.388 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.1e+03 |\n", + "| ep_rew_mean | 177 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3476 |\n", + "| fps | 104 |\n", + "| time_elapsed | 2077 |\n", + "| total_timesteps | 3217888 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.18 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.1e+03 |\n", + "| ep_rew_mean | 175 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3480 |\n", + "| fps | 104 |\n", + "| time_elapsed | 2117 |\n", + "| total_timesteps | 3222302 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.13 |\n", + "----------------------------------\n", + "Eval num_timesteps=3225000, episode_reward=270.80 +/- 121.72\n", + "Episode length: 1096.40 +/- 217.57\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 1.1e+03 |\n", + "| mean_reward | 271 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 3225000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.114 |\n", + "----------------------------------\n", + "New best mean reward!\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.11e+03 |\n", + "| ep_rew_mean | 174 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3484 |\n", + "| fps | 103 |\n", + "| time_elapsed | 2185 |\n", + "| total_timesteps | 3226908 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0847 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.12e+03 |\n", + "| ep_rew_mean | 177 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3488 |\n", + "| fps | 103 |\n", + "| time_elapsed | 2227 |\n", + "| total_timesteps | 3231548 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.155 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.11e+03 |\n", + "| ep_rew_mean | 180 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3492 |\n", + "| fps | 104 |\n", + "| time_elapsed | 2261 |\n", + "| total_timesteps | 3235411 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.137 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.11e+03 |\n", + "| ep_rew_mean | 180 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3496 |\n", + "| fps | 104 |\n", + "| time_elapsed | 2300 |\n", + "| total_timesteps | 3239711 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.329 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.12e+03 |\n", + "| ep_rew_mean | 181 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3500 |\n", + "| fps | 104 |\n", + "| time_elapsed | 2351 |\n", + "| total_timesteps | 3245451 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0808 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.12e+03 |\n", + "| ep_rew_mean | 180 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3504 |\n", + "| fps | 104 |\n", + "| time_elapsed | 2392 |\n", + "| total_timesteps | 3250026 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.115 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.12e+03 |\n", + "| ep_rew_mean | 177 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3508 |\n", + "| fps | 104 |\n", + "| time_elapsed | 2426 |\n", + "| total_timesteps | 3253824 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.141 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.13e+03 |\n", + "| ep_rew_mean | 181 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3512 |\n", + "| fps | 104 |\n", + "| time_elapsed | 2473 |\n", + "| total_timesteps | 3259154 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.134 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 177 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3516 |\n", + "| fps | 104 |\n", + "| time_elapsed | 2517 |\n", + "| total_timesteps | 3264074 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0586 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 168 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3520 |\n", + "| fps | 105 |\n", + "| time_elapsed | 2562 |\n", + "| total_timesteps | 3269066 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0767 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 168 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3524 |\n", + "| fps | 105 |\n", + "| time_elapsed | 2609 |\n", + "| total_timesteps | 3274332 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0772 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 165 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3528 |\n", + "| fps | 105 |\n", + "| time_elapsed | 2652 |\n", + "| total_timesteps | 3279168 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0604 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 167 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3532 |\n", + "| fps | 105 |\n", + "| time_elapsed | 2691 |\n", + "| total_timesteps | 3283458 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0755 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 169 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3536 |\n", + "| fps | 105 |\n", + "| time_elapsed | 2736 |\n", + "| total_timesteps | 3288592 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.117 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 167 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3540 |\n", + "| fps | 105 |\n", + "| time_elapsed | 2775 |\n", + "| total_timesteps | 3292912 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0786 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 164 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3544 |\n", + "| fps | 105 |\n", + "| time_elapsed | 2819 |\n", + "| total_timesteps | 3297801 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.769 |\n", + "----------------------------------\n", + "Eval num_timesteps=3300000, episode_reward=147.70 +/- 63.64\n", + "Episode length: 1203.00 +/- 228.16\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 1.2e+03 |\n", + "| mean_reward | 148 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 3300000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.141 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.18e+03 |\n", + "| ep_rew_mean | 162 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3548 |\n", + "| fps | 104 |\n", + "| time_elapsed | 2897 |\n", + "| total_timesteps | 3303207 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.197 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.18e+03 |\n", + "| ep_rew_mean | 161 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3552 |\n", + "| fps | 104 |\n", + "| time_elapsed | 2943 |\n", + "| total_timesteps | 3308334 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.281 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.18e+03 |\n", + "| ep_rew_mean | 158 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3556 |\n", + "| fps | 104 |\n", + "| time_elapsed | 2981 |\n", + "| total_timesteps | 3312592 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0784 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.18e+03 |\n", + "| ep_rew_mean | 156 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3560 |\n", + "| fps | 104 |\n", + "| time_elapsed | 3021 |\n", + "| total_timesteps | 3317017 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0725 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 152 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3564 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3054 |\n", + "| total_timesteps | 3320755 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.104 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 149 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3568 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3089 |\n", + "| total_timesteps | 3324621 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.115 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 147 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3572 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3128 |\n", + "| total_timesteps | 3329001 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.281 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 147 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3576 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3165 |\n", + "| total_timesteps | 3333061 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.311 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 151 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3580 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3204 |\n", + "| total_timesteps | 3337452 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.27 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 153 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3584 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3249 |\n", + "| total_timesteps | 3342402 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.24 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 152 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3588 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3289 |\n", + "| total_timesteps | 3346834 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.188 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 150 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3592 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3336 |\n", + "| total_timesteps | 3352098 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.569 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 153 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3596 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3379 |\n", + "| total_timesteps | 3356900 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 1.49 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 154 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3600 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3421 |\n", + "| total_timesteps | 3361592 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.609 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 160 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3604 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3473 |\n", + "| total_timesteps | 3367384 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.115 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 158 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3608 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3521 |\n", + "| total_timesteps | 3372676 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.124 |\n", + "----------------------------------\n", + "Eval num_timesteps=3375000, episode_reward=67.00 +/- 21.59\n", + "Episode length: 996.50 +/- 147.15\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 996 |\n", + "| mean_reward | 67 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 3375000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.858 |\n", + "----------------------------------\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.10/site-packages/gymnasium/utils/passive_env_checker.py:335: UserWarning: \u001b[33mWARN: No render fps was declared in the environment (env.metadata['render_fps'] is None or not defined), rendering may occur at inconsistent fps.\u001b[0m\n", + " logger.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 155 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3612 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3596 |\n", + "| total_timesteps | 3378339 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.13 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 156 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3616 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3645 |\n", + "| total_timesteps | 3383354 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.103 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 159 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3620 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3683 |\n", + "| total_timesteps | 3387970 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.163 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.18e+03 |\n", + "| ep_rew_mean | 160 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3624 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3720 |\n", + "| total_timesteps | 3392498 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.149 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.18e+03 |\n", + "| ep_rew_mean | 161 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3628 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3755 |\n", + "| total_timesteps | 3396690 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.196 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.18e+03 |\n", + "| ep_rew_mean | 162 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3632 |\n", + "| fps | 105 |\n", + "| time_elapsed | 3795 |\n", + "| total_timesteps | 3401577 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.15 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.42e+03 |\n", + "| ep_rew_mean | 159 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3636 |\n", + "| fps | 106 |\n", + "| time_elapsed | 4035 |\n", + "| total_timesteps | 3431044 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.362 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.42e+03 |\n", + "| ep_rew_mean | 159 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3640 |\n", + "| fps | 106 |\n", + "| time_elapsed | 4064 |\n", + "| total_timesteps | 3434572 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0964 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.4e+03 |\n", + "| ep_rew_mean | 155 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3644 |\n", + "| fps | 107 |\n", + "| time_elapsed | 4091 |\n", + "| total_timesteps | 3437924 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.198 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.38e+03 |\n", + "| ep_rew_mean | 151 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3648 |\n", + "| fps | 107 |\n", + "| time_elapsed | 4118 |\n", + "| total_timesteps | 3441232 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.163 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.37e+03 |\n", + "| ep_rew_mean | 151 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3652 |\n", + "| fps | 107 |\n", + "| time_elapsed | 4150 |\n", + "| total_timesteps | 3445120 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0879 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.37e+03 |\n", + "| ep_rew_mean | 153 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3656 |\n", + "| fps | 107 |\n", + "| time_elapsed | 4183 |\n", + "| total_timesteps | 3449150 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.372 |\n", + "----------------------------------\n", + "Eval num_timesteps=3450000, episode_reward=139.50 +/- 58.55\n", + "Episode length: 1121.00 +/- 100.81\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 1.12e+03 |\n", + "| mean_reward | 140 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 3450000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.262 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.37e+03 |\n", + "| ep_rew_mean | 155 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3660 |\n", + "| fps | 106 |\n", + "| time_elapsed | 4247 |\n", + "| total_timesteps | 3453596 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0966 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.37e+03 |\n", + "| ep_rew_mean | 155 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3664 |\n", + "| fps | 106 |\n", + "| time_elapsed | 4284 |\n", + "| total_timesteps | 3458164 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.193 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.38e+03 |\n", + "| ep_rew_mean | 156 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3668 |\n", + "| fps | 107 |\n", + "| time_elapsed | 4320 |\n", + "| total_timesteps | 3462504 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.131 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.38e+03 |\n", + "| ep_rew_mean | 157 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3672 |\n", + "| fps | 107 |\n", + "| time_elapsed | 4356 |\n", + "| total_timesteps | 3466934 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0864 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.39e+03 |\n", + "| ep_rew_mean | 158 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3676 |\n", + "| fps | 107 |\n", + "| time_elapsed | 4394 |\n", + "| total_timesteps | 3471644 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.194 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.38e+03 |\n", + "| ep_rew_mean | 152 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3680 |\n", + "| fps | 107 |\n", + "| time_elapsed | 4428 |\n", + "| total_timesteps | 3475694 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.204 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.37e+03 |\n", + "| ep_rew_mean | 152 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3684 |\n", + "| fps | 107 |\n", + "| time_elapsed | 4461 |\n", + "| total_timesteps | 3479764 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0645 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.37e+03 |\n", + "| ep_rew_mean | 153 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3688 |\n", + "| fps | 107 |\n", + "| time_elapsed | 4496 |\n", + "| total_timesteps | 3484018 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.329 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.36e+03 |\n", + "| ep_rew_mean | 150 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3692 |\n", + "| fps | 107 |\n", + "| time_elapsed | 4530 |\n", + "| total_timesteps | 3488208 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.263 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.35e+03 |\n", + "| ep_rew_mean | 146 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3696 |\n", + "| fps | 107 |\n", + "| time_elapsed | 4563 |\n", + "| total_timesteps | 3492267 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.179 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.35e+03 |\n", + "| ep_rew_mean | 144 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3700 |\n", + "| fps | 107 |\n", + "| time_elapsed | 4600 |\n", + "| total_timesteps | 3496817 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.151 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.34e+03 |\n", + "| ep_rew_mean | 143 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3704 |\n", + "| fps | 108 |\n", + "| time_elapsed | 4636 |\n", + "| total_timesteps | 3501167 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.358 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.33e+03 |\n", + "| ep_rew_mean | 144 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3708 |\n", + "| fps | 108 |\n", + "| time_elapsed | 4670 |\n", + "| total_timesteps | 3505329 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.243 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.32e+03 |\n", + "| ep_rew_mean | 146 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3712 |\n", + "| fps | 108 |\n", + "| time_elapsed | 4709 |\n", + "| total_timesteps | 3510139 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.279 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.32e+03 |\n", + "| ep_rew_mean | 146 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3716 |\n", + "| fps | 108 |\n", + "| time_elapsed | 4749 |\n", + "| total_timesteps | 3515054 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.216 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.33e+03 |\n", + "| ep_rew_mean | 145 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3720 |\n", + "| fps | 108 |\n", + "| time_elapsed | 4793 |\n", + "| total_timesteps | 3520476 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.155 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.32e+03 |\n", + "| ep_rew_mean | 140 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3724 |\n", + "| fps | 108 |\n", + "| time_elapsed | 4829 |\n", + "| total_timesteps | 3524793 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.178 |\n", + "----------------------------------\n", + "Eval num_timesteps=3525000, episode_reward=231.70 +/- 79.90\n", + "Episode length: 1138.20 +/- 122.98\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 1.14e+03 |\n", + "| mean_reward | 232 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 3525000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.178 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.32e+03 |\n", + "| ep_rew_mean | 136 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3728 |\n", + "| fps | 108 |\n", + "| time_elapsed | 4891 |\n", + "| total_timesteps | 3529123 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.124 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.33e+03 |\n", + "| ep_rew_mean | 132 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3732 |\n", + "| fps | 108 |\n", + "| time_elapsed | 4932 |\n", + "| total_timesteps | 3534084 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.137 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.07e+03 |\n", + "| ep_rew_mean | 133 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3736 |\n", + "| fps | 108 |\n", + "| time_elapsed | 4965 |\n", + "| total_timesteps | 3538100 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.465 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.08e+03 |\n", + "| ep_rew_mean | 137 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3740 |\n", + "| fps | 108 |\n", + "| time_elapsed | 5006 |\n", + "| total_timesteps | 3543054 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.206 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.1e+03 |\n", + "| ep_rew_mean | 139 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3744 |\n", + "| fps | 108 |\n", + "| time_elapsed | 5045 |\n", + "| total_timesteps | 3547769 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.217 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.11e+03 |\n", + "| ep_rew_mean | 142 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3748 |\n", + "| fps | 108 |\n", + "| time_elapsed | 5084 |\n", + "| total_timesteps | 3552541 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.175 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.12e+03 |\n", + "| ep_rew_mean | 141 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3752 |\n", + "| fps | 108 |\n", + "| time_elapsed | 5124 |\n", + "| total_timesteps | 3557411 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.241 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.13e+03 |\n", + "| ep_rew_mean | 139 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3756 |\n", + "| fps | 108 |\n", + "| time_elapsed | 5162 |\n", + "| total_timesteps | 3562105 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.178 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.12e+03 |\n", + "| ep_rew_mean | 136 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3760 |\n", + "| fps | 108 |\n", + "| time_elapsed | 5191 |\n", + "| total_timesteps | 3565591 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.21 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.12e+03 |\n", + "| ep_rew_mean | 138 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3764 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5227 |\n", + "| total_timesteps | 3570055 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.186 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.12e+03 |\n", + "| ep_rew_mean | 137 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3768 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5263 |\n", + "| total_timesteps | 3574396 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.154 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.11e+03 |\n", + "| ep_rew_mean | 135 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3772 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5293 |\n", + "| total_timesteps | 3578034 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0891 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.12e+03 |\n", + "| ep_rew_mean | 137 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3776 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5335 |\n", + "| total_timesteps | 3583181 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.358 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.12e+03 |\n", + "| ep_rew_mean | 137 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3780 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5370 |\n", + "| total_timesteps | 3587535 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.224 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.13e+03 |\n", + "| ep_rew_mean | 138 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3784 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5409 |\n", + "| total_timesteps | 3592280 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.106 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.13e+03 |\n", + "| ep_rew_mean | 135 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3788 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5445 |\n", + "| total_timesteps | 3596632 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.327 |\n", + "----------------------------------\n", + "Eval num_timesteps=3600000, episode_reward=297.00 +/- 63.53\n", + "Episode length: 1166.70 +/- 204.03\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 1.17e+03 |\n", + "| mean_reward | 297 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 3600000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.409 |\n", + "----------------------------------\n", + "New best mean reward!\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 138 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3792 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5517 |\n", + "| total_timesteps | 3601836 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.165 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 139 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3796 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5552 |\n", + "| total_timesteps | 3606184 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.141 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 138 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3800 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5590 |\n", + "| total_timesteps | 3610856 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.154 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 137 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3804 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5631 |\n", + "| total_timesteps | 3615786 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0843 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 138 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3808 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5670 |\n", + "| total_timesteps | 3620562 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.149 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 140 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3812 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5717 |\n", + "| total_timesteps | 3626290 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.427 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 145 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3816 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5755 |\n", + "| total_timesteps | 3630975 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.192 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 150 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3820 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5796 |\n", + "| total_timesteps | 3635945 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.102 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 155 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3824 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5837 |\n", + "| total_timesteps | 3640950 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.319 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 160 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3828 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5877 |\n", + "| total_timesteps | 3645750 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.162 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 158 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3832 |\n", + "| fps | 109 |\n", + "| time_elapsed | 5911 |\n", + "| total_timesteps | 3649904 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.108 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 164 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3836 |\n", + "| fps | 110 |\n", + "| time_elapsed | 5945 |\n", + "| total_timesteps | 3654031 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.933 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 164 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3840 |\n", + "| fps | 110 |\n", + "| time_elapsed | 5989 |\n", + "| total_timesteps | 3659351 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0665 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 170 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3844 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6031 |\n", + "| total_timesteps | 3664461 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0967 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 168 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3848 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6060 |\n", + "| total_timesteps | 3668049 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.134 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 169 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3852 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6095 |\n", + "| total_timesteps | 3672283 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.598 |\n", + "----------------------------------\n", + "Eval num_timesteps=3675000, episode_reward=336.20 +/- 65.54\n", + "Episode length: 1168.00 +/- 256.37\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 1.17e+03 |\n", + "| mean_reward | 336 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 3675000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.873 |\n", + "----------------------------------\n", + "New best mean reward!\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 172 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3856 |\n", + "| fps | 109 |\n", + "| time_elapsed | 6162 |\n", + "| total_timesteps | 3676755 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.192 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 175 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3860 |\n", + "| fps | 109 |\n", + "| time_elapsed | 6197 |\n", + "| total_timesteps | 3680979 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.27 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 174 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3864 |\n", + "| fps | 109 |\n", + "| time_elapsed | 6235 |\n", + "| total_timesteps | 3685617 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.104 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 176 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3868 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6278 |\n", + "| total_timesteps | 3690768 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.203 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 178 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3872 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6314 |\n", + "| total_timesteps | 3695117 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.196 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 181 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3876 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6355 |\n", + "| total_timesteps | 3700145 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.16 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 187 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3880 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6391 |\n", + "| total_timesteps | 3704529 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.107 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 186 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3884 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6433 |\n", + "| total_timesteps | 3709595 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0936 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 188 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3888 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6478 |\n", + "| total_timesteps | 3715161 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.257 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 188 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3892 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6527 |\n", + "| total_timesteps | 3721086 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.049 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.2e+03 |\n", + "| ep_rew_mean | 192 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3896 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6571 |\n", + "| total_timesteps | 3726448 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.314 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.2e+03 |\n", + "| ep_rew_mean | 198 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3900 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6609 |\n", + "| total_timesteps | 3730992 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.127 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 198 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3904 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6643 |\n", + "| total_timesteps | 3735152 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.156 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.2e+03 |\n", + "| ep_rew_mean | 199 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3908 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6685 |\n", + "| total_timesteps | 3740213 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.478 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 198 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3912 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6726 |\n", + "| total_timesteps | 3745249 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0895 |\n", + "----------------------------------\n", + "Eval num_timesteps=3750000, episode_reward=178.30 +/- 80.17\n", + "Episode length: 1179.00 +/- 108.40\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 1.18e+03 |\n", + "| mean_reward | 178 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 3750000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0705 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 194 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3916 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6798 |\n", + "| total_timesteps | 3750130 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0779 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 193 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3920 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6847 |\n", + "| total_timesteps | 3755350 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 2.54 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 190 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3924 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6884 |\n", + "| total_timesteps | 3759897 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.104 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 190 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3928 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6924 |\n", + "| total_timesteps | 3764727 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.14 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 195 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3932 |\n", + "| fps | 110 |\n", + "| time_elapsed | 6960 |\n", + "| total_timesteps | 3769189 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.815 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.2e+03 |\n", + "| ep_rew_mean | 192 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3936 |\n", + "| fps | 110 |\n", + "| time_elapsed | 7000 |\n", + "| total_timesteps | 3774035 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.139 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 187 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3940 |\n", + "| fps | 110 |\n", + "| time_elapsed | 7038 |\n", + "| total_timesteps | 3778693 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.205 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.18e+03 |\n", + "| ep_rew_mean | 183 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3944 |\n", + "| fps | 110 |\n", + "| time_elapsed | 7071 |\n", + "| total_timesteps | 3782819 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0804 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.45e+03 |\n", + "| ep_rew_mean | 189 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3948 |\n", + "| fps | 111 |\n", + "| time_elapsed | 7321 |\n", + "| total_timesteps | 3813422 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0872 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.45e+03 |\n", + "| ep_rew_mean | 187 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3952 |\n", + "| fps | 111 |\n", + "| time_elapsed | 7355 |\n", + "| total_timesteps | 3817624 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.167 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.45e+03 |\n", + "| ep_rew_mean | 190 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3956 |\n", + "| fps | 111 |\n", + "| time_elapsed | 7392 |\n", + "| total_timesteps | 3822080 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0481 |\n", + "----------------------------------\n", + "Eval num_timesteps=3825000, episode_reward=291.40 +/- 118.67\n", + "Episode length: 1061.00 +/- 98.07\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 1.06e+03 |\n", + "| mean_reward | 291 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 3825000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0784 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.45e+03 |\n", + "| ep_rew_mean | 193 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3960 |\n", + "| fps | 110 |\n", + "| time_elapsed | 7452 |\n", + "| total_timesteps | 3826228 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0676 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.45e+03 |\n", + "| ep_rew_mean | 197 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3964 |\n", + "| fps | 110 |\n", + "| time_elapsed | 7488 |\n", + "| total_timesteps | 3830620 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0738 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.45e+03 |\n", + "| ep_rew_mean | 199 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3968 |\n", + "| fps | 110 |\n", + "| time_elapsed | 7527 |\n", + "| total_timesteps | 3835389 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0534 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.44e+03 |\n", + "| ep_rew_mean | 199 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3972 |\n", + "| fps | 111 |\n", + "| time_elapsed | 7554 |\n", + "| total_timesteps | 3838692 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.253 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.43e+03 |\n", + "| ep_rew_mean | 200 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3976 |\n", + "| fps | 111 |\n", + "| time_elapsed | 7590 |\n", + "| total_timesteps | 3843094 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.176 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.43e+03 |\n", + "| ep_rew_mean | 199 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3980 |\n", + "| fps | 111 |\n", + "| time_elapsed | 7623 |\n", + "| total_timesteps | 3847112 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.051 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.43e+03 |\n", + "| ep_rew_mean | 207 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3984 |\n", + "| fps | 111 |\n", + "| time_elapsed | 7667 |\n", + "| total_timesteps | 3852617 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.146 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.42e+03 |\n", + "| ep_rew_mean | 207 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3988 |\n", + "| fps | 111 |\n", + "| time_elapsed | 7704 |\n", + "| total_timesteps | 3857101 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.35 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.41e+03 |\n", + "| ep_rew_mean | 208 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3992 |\n", + "| fps | 111 |\n", + "| time_elapsed | 7746 |\n", + "| total_timesteps | 3862315 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.12 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.4e+03 |\n", + "| ep_rew_mean | 212 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 3996 |\n", + "| fps | 111 |\n", + "| time_elapsed | 7783 |\n", + "| total_timesteps | 3866815 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0721 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.41e+03 |\n", + "| ep_rew_mean | 213 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4000 |\n", + "| fps | 111 |\n", + "| time_elapsed | 7827 |\n", + "| total_timesteps | 3872207 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0977 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.42e+03 |\n", + "| ep_rew_mean | 213 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4004 |\n", + "| fps | 111 |\n", + "| time_elapsed | 7863 |\n", + "| total_timesteps | 3876663 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0597 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.41e+03 |\n", + "| ep_rew_mean | 218 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4008 |\n", + "| fps | 111 |\n", + "| time_elapsed | 7902 |\n", + "| total_timesteps | 3881433 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.136 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.41e+03 |\n", + "| ep_rew_mean | 221 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4012 |\n", + "| fps | 111 |\n", + "| time_elapsed | 7941 |\n", + "| total_timesteps | 3886139 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.338 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.41e+03 |\n", + "| ep_rew_mean | 226 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4016 |\n", + "| fps | 111 |\n", + "| time_elapsed | 7979 |\n", + "| total_timesteps | 3890864 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0728 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.4e+03 |\n", + "| ep_rew_mean | 221 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4020 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8017 |\n", + "| total_timesteps | 3895462 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.198 |\n", + "----------------------------------\n", + "Eval num_timesteps=3900000, episode_reward=237.40 +/- 97.81\n", + "Episode length: 1250.00 +/- 235.18\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 1.25e+03 |\n", + "| mean_reward | 237 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 3900000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.313 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.41e+03 |\n", + "| ep_rew_mean | 223 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4024 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8088 |\n", + "| total_timesteps | 3900544 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.427 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.4e+03 |\n", + "| ep_rew_mean | 225 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4028 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8122 |\n", + "| total_timesteps | 3904683 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.214 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.4e+03 |\n", + "| ep_rew_mean | 225 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4032 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8158 |\n", + "| total_timesteps | 3909020 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.172 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.4e+03 |\n", + "| ep_rew_mean | 229 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4036 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8200 |\n", + "| total_timesteps | 3914174 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.246 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.4e+03 |\n", + "| ep_rew_mean | 231 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4040 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8233 |\n", + "| total_timesteps | 3918200 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.179 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.39e+03 |\n", + "| ep_rew_mean | 234 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4044 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8264 |\n", + "| total_timesteps | 3921982 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0812 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.13e+03 |\n", + "| ep_rew_mean | 233 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4048 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8303 |\n", + "| total_timesteps | 3926817 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.667 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.13e+03 |\n", + "| ep_rew_mean | 235 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4052 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8337 |\n", + "| total_timesteps | 3931017 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0873 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 236 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4056 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8378 |\n", + "| total_timesteps | 3935997 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0962 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 235 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4060 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8415 |\n", + "| total_timesteps | 3940503 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.117 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 235 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4064 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8454 |\n", + "| total_timesteps | 3945308 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.421 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 232 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4068 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8487 |\n", + "| total_timesteps | 3949294 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.105 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 237 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4072 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8531 |\n", + "| total_timesteps | 3954694 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 2.88 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 235 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4076 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8571 |\n", + "| total_timesteps | 3959620 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.136 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.18e+03 |\n", + "| ep_rew_mean | 241 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4080 |\n", + "| fps | 112 |\n", + "| time_elapsed | 8613 |\n", + "| total_timesteps | 3964690 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.271 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 230 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4084 |\n", + "| fps | 112 |\n", + "| time_elapsed | 8649 |\n", + "| total_timesteps | 3969162 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.132 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 230 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4088 |\n", + "| fps | 112 |\n", + "| time_elapsed | 8690 |\n", + "| total_timesteps | 3974190 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.104 |\n", + "----------------------------------\n", + "Eval num_timesteps=3975000, episode_reward=66.50 +/- 43.81\n", + "Episode length: 831.00 +/- 78.18\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 831 |\n", + "| mean_reward | 66.5 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 3975000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.104 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 231 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4092 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8748 |\n", + "| total_timesteps | 3978793 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.167 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 229 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4096 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8787 |\n", + "| total_timesteps | 3983603 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.142 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 223 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4100 |\n", + "| fps | 111 |\n", + "| time_elapsed | 8826 |\n", + "| total_timesteps | 3988305 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.157 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 218 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4104 |\n", + "| fps | 112 |\n", + "| time_elapsed | 8862 |\n", + "| total_timesteps | 3992719 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.682 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 215 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4108 |\n", + "| fps | 112 |\n", + "| time_elapsed | 8904 |\n", + "| total_timesteps | 3997887 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.293 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 213 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4112 |\n", + "| fps | 112 |\n", + "| time_elapsed | 8947 |\n", + "| total_timesteps | 4003137 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.184 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 212 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4116 |\n", + "| fps | 112 |\n", + "| time_elapsed | 8984 |\n", + "| total_timesteps | 4007658 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.111 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 215 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4120 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9027 |\n", + "| total_timesteps | 4012930 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.133 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.18e+03 |\n", + "| ep_rew_mean | 218 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4124 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9071 |\n", + "| total_timesteps | 4018316 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.171 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.18e+03 |\n", + "| ep_rew_mean | 221 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4128 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9108 |\n", + "| total_timesteps | 4022777 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.146 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.18e+03 |\n", + "| ep_rew_mean | 216 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4132 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9140 |\n", + "| total_timesteps | 4026671 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.523 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 208 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4136 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9168 |\n", + "| total_timesteps | 4030087 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.451 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 206 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4140 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9201 |\n", + "| total_timesteps | 4034102 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.251 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 200 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4144 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9241 |\n", + "| total_timesteps | 4039008 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.121 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 195 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4148 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9278 |\n", + "| total_timesteps | 4043596 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.054 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 192 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4152 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9311 |\n", + "| total_timesteps | 4047666 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.12 |\n", + "----------------------------------\n", + "Eval num_timesteps=4050000, episode_reward=103.20 +/- 95.76\n", + "Episode length: 1099.80 +/- 209.71\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 1.1e+03 |\n", + "| mean_reward | 103 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 4050000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.268 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 186 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4156 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9376 |\n", + "| total_timesteps | 4052322 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.101 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 181 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4160 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9412 |\n", + "| total_timesteps | 4056758 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.282 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 176 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4164 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9445 |\n", + "| total_timesteps | 4060744 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0935 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 174 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4168 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9484 |\n", + "| total_timesteps | 4065502 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.101 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 166 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4172 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9513 |\n", + "| total_timesteps | 4069074 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.178 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 166 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4176 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9552 |\n", + "| total_timesteps | 4073820 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.12 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 163 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4180 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9590 |\n", + "| total_timesteps | 4078474 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0574 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 169 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4184 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9628 |\n", + "| total_timesteps | 4083179 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.128 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 169 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4188 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9666 |\n", + "| total_timesteps | 4087717 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0583 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.13e+03 |\n", + "| ep_rew_mean | 168 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4192 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9702 |\n", + "| total_timesteps | 4092159 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.27 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 169 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4196 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9749 |\n", + "| total_timesteps | 4097933 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0987 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 170 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4200 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9787 |\n", + "| total_timesteps | 4102585 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.208 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 174 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4204 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9828 |\n", + "| total_timesteps | 4107604 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.171 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 173 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4208 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9870 |\n", + "| total_timesteps | 4112749 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.135 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 176 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4212 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9911 |\n", + "| total_timesteps | 4117694 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0774 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 176 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4216 |\n", + "| fps | 112 |\n", + "| time_elapsed | 9953 |\n", + "| total_timesteps | 4122898 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0458 |\n", + "----------------------------------\n", + "Eval num_timesteps=4125000, episode_reward=242.90 +/- 82.48\n", + "Episode length: 1305.50 +/- 195.95\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 1.31e+03 |\n", + "| mean_reward | 243 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 4125000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.229 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 173 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4220 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10028 |\n", + "| total_timesteps | 4127816 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.137 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 171 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4224 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10068 |\n", + "| total_timesteps | 4131971 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.114 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 166 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4228 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10108 |\n", + "| total_timesteps | 4136886 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.104 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 168 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4232 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10144 |\n", + "| total_timesteps | 4141296 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.157 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 176 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4236 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10184 |\n", + "| total_timesteps | 4146198 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0935 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 179 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4240 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10223 |\n", + "| total_timesteps | 4150914 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.117 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 184 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4244 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10261 |\n", + "| total_timesteps | 4155542 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.14 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 187 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4248 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10295 |\n", + "| total_timesteps | 4159724 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 1.28 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 189 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4252 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10329 |\n", + "| total_timesteps | 4164000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0635 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 196 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4256 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10364 |\n", + "| total_timesteps | 4168266 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0836 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 198 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4260 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10406 |\n", + "| total_timesteps | 4173384 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.11 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.18e+03 |\n", + "| ep_rew_mean | 205 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4264 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10446 |\n", + "| total_timesteps | 4178297 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.195 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.18e+03 |\n", + "| ep_rew_mean | 210 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4268 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10490 |\n", + "| total_timesteps | 4183638 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.167 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 214 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4272 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10527 |\n", + "| total_timesteps | 4188130 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.346 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 213 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4276 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10566 |\n", + "| total_timesteps | 4192918 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0573 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 207 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4280 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10605 |\n", + "| total_timesteps | 4197588 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.575 |\n", + "----------------------------------\n", + "Eval num_timesteps=4200000, episode_reward=95.90 +/- 37.91\n", + "Episode length: 1303.90 +/- 154.47\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 1.3e+03 |\n", + "| mean_reward | 95.9 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 4200000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.153 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.45e+03 |\n", + "| ep_rew_mean | 213 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4284 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10882 |\n", + "| total_timesteps | 4227700 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 1.35 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.44e+03 |\n", + "| ep_rew_mean | 215 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4288 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10917 |\n", + "| total_timesteps | 4232038 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0596 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.45e+03 |\n", + "| ep_rew_mean | 213 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4292 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10955 |\n", + "| total_timesteps | 4236698 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0933 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.43e+03 |\n", + "| ep_rew_mean | 214 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4296 |\n", + "| fps | 112 |\n", + "| time_elapsed | 10991 |\n", + "| total_timesteps | 4241048 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0324 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.43e+03 |\n", + "| ep_rew_mean | 219 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4300 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11028 |\n", + "| total_timesteps | 4245586 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0444 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.42e+03 |\n", + "| ep_rew_mean | 226 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4304 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11060 |\n", + "| total_timesteps | 4249431 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0635 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.4e+03 |\n", + "| ep_rew_mean | 229 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4308 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11089 |\n", + "| total_timesteps | 4253022 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0392 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.39e+03 |\n", + "| ep_rew_mean | 228 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4312 |\n", + "| fps | 113 |\n", + "| time_elapsed | 11121 |\n", + "| total_timesteps | 4256946 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 1.43 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.38e+03 |\n", + "| ep_rew_mean | 227 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4316 |\n", + "| fps | 113 |\n", + "| time_elapsed | 11154 |\n", + "| total_timesteps | 4260876 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.17 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.37e+03 |\n", + "| ep_rew_mean | 226 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4320 |\n", + "| fps | 113 |\n", + "| time_elapsed | 11187 |\n", + "| total_timesteps | 4264896 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0656 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.38e+03 |\n", + "| ep_rew_mean | 225 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4324 |\n", + "| fps | 113 |\n", + "| time_elapsed | 11227 |\n", + "| total_timesteps | 4269880 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.204 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.38e+03 |\n", + "| ep_rew_mean | 229 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4328 |\n", + "| fps | 113 |\n", + "| time_elapsed | 11268 |\n", + "| total_timesteps | 4274896 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.67 |\n", + "----------------------------------\n", + "Eval num_timesteps=4275000, episode_reward=248.20 +/- 134.43\n", + "Episode length: 3895.90 +/- 7701.30\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 3.9e+03 |\n", + "| mean_reward | 248 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 4275000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.114 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.37e+03 |\n", + "| ep_rew_mean | 230 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4332 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11391 |\n", + "| total_timesteps | 4278465 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0533 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.38e+03 |\n", + "| ep_rew_mean | 230 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4336 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11435 |\n", + "| total_timesteps | 4283839 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.088 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.38e+03 |\n", + "| ep_rew_mean | 232 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4340 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11476 |\n", + "| total_timesteps | 4288894 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0715 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.38e+03 |\n", + "| ep_rew_mean | 233 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4344 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11512 |\n", + "| total_timesteps | 4293289 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.263 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.38e+03 |\n", + "| ep_rew_mean | 236 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4348 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11546 |\n", + "| total_timesteps | 4297424 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0843 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.38e+03 |\n", + "| ep_rew_mean | 236 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4352 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11587 |\n", + "| total_timesteps | 4302342 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.873 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.39e+03 |\n", + "| ep_rew_mean | 232 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4356 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11628 |\n", + "| total_timesteps | 4307348 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0952 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.38e+03 |\n", + "| ep_rew_mean | 234 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4360 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11663 |\n", + "| total_timesteps | 4311572 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.126 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.38e+03 |\n", + "| ep_rew_mean | 233 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4364 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11700 |\n", + "| total_timesteps | 4316178 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0642 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.37e+03 |\n", + "| ep_rew_mean | 231 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4368 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11736 |\n", + "| total_timesteps | 4320485 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.146 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.37e+03 |\n", + "| ep_rew_mean | 233 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4372 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11775 |\n", + "| total_timesteps | 4325277 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.102 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.36e+03 |\n", + "| ep_rew_mean | 235 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4376 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11808 |\n", + "| total_timesteps | 4329365 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.183 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.37e+03 |\n", + "| ep_rew_mean | 237 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4380 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11849 |\n", + "| total_timesteps | 4334371 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0539 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.12e+03 |\n", + "| ep_rew_mean | 231 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4384 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11889 |\n", + "| total_timesteps | 4339247 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.146 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.12e+03 |\n", + "| ep_rew_mean | 229 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4388 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11928 |\n", + "| total_timesteps | 4343983 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.229 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.13e+03 |\n", + "| ep_rew_mean | 235 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4392 |\n", + "| fps | 112 |\n", + "| time_elapsed | 11971 |\n", + "| total_timesteps | 4349241 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.695 |\n", + "----------------------------------\n", + "Eval num_timesteps=4350000, episode_reward=258.40 +/- 107.18\n", + "Episode length: 1256.90 +/- 159.73\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 1.26e+03 |\n", + "| mean_reward | 258 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 4350000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0545 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.13e+03 |\n", + "| ep_rew_mean | 236 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4396 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12043 |\n", + "| total_timesteps | 4354321 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.567 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 234 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4400 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12086 |\n", + "| total_timesteps | 4359611 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.253 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.15e+03 |\n", + "| ep_rew_mean | 228 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4404 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12122 |\n", + "| total_timesteps | 4364037 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 1.33 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 228 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4408 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12162 |\n", + "| total_timesteps | 4368905 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.366 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.17e+03 |\n", + "| ep_rew_mean | 224 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4412 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12206 |\n", + "| total_timesteps | 4374277 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.113 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 223 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4416 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12249 |\n", + "| total_timesteps | 4379587 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0764 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 227 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4420 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12284 |\n", + "| total_timesteps | 4383842 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0869 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 229 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4424 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12326 |\n", + "| total_timesteps | 4388932 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0827 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 225 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4428 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12362 |\n", + "| total_timesteps | 4393406 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.114 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.2e+03 |\n", + "| ep_rew_mean | 229 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4432 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12400 |\n", + "| total_timesteps | 4398014 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0306 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.18e+03 |\n", + "| ep_rew_mean | 224 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4436 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12435 |\n", + "| total_timesteps | 4402280 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 1.35 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.18e+03 |\n", + "| ep_rew_mean | 223 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4440 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12476 |\n", + "| total_timesteps | 4407274 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0949 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 221 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4444 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12518 |\n", + "| total_timesteps | 4412386 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.1 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.2e+03 |\n", + "| ep_rew_mean | 223 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4448 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12555 |\n", + "| total_timesteps | 4416932 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0498 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.2e+03 |\n", + "| ep_rew_mean | 225 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4452 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12596 |\n", + "| total_timesteps | 4421926 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.121 |\n", + "----------------------------------\n", + "Eval num_timesteps=4425000, episode_reward=138.70 +/- 30.61\n", + "Episode length: 1228.60 +/- 233.70\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 1.23e+03 |\n", + "| mean_reward | 139 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 4425000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.194 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 221 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4456 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12659 |\n", + "| total_timesteps | 4426048 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.162 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 218 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4460 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12695 |\n", + "| total_timesteps | 4430468 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0603 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 223 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4464 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12734 |\n", + "| total_timesteps | 4435153 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0892 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 223 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4468 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12773 |\n", + "| total_timesteps | 4439905 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.238 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 224 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4472 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12812 |\n", + "| total_timesteps | 4444711 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0677 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.2e+03 |\n", + "| ep_rew_mean | 222 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4476 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12850 |\n", + "| total_timesteps | 4449369 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0522 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.19e+03 |\n", + "| ep_rew_mean | 225 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4480 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12886 |\n", + "| total_timesteps | 4453814 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.151 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.18e+03 |\n", + "| ep_rew_mean | 219 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4484 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12913 |\n", + "| total_timesteps | 4457108 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0978 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.16e+03 |\n", + "| ep_rew_mean | 214 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4488 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12936 |\n", + "| total_timesteps | 4459859 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0614 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 207 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4492 |\n", + "| fps | 112 |\n", + "| time_elapsed | 12968 |\n", + "| total_timesteps | 4463729 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0981 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.14e+03 |\n", + "| ep_rew_mean | 206 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4496 |\n", + "| fps | 112 |\n", + "| time_elapsed | 13004 |\n", + "| total_timesteps | 4468138 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.181 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.13e+03 |\n", + "| ep_rew_mean | 211 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4500 |\n", + "| fps | 112 |\n", + "| time_elapsed | 13037 |\n", + "| total_timesteps | 4472184 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0693 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.13e+03 |\n", + "| ep_rew_mean | 212 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4504 |\n", + "| fps | 112 |\n", + "| time_elapsed | 13078 |\n", + "| total_timesteps | 4477178 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.109 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.12e+03 |\n", + "| ep_rew_mean | 212 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4508 |\n", + "| fps | 112 |\n", + "| time_elapsed | 13113 |\n", + "| total_timesteps | 4481382 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0498 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.12e+03 |\n", + "| ep_rew_mean | 215 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4512 |\n", + "| fps | 112 |\n", + "| time_elapsed | 13150 |\n", + "| total_timesteps | 4486008 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0818 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.11e+03 |\n", + "| ep_rew_mean | 217 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4516 |\n", + "| fps | 113 |\n", + "| time_elapsed | 13187 |\n", + "| total_timesteps | 4490476 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.271 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.11e+03 |\n", + "| ep_rew_mean | 214 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4520 |\n", + "| fps | 113 |\n", + "| time_elapsed | 13221 |\n", + "| total_timesteps | 4494638 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.152 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 1.1e+03 |\n", + "| ep_rew_mean | 211 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 4524 |\n", + "| fps | 113 |\n", + "| time_elapsed | 13256 |\n", + "| total_timesteps | 4498933 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0756 |\n", + "----------------------------------\n", + "Eval num_timesteps=4500000, episode_reward=218.00 +/- 135.33\n", + "Episode length: 1142.60 +/- 208.50\n", + "----------------------------------\n", + "| eval/ | |\n", + "| mean_ep_length | 1.14e+03 |\n", + "| mean_reward | 218 |\n", + "| rollout/ | |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| total_timesteps | 4500000 |\n", + "| train/ | |\n", + "| learning_rate | 5e-05 |\n", + "| loss | 0.0991 |\n", + "----------------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Train the model\n", + "model.learn(total_timesteps=NUM_TIMESTEPS, \n", + " callback=callback_list, \n", + " tb_log_name=\"./tb/\", \n", + " reset_num_timesteps=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "bda94f2d", + "metadata": { + "execution": { + "iopub.execute_input": "2024-05-11T01:41:04.663130Z", + "iopub.status.busy": "2024-05-11T01:41:04.662323Z", + "iopub.status.idle": "2024-05-11T01:41:57.585374Z", + "shell.execute_reply": "2024-05-11T01:41:57.584165Z" + }, + "papermill": { + "duration": 52.966023, + "end_time": "2024-05-11T01:41:57.588327", + "exception": false, + "start_time": "2024-05-11T01:41:04.622304", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Save the model, policy, and replay buffer for future loading and training\n", + "model.save(MODEL_FILE_NAME)\n", + "model.save_replay_buffer(BUFFER_FILE_NAME)\n", + "model.policy.save(POLICY_FILE_NAME)" + ] + } + ], + "metadata": { + "kaggle": { + "accelerator": "nvidiaTeslaT4", + "dataSources": [ + { + "sourceId": 176858961, + "sourceType": "kernelVersion" + } + ], + "isGpuEnabled": true, + "isInternetEnabled": true, + "language": "python", + "sourceType": "notebook" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + }, + "papermill": { + "default_parameters": {}, + "duration": 13561.860896, + "end_time": "2024-05-11T01:42:00.659070", + "environment_variables": {}, + "exception": null, + "input_path": "__notebook__.ipynb", + "output_path": "__notebook__.ipynb", + "parameters": {}, + "start_time": "2024-05-10T21:55:58.798174", + "version": "2.5.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}