{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "536a0475", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T21:56:02.774182Z", "iopub.status.busy": "2024-05-10T21:56:02.773894Z", "iopub.status.idle": "2024-05-10T21:57:24.594567Z", "shell.execute_reply": "2024-05-10T21:57:24.593319Z" }, "papermill": { "duration": 81.828617, "end_time": "2024-05-10T21:57:24.597342", "exception": false, "start_time": "2024-05-10T21:56:02.768725", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "%%capture\n", "!pip install stable-baselines3[extra]\n", "!pip install moviepy" ] }, { "cell_type": "code", "execution_count": 2, "id": "21e673cc", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T21:57:24.606314Z", "iopub.status.busy": "2024-05-10T21:57:24.606004Z", "iopub.status.idle": "2024-05-10T21:57:49.638696Z", "shell.execute_reply": "2024-05-10T21:57:49.637818Z" }, "papermill": { "duration": 25.039828, "end_time": "2024-05-10T21:57:49.641081", "exception": false, "start_time": "2024-05-10T21:57:24.601253", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-05-10 21:57:35.588946: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-05-10 21:57:35.589049: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-05-10 21:57:35.863074: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n" ] } ], "source": [ "from stable_baselines3 import DQN\n", "from stable_baselines3.common.monitor import Monitor\n", "from stable_baselines3.common.callbacks import BaseCallback, 
# ===== File names =====
# Base names used for the saved model, replay buffer and policy of this run.
MODEL_FILE_NAME = "ALE-Pacman-v5"
BUFFER_FILE_NAME = "dqn_replay_buffer_pacman_v2-2"
POLICY_FILE_NAME = "dqn_policy_pacman_v2-2"

# ===== Run configuration =====
NUM_TIMESTEPS = 1_500_000
FRAMESKIP = 4
# Evaluate 20 times over the run (every 75k steps).
EVAL_CALLBACK_FREQ = NUM_TIMESTEPS // 20
# Record a video every quarter of the run (every 375k steps).
# NOTE: the author observed that the final recording is not written and has
# to be produced manually.
VIDEO_CALLBACK_FREQ = NUM_TIMESTEPS // 4

# ===== Hyperparameters =====
EXPLORATION_FRACTION = 0.3
FINAL_EPSILON = 0.05
# Keep the buffer at or below roughly 60k transitions, otherwise it cannot be
# saved inside a Kaggle instance.
BUFFER_SIZE = 60_000
BATCH_SIZE = 64
LEARNING_STARTS = 50_000
LEARNING_RATE = 0.00005
GAMMA = 0.999
# The library default for the target update interval is 10k. The author read
# a value of 4 in the Nature DQN paper; that large discrepancy suggests the
# paper's number may describe a different quantity or be measured in
# different units -- TODO confirm before relying on either value.
TARGET_UPDATE_INTERVAL = 1_000

# ===== Overrides applied when the saved model is loaded =====
# Passed as `custom_objects` so the loaded model picks up these values
# instead of the ones stored in the checkpoint.
CUSTOM_OBJECTS = dict(
    exploration_fraction=EXPLORATION_FRACTION,
    buffer_size=BUFFER_SIZE,
    batch_size=BATCH_SIZE,
    learning_starts=LEARNING_STARTS,
    learning_rate=LEARNING_RATE,
    gamma=GAMMA,
    target_update_interval=TARGET_UPDATE_INTERVAL,
    exploration_final_eps=FINAL_EPSILON,
    tensorboard_log="./",
    verbose=1,
)
class VideoRecorderCallback(BaseCallback):
    """
    Records videos of the agent playing in ``eval_env`` and logs them to TensorBoard.

    A video is recorded every ``render_freq`` calls of the callback and once
    more when training ends, so the final policy is always captured.
    """

    def __init__(self, eval_env: gym.Env, render_freq: int, n_eval_episodes: int = 1, deterministic: bool = True):
        """
        :param eval_env: A gym environment from which the trajectory is recorded
        :param render_freq: Render the agent's trajectory every ``render_freq`` calls of the callback.
        :param n_eval_episodes: Number of episodes to render
        :param deterministic: Whether to use deterministic or stochastic policy
        :raises ValueError: If ``render_freq`` is not a positive integer
        """
        super().__init__()
        if render_freq <= 0:
            # Fail at construction instead of with a ZeroDivisionError mid-training.
            raise ValueError(f"render_freq must be a positive integer, got {render_freq}")
        self._eval_env = eval_env
        self._render_freq = render_freq
        self._n_eval_episodes = n_eval_episodes
        self._deterministic = deterministic
        # Value of ``n_calls`` at the most recent recording; lets the end-of-training
        # hook skip a redundant evaluation when the last step already recorded one.
        self._last_recorded_call = -1

    def _record_video(self) -> None:
        """Run the evaluation episode(s), collect the rendered frames and queue them for logging."""
        screens = []

        def grab_screens(_locals: Dict[str, Any], _globals: Dict[str, Any]) -> None:
            """
            Renders the environment in its current state, recording the screen in the captured `screens` list

            :param _locals: A dictionary containing all local variables of the callback's scope
            :param _globals: A dictionary containing all global variables of the callback's scope
            """
            screen = self._eval_env.render()
            if screen is None:
                # Nothing was rendered for this step (e.g. the env was not created
                # with an array render mode); skip instead of crashing on `.transpose`.
                return
            # PyTorch uses CxHxW vs HxWxC gym (and tensorflow) image convention
            screens.append(screen.transpose(2, 0, 1))

        evaluate_policy(
            self.model,
            self._eval_env,
            callback=grab_screens,
            n_eval_episodes=self._n_eval_episodes,
            deterministic=self._deterministic,
        )
        self._last_recorded_call = self.n_calls
        if not screens:
            # No frames were captured, so there is no video to log.
            return
        self.logger.record(
            "trajectory/video",
            # Wrapping in a list adds the leading batch dimension around the frame sequence.
            Video(th.from_numpy(np.asarray([screens])), fps=60),
            exclude=("stdout", "log", "json", "csv"),
        )

    def _on_step(self) -> bool:
        if self.n_calls % self._render_freq == 0:
            self._record_video()
        return True

    def _on_training_end(self) -> None:
        # Record the final policy unless the very last step already did so.
        if self._last_recorded_call != self.n_calls:
            self._record_video()
        # Values passed to `logger.record` are only written on the next dump;
        # dump here so the final video is not lost when training stops.
        self.logger.dump(self.num_timesteps)
class HParamCallback(BaseCallback):
    """
    Saves the hyperparameters and metrics at the start of the training, and logs them to TensorBoard.

    Hyperparameter values are coerced to types the TensorBoard hparams summary
    accepts, so a learning-rate schedule (a callable) or any other non-primitive
    value no longer aborts training at start-up.
    """

    def __init__(self, verbose: int = 0):
        """
        :param verbose: Verbosity level forwarded to ``BaseCallback``
        """
        super().__init__(verbose)

    @staticmethod
    def _to_hparam_value(value: Any) -> Any:
        """Return ``value`` unchanged if it is a plain hparam type, otherwise its string form."""
        if value is None or isinstance(value, (bool, int, float, str, th.Tensor)):
            return value
        return str(value)

    def _on_training_start(self) -> None:
        model = self.model

        learning_rate = model.learning_rate
        if callable(learning_rate):
            # A schedule was supplied instead of a constant. Presumably it takes the
            # remaining-progress fraction, with 1.0 meaning the start of training
            # (TODO confirm against the schedule in use).
            learning_rate = float(learning_rate(1.0))

        raw_hparams = {
            "algorithm": model.__class__.__name__,
            "policy": model.policy.__class__.__name__,
            "environment": model.env.__class__.__name__,
            "buffer_size": model.buffer_size,
            "batch_size": model.batch_size,
            "tau": model.tau,
            "gradient_steps": model.gradient_steps,
            "target_update_interval": model.target_update_interval,
            "exploration_fraction": model.exploration_fraction,
            "exploration_initial_eps": model.exploration_initial_eps,
            "exploration_final_eps": model.exploration_final_eps,
            "max_grad_norm": model.max_grad_norm,
            "tensorboard_log": model.tensorboard_log,
            "seed": model.seed,
            "learning rate": learning_rate,
            "gamma": model.gamma,
        }
        hparam_dict = {name: self._to_hparam_value(value) for name, value in raw_hparams.items()}

        # Define the metrics that will appear in the `HPARAMS` TensorBoard tab by referencing their tag.
        # TensorBoard will find & display metrics from the `SCALARS` tab; the values given
        # here are only placeholders.
        metric_dict = {
            "eval/mean_ep_length": 0,
            "eval/mean_reward": 0,
            "rollout/ep_len_mean": 0,
            "rollout/ep_rew_mean": 0,
            "rollout/exploration_rate": 0,
            "time/_episode_num": 0,
            "time/fps": 0,
            "time/total_timesteps": 0,
            "train/learning_rate": 0.0,
            "train/loss": 0.0,
            "train/n_updates": 0.0,
            "locals/rewards": 0.0,
            "locals/infos_0_lives": 0.0,
            "locals/num_collected_steps": 0.0,
            "locals/num_collected_episodes": 0.0,
        }

        self.logger.record(
            "hparams",
            HParam(hparam_dict, metric_dict),
            exclude=("stdout", "log", "json", "csv"),
        )

    def _on_step(self) -> bool:
        # Nothing to do per step; returning True lets training continue.
        return True
class PlotTensorboardValuesCallback(BaseCallback):
    """
    Custom callback for plotting additional values in tensorboard.

    On every step it records a handful of extra scalars (episode count, update
    count, current reward, remaining lives, rollout counters). At the start and
    end of training it writes the environment metadata and the network
    architectures as text summaries.
    """

    # Output formats that must NOT receive the extra values (i.e. TensorBoard only).
    _NON_TB_FORMATS = ("stdout", "log", "json", "csv")

    def __init__(self, eval_env: gym.Env, train_env: gym.Env, model: DQN, verbose=0):
        """
        :param eval_env: Evaluation environment whose ``metadata`` is logged as text
        :param train_env: Training environment whose ``metadata`` is logged as text
        :param model: DQN model whose ``q_net`` / ``q_net_target`` are logged as text
        :param verbose: Verbosity level forwarded to ``BaseCallback``
        """
        super().__init__(verbose)
        self._eval_env = eval_env
        self._train_env = train_env
        self._model = model
        # Resolved in `_on_training_start`; stays None when TensorBoard logging is off.
        self.tb_formatter = None

    def _write_text_summaries(self) -> None:
        """Write env metadata and network descriptions as TensorBoard text, if a writer exists."""
        if self.tb_formatter is None:
            return
        writer = self.tb_formatter.writer
        text_entries = (
            ("metadata/eval_env", self._eval_env.metadata),
            ("metadata/train_env", self._train_env.metadata),
            ("model/q_net", self._model.q_net),
            ("model/q_net_target", self._model.q_net_target),
        )
        for tag, value in text_entries:
            writer.add_text(tag, str(value), self.num_timesteps)
        writer.flush()

    def _on_training_start(self) -> None:
        # Save reference to the tensorboard formatter object. `next(..., None)` handles
        # the "no formatter configured" case explicitly instead of swallowing the
        # StopIteration and failing later on a missing attribute.
        self.tb_formatter = next(
            (fmt for fmt in self.logger.output_formats if isinstance(fmt, TensorBoardOutputFormat)),
            None,
        )
        if self.tb_formatter is None:
            print("No TensorBoard output format found; text summaries will be skipped.")
        self._write_text_summaries()

    def _on_step(self) -> bool:
        record = self.logger.record
        tb_only = self._NON_TB_FORMATS

        record("time/_episode_num", self.model._episode_num, exclude=tb_only)
        record("train/n_updates", self.model._n_updates, exclude=tb_only)

        rewards = self.locals.get("rewards")
        if rewards is not None:
            # `rewards` holds one entry per environment; reduce it to a single number
            # so it is recorded as a scalar rather than as a raw array.
            record("locals/rewards", float(np.mean(rewards)), exclude=tb_only)

        infos = self.locals.get("infos")
        if infos and "lives" in infos[0]:
            # Only environments that report a "lives" entry in their info dict have this.
            record("locals/infos_0_lives", infos[0]["lives"], exclude=tb_only)

        for key in ("num_collected_steps", "num_collected_episodes"):
            if key in self.locals:
                record(f"locals/{key}", self.locals[key], exclude=tb_only)

        return True

    def _on_training_end(self) -> None:
        # Log the same text summaries again, stamped with the final timestep.
        self._write_text_summaries()
# --- Environments -----------------------------------------------------------
# Both environments share the same construction arguments; only the evaluation
# environment is wrapped in a Monitor here.
env_kwargs = {"render_mode": "rgb_array", "frameskip": FRAMESKIP}
eval_env = Monitor(gym.make("ALE/Pacman-v5", **env_kwargs))
train_env = gym.make("ALE/Pacman-v5", **env_kwargs)

# --- Model ------------------------------------------------------------------
# Resume from the previous run's checkpoint. The hyperparameters are passed
# again through `custom_objects`, and the replay buffer is loaded separately.
model = DQN.load(
    "/kaggle/input/dqn-pacman-run2v2/ALE-Pacman-v5.zip",
    env=train_env,
    custom_objects=CUSTOM_OBJECTS,
)
model.load_replay_buffer("/kaggle/input/dqn-pacman-run2v2/dqn_replay_buffer_pacman_v2-1")

# --- Callbacks --------------------------------------------------------------
hparam_callback = HParamCallback()
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path="./best_model/",
    log_path="./evals/",
    eval_freq=EVAL_CALLBACK_FREQ,
    n_eval_episodes=10,
    deterministic=True,
    render=False,
)
video_callback = VideoRecorderCallback(eval_env, render_freq=VIDEO_CALLBACK_FREQ)
tbplot_callback = PlotTensorboardValuesCallback(eval_env=eval_env, train_env=train_env, model=model)

# The list order is the same as before: hparams, eval, video, tensorboard values.
callback_list = CallbackList([hparam_callback, eval_callback, video_callback, tbplot_callback])
"2024-05-11T01:41:04.585763Z", "shell.execute_reply": "2024-05-11T01:41:04.584612Z" }, "papermill": { "duration": 13295.854278, "end_time": "2024-05-11T01:41:04.588098", "exception": false, "start_time": "2024-05-10T21:59:28.733820", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Logging to ././tb/_0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/stable_baselines3/common/callbacks.py:403: UserWarning: Training and eval env are not of the same type != \n", " warnings.warn(\"Training and eval env are not of the same type\" f\"{self.training_env} != {self.eval_env}\")\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 953 |\n", "| ep_rew_mean | 88 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3300 |\n", "| fps | 87 |\n", "| time_elapsed | 11 |\n", "| total_timesteps | 3001016 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.469 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 962 |\n", "| ep_rew_mean | 90.3 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3304 |\n", "| fps | 105 |\n", "| time_elapsed | 50 |\n", "| total_timesteps | 3005376 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.183 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 965 |\n", "| ep_rew_mean | 93.6 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3308 |\n", "| fps | 108 |\n", "| time_elapsed | 84 |\n", "| total_timesteps | 3009124 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.25 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 974 |\n", "| ep_rew_mean | 
97.8 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3312 |\n", "| fps | 109 |\n", "| time_elapsed | 123 |\n", "| total_timesteps | 3013473 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.145 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 974 |\n", "| ep_rew_mean | 98.7 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3316 |\n", "| fps | 109 |\n", "| time_elapsed | 158 |\n", "| total_timesteps | 3017417 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.765 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 985 |\n", "| ep_rew_mean | 102 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3320 |\n", "| fps | 110 |\n", "| time_elapsed | 199 |\n", "| total_timesteps | 3022035 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.163 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 975 |\n", "| ep_rew_mean | 107 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3324 |\n", "| fps | 110 |\n", "| time_elapsed | 233 |\n", "| total_timesteps | 3025723 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.525 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 976 |\n", "| ep_rew_mean | 108 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3328 |\n", "| fps | 110 |\n", "| time_elapsed | 270 |\n", "| total_timesteps | 3029961 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.123 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 974 |\n", "| ep_rew_mean | 111 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3332 |\n", "| 
fps | 110 |\n", "| time_elapsed | 308 |\n", "| total_timesteps | 3034213 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.18 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 978 |\n", "| ep_rew_mean | 115 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3336 |\n", "| fps | 110 |\n", "| time_elapsed | 345 |\n", "| total_timesteps | 3038285 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.159 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 972 |\n", "| ep_rew_mean | 116 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3340 |\n", "| fps | 110 |\n", "| time_elapsed | 379 |\n", "| total_timesteps | 3042023 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0668 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 122 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3344 |\n", "| fps | 111 |\n", "| time_elapsed | 649 |\n", "| total_timesteps | 3072342 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0839 |\n", "----------------------------------\n", "Eval num_timesteps=3075000, episode_reward=208.00 +/- 104.39\n", "Episode length: 931.00 +/- 86.74\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 931 |\n", "| mean_reward | 208 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 3075000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0993 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 126 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3348 |\n", 
"| fps | 107 |\n", "| time_elapsed | 706 |\n", "| total_timesteps | 3076077 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0991 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 131 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3352 |\n", "| fps | 107 |\n", "| time_elapsed | 749 |\n", "| total_timesteps | 3080943 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.109 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 134 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3356 |\n", "| fps | 108 |\n", "| time_elapsed | 785 |\n", "| total_timesteps | 3084943 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0644 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 139 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3360 |\n", "| fps | 108 |\n", "| time_elapsed | 831 |\n", "| total_timesteps | 3089999 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.041 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 143 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3364 |\n", "| fps | 108 |\n", "| time_elapsed | 873 |\n", "| total_timesteps | 3094685 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0551 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.29e+03 |\n", "| ep_rew_mean | 144 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3368 |\n", "| fps | 108 |\n", "| time_elapsed | 910 |\n", "| 
total_timesteps | 3098783 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.322 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.29e+03 |\n", "| ep_rew_mean | 142 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3372 |\n", "| fps | 108 |\n", "| time_elapsed | 944 |\n", "| total_timesteps | 3102588 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0419 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 146 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3376 |\n", "| fps | 108 |\n", "| time_elapsed | 988 |\n", "| total_timesteps | 3107492 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.123 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 152 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3380 |\n", "| fps | 108 |\n", "| time_elapsed | 1027 |\n", "| total_timesteps | 3111815 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.404 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 157 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3384 |\n", "| fps | 108 |\n", "| time_elapsed | 1060 |\n", "| total_timesteps | 3115521 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.118 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 156 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3388 |\n", "| fps | 109 |\n", "| time_elapsed | 1099 |\n", "| total_timesteps | 3119903 |\n", "| train/ | |\n", "| 
learning_rate | 5e-05 |\n", "| loss | 0.0927 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 159 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3392 |\n", "| fps | 109 |\n", "| time_elapsed | 1141 |\n", "| total_timesteps | 3124629 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.149 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 159 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3396 |\n", "| fps | 109 |\n", "| time_elapsed | 1180 |\n", "| total_timesteps | 3128877 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.205 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 160 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3400 |\n", "| fps | 109 |\n", "| time_elapsed | 1219 |\n", "| total_timesteps | 3133205 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.328 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.33e+03 |\n", "| ep_rew_mean | 158 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3404 |\n", "| fps | 109 |\n", "| time_elapsed | 1261 |\n", "| total_timesteps | 3137927 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.2 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.33e+03 |\n", "| ep_rew_mean | 159 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3408 |\n", "| fps | 109 |\n", "| time_elapsed | 1300 |\n", "| total_timesteps | 3142273 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0668 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.33e+03 |\n", "| ep_rew_mean | 155 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3412 |\n", "| fps | 109 |\n", "| time_elapsed | 1334 |\n", "| total_timesteps | 3146099 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.699 |\n", "----------------------------------\n", "Eval num_timesteps=3150000, episode_reward=186.10 +/- 72.06\n", "Episode length: 4162.20 +/- 7682.33\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 4.16e+03 |\n", "| mean_reward | 186 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 3150000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.111 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.33e+03 |\n", "| ep_rew_mean | 160 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3416 |\n", "| fps | 102 |\n", "| time_elapsed | 1471 |\n", "| total_timesteps | 3150231 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.176 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.33e+03 |\n", "| ep_rew_mean | 166 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3420 |\n", "| fps | 102 |\n", "| time_elapsed | 1515 |\n", "| total_timesteps | 3155147 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.163 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.34e+03 |\n", "| ep_rew_mean | 167 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3424 |\n", "| fps | 102 |\n", "| time_elapsed | 1558 |\n", "| total_timesteps | 3159929 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.113 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.34e+03 |\n", "| ep_rew_mean | 172 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3428 |\n", "| fps | 102 |\n", "| time_elapsed | 1596 |\n", "| total_timesteps | 3164208 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.447 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.35e+03 |\n", "| ep_rew_mean | 170 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3432 |\n", "| fps | 103 |\n", "| time_elapsed | 1637 |\n", "| total_timesteps | 3168767 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.385 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.35e+03 |\n", "| ep_rew_mean | 168 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3436 |\n", "| fps | 103 |\n", "| time_elapsed | 1679 |\n", "| total_timesteps | 3173425 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.474 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 169 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3440 |\n", "| fps | 103 |\n", "| time_elapsed | 1717 |\n", "| total_timesteps | 3177653 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.419 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 170 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3444 |\n", "| fps | 103 |\n", "| time_elapsed | 1753 |\n", "| total_timesteps | 3181742 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0663 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 171 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3448 |\n", "| fps | 103 |\n", "| time_elapsed | 1789 |\n", "| total_timesteps | 3185646 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0768 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 174 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3452 |\n", "| fps | 103 |\n", "| time_elapsed | 1829 |\n", "| total_timesteps | 3190098 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.327 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 175 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3456 |\n", "| fps | 104 |\n", "| time_elapsed | 1871 |\n", "| total_timesteps | 3194868 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0996 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 172 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3460 |\n", "| fps | 104 |\n", "| time_elapsed | 1911 |\n", "| total_timesteps | 3199365 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.149 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 174 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3464 |\n", "| fps | 104 |\n", "| time_elapsed | 1956 |\n", "| total_timesteps | 3204383 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.315 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", 
"| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 175 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3468 |\n", "| fps | 104 |\n", "| time_elapsed | 2000 |\n", "| total_timesteps | 3209264 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0518 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 179 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3472 |\n", "| fps | 104 |\n", "| time_elapsed | 2041 |\n", "| total_timesteps | 3213887 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.388 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 177 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3476 |\n", "| fps | 104 |\n", "| time_elapsed | 2077 |\n", "| total_timesteps | 3217888 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.18 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 175 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3480 |\n", "| fps | 104 |\n", "| time_elapsed | 2117 |\n", "| total_timesteps | 3222302 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.13 |\n", "----------------------------------\n", "Eval num_timesteps=3225000, episode_reward=270.80 +/- 121.72\n", "Episode length: 1096.40 +/- 217.57\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.1e+03 |\n", "| mean_reward | 271 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 3225000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.114 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| 
rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 174 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3484 |\n", "| fps | 103 |\n", "| time_elapsed | 2185 |\n", "| total_timesteps | 3226908 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0847 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 177 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3488 |\n", "| fps | 103 |\n", "| time_elapsed | 2227 |\n", "| total_timesteps | 3231548 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.155 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 180 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3492 |\n", "| fps | 104 |\n", "| time_elapsed | 2261 |\n", "| total_timesteps | 3235411 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.137 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 180 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3496 |\n", "| fps | 104 |\n", "| time_elapsed | 2300 |\n", "| total_timesteps | 3239711 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.329 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 181 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3500 |\n", "| fps | 104 |\n", "| time_elapsed | 2351 |\n", "| total_timesteps | 3245451 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0808 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| 
ep_rew_mean | 180 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3504 |\n", "| fps | 104 |\n", "| time_elapsed | 2392 |\n", "| total_timesteps | 3250026 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.115 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 177 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3508 |\n", "| fps | 104 |\n", "| time_elapsed | 2426 |\n", "| total_timesteps | 3253824 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.141 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 181 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3512 |\n", "| fps | 104 |\n", "| time_elapsed | 2473 |\n", "| total_timesteps | 3259154 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.134 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 177 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3516 |\n", "| fps | 104 |\n", "| time_elapsed | 2517 |\n", "| total_timesteps | 3264074 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0586 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 168 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3520 |\n", "| fps | 105 |\n", "| time_elapsed | 2562 |\n", "| total_timesteps | 3269066 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0767 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 168 |\n", "| exploration_rate | 0.05 |\n", 
"| time/ | |\n", "| episodes | 3524 |\n", "| fps | 105 |\n", "| time_elapsed | 2609 |\n", "| total_timesteps | 3274332 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0772 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 165 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3528 |\n", "| fps | 105 |\n", "| time_elapsed | 2652 |\n", "| total_timesteps | 3279168 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0604 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 167 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3532 |\n", "| fps | 105 |\n", "| time_elapsed | 2691 |\n", "| total_timesteps | 3283458 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0755 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 169 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3536 |\n", "| fps | 105 |\n", "| time_elapsed | 2736 |\n", "| total_timesteps | 3288592 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.117 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 167 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3540 |\n", "| fps | 105 |\n", "| time_elapsed | 2775 |\n", "| total_timesteps | 3292912 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0786 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 164 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3544 |\n", "| fps | 105 
|\n", "| time_elapsed | 2819 |\n", "| total_timesteps | 3297801 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.769 |\n", "----------------------------------\n", "Eval num_timesteps=3300000, episode_reward=147.70 +/- 63.64\n", "Episode length: 1203.00 +/- 228.16\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.2e+03 |\n", "| mean_reward | 148 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 3300000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.141 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 162 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3548 |\n", "| fps | 104 |\n", "| time_elapsed | 2897 |\n", "| total_timesteps | 3303207 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.197 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 161 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3552 |\n", "| fps | 104 |\n", "| time_elapsed | 2943 |\n", "| total_timesteps | 3308334 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.281 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 158 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3556 |\n", "| fps | 104 |\n", "| time_elapsed | 2981 |\n", "| total_timesteps | 3312592 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0784 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 156 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3560 |\n", "| fps | 104 |\n", 
"| time_elapsed | 3021 |\n", "| total_timesteps | 3317017 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0725 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 152 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3564 |\n", "| fps | 105 |\n", "| time_elapsed | 3054 |\n", "| total_timesteps | 3320755 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.104 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 149 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3568 |\n", "| fps | 105 |\n", "| time_elapsed | 3089 |\n", "| total_timesteps | 3324621 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.115 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 147 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3572 |\n", "| fps | 105 |\n", "| time_elapsed | 3128 |\n", "| total_timesteps | 3329001 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.281 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 147 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3576 |\n", "| fps | 105 |\n", "| time_elapsed | 3165 |\n", "| total_timesteps | 3333061 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.311 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 151 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3580 |\n", "| fps | 105 |\n", "| time_elapsed | 3204 |\n", "| total_timesteps | 3337452 
|\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.27 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 153 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3584 |\n", "| fps | 105 |\n", "| time_elapsed | 3249 |\n", "| total_timesteps | 3342402 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.24 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 152 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3588 |\n", "| fps | 105 |\n", "| time_elapsed | 3289 |\n", "| total_timesteps | 3346834 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.188 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 150 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3592 |\n", "| fps | 105 |\n", "| time_elapsed | 3336 |\n", "| total_timesteps | 3352098 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.569 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 153 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3596 |\n", "| fps | 105 |\n", "| time_elapsed | 3379 |\n", "| total_timesteps | 3356900 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.49 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 154 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3600 |\n", "| fps | 105 |\n", "| time_elapsed | 3421 |\n", "| total_timesteps | 3361592 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss 
| 0.609 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 160 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3604 |\n", "| fps | 105 |\n", "| time_elapsed | 3473 |\n", "| total_timesteps | 3367384 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.115 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 158 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3608 |\n", "| fps | 105 |\n", "| time_elapsed | 3521 |\n", "| total_timesteps | 3372676 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.124 |\n", "----------------------------------\n", "Eval num_timesteps=3375000, episode_reward=67.00 +/- 21.59\n", "Episode length: 996.50 +/- 147.15\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 996 |\n", "| mean_reward | 67 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 3375000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.858 |\n", "----------------------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/gymnasium/utils/passive_env_checker.py:335: UserWarning: \u001b[33mWARN: No render fps was declared in the environment (env.metadata['render_fps'] is None or not defined), rendering may occur at inconsistent fps.\u001b[0m\n", " logger.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 155 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3612 |\n", "| fps | 105 |\n", "| time_elapsed | 3596 |\n", "| total_timesteps | 3378339 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss 
| 0.13 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 156 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3616 |\n", "| fps | 105 |\n", "| time_elapsed | 3645 |\n", "| total_timesteps | 3383354 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.103 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 159 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3620 |\n", "| fps | 105 |\n", "| time_elapsed | 3683 |\n", "| total_timesteps | 3387970 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.163 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 160 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3624 |\n", "| fps | 105 |\n", "| time_elapsed | 3720 |\n", "| total_timesteps | 3392498 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.149 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 161 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3628 |\n", "| fps | 105 |\n", "| time_elapsed | 3755 |\n", "| total_timesteps | 3396690 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.196 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 162 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3632 |\n", "| fps | 105 |\n", "| time_elapsed | 3795 |\n", "| total_timesteps | 3401577 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.15 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 159 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3636 |\n", "| fps | 106 |\n", "| time_elapsed | 4035 |\n", "| total_timesteps | 3431044 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.362 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 159 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3640 |\n", "| fps | 106 |\n", "| time_elapsed | 4064 |\n", "| total_timesteps | 3434572 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0964 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 155 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3644 |\n", "| fps | 107 |\n", "| time_elapsed | 4091 |\n", "| total_timesteps | 3437924 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.198 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 151 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3648 |\n", "| fps | 107 |\n", "| time_elapsed | 4118 |\n", "| total_timesteps | 3441232 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.163 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 151 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3652 |\n", "| fps | 107 |\n", "| time_elapsed | 4150 |\n", "| total_timesteps | 3445120 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0879 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | 
|\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 153 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3656 |\n", "| fps | 107 |\n", "| time_elapsed | 4183 |\n", "| total_timesteps | 3449150 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.372 |\n", "----------------------------------\n", "Eval num_timesteps=3450000, episode_reward=139.50 +/- 58.55\n", "Episode length: 1121.00 +/- 100.81\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.12e+03 |\n", "| mean_reward | 140 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 3450000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.262 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 155 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3660 |\n", "| fps | 106 |\n", "| time_elapsed | 4247 |\n", "| total_timesteps | 3453596 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0966 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 155 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3664 |\n", "| fps | 106 |\n", "| time_elapsed | 4284 |\n", "| total_timesteps | 3458164 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.193 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 156 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3668 |\n", "| fps | 107 |\n", "| time_elapsed | 4320 |\n", "| total_timesteps | 3462504 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.131 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", 
"| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 157 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3672 |\n", "| fps | 107 |\n", "| time_elapsed | 4356 |\n", "| total_timesteps | 3466934 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0864 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 158 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3676 |\n", "| fps | 107 |\n", "| time_elapsed | 4394 |\n", "| total_timesteps | 3471644 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.194 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 152 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3680 |\n", "| fps | 107 |\n", "| time_elapsed | 4428 |\n", "| total_timesteps | 3475694 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.204 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 152 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3684 |\n", "| fps | 107 |\n", "| time_elapsed | 4461 |\n", "| total_timesteps | 3479764 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0645 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 153 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3688 |\n", "| fps | 107 |\n", "| time_elapsed | 4496 |\n", "| total_timesteps | 3484018 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.329 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 150 
|\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3692 |\n", "| fps | 107 |\n", "| time_elapsed | 4530 |\n", "| total_timesteps | 3488208 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.263 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.35e+03 |\n", "| ep_rew_mean | 146 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3696 |\n", "| fps | 107 |\n", "| time_elapsed | 4563 |\n", "| total_timesteps | 3492267 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.179 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.35e+03 |\n", "| ep_rew_mean | 144 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3700 |\n", "| fps | 107 |\n", "| time_elapsed | 4600 |\n", "| total_timesteps | 3496817 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.151 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.34e+03 |\n", "| ep_rew_mean | 143 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3704 |\n", "| fps | 108 |\n", "| time_elapsed | 4636 |\n", "| total_timesteps | 3501167 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.358 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.33e+03 |\n", "| ep_rew_mean | 144 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3708 |\n", "| fps | 108 |\n", "| time_elapsed | 4670 |\n", "| total_timesteps | 3505329 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.243 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 146 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| 
episodes | 3712 |\n", "| fps | 108 |\n", "| time_elapsed | 4709 |\n", "| total_timesteps | 3510139 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.279 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 146 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3716 |\n", "| fps | 108 |\n", "| time_elapsed | 4749 |\n", "| total_timesteps | 3515054 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.216 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.33e+03 |\n", "| ep_rew_mean | 145 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3720 |\n", "| fps | 108 |\n", "| time_elapsed | 4793 |\n", "| total_timesteps | 3520476 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.155 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 140 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3724 |\n", "| fps | 108 |\n", "| time_elapsed | 4829 |\n", "| total_timesteps | 3524793 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.178 |\n", "----------------------------------\n", "Eval num_timesteps=3525000, episode_reward=231.70 +/- 79.90\n", "Episode length: 1138.20 +/- 122.98\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.14e+03 |\n", "| mean_reward | 232 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 3525000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.178 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 136 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| 
episodes | 3728 |\n", "| fps | 108 |\n", "| time_elapsed | 4891 |\n", "| total_timesteps | 3529123 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.124 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.33e+03 |\n", "| ep_rew_mean | 132 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3732 |\n", "| fps | 108 |\n", "| time_elapsed | 4932 |\n", "| total_timesteps | 3534084 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.137 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.07e+03 |\n", "| ep_rew_mean | 133 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3736 |\n", "| fps | 108 |\n", "| time_elapsed | 4965 |\n", "| total_timesteps | 3538100 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.465 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.08e+03 |\n", "| ep_rew_mean | 137 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3740 |\n", "| fps | 108 |\n", "| time_elapsed | 5006 |\n", "| total_timesteps | 3543054 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.206 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 139 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3744 |\n", "| fps | 108 |\n", "| time_elapsed | 5045 |\n", "| total_timesteps | 3547769 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.217 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 142 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3748 |\n", "| fps | 108 |\n", "| time_elapsed | 
5084 |\n", "| total_timesteps | 3552541 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.175 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 141 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3752 |\n", "| fps | 108 |\n", "| time_elapsed | 5124 |\n", "| total_timesteps | 3557411 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.241 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 139 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3756 |\n", "| fps | 108 |\n", "| time_elapsed | 5162 |\n", "| total_timesteps | 3562105 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.178 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 136 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3760 |\n", "| fps | 108 |\n", "| time_elapsed | 5191 |\n", "| total_timesteps | 3565591 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.21 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 138 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3764 |\n", "| fps | 109 |\n", "| time_elapsed | 5227 |\n", "| total_timesteps | 3570055 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.186 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 137 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3768 |\n", "| fps | 109 |\n", "| time_elapsed | 5263 |\n", "| total_timesteps | 3574396 |\n", "| train/ | 
|\n", "| learning_rate | 5e-05 |\n", "| loss | 0.154 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 135 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3772 |\n", "| fps | 109 |\n", "| time_elapsed | 5293 |\n", "| total_timesteps | 3578034 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0891 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 137 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3776 |\n", "| fps | 109 |\n", "| time_elapsed | 5335 |\n", "| total_timesteps | 3583181 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.358 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 137 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3780 |\n", "| fps | 109 |\n", "| time_elapsed | 5370 |\n", "| total_timesteps | 3587535 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.224 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 138 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3784 |\n", "| fps | 109 |\n", "| time_elapsed | 5409 |\n", "| total_timesteps | 3592280 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.106 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 135 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3788 |\n", "| fps | 109 |\n", "| time_elapsed | 5445 |\n", "| total_timesteps | 3596632 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.327 |\n", 
"----------------------------------\n", "Eval num_timesteps=3600000, episode_reward=297.00 +/- 63.53\n", "Episode length: 1166.70 +/- 204.03\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.17e+03 |\n", "| mean_reward | 297 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 3600000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.409 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 138 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3792 |\n", "| fps | 109 |\n", "| time_elapsed | 5517 |\n", "| total_timesteps | 3601836 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.165 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 139 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3796 |\n", "| fps | 109 |\n", "| time_elapsed | 5552 |\n", "| total_timesteps | 3606184 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.141 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 138 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3800 |\n", "| fps | 109 |\n", "| time_elapsed | 5590 |\n", "| total_timesteps | 3610856 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.154 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 137 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3804 |\n", "| fps | 109 |\n", "| time_elapsed | 5631 |\n", "| total_timesteps | 3615786 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| 
loss | 0.0843 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 138 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3808 |\n", "| fps | 109 |\n", "| time_elapsed | 5670 |\n", "| total_timesteps | 3620562 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.149 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 140 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3812 |\n", "| fps | 109 |\n", "| time_elapsed | 5717 |\n", "| total_timesteps | 3626290 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.427 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 145 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3816 |\n", "| fps | 109 |\n", "| time_elapsed | 5755 |\n", "| total_timesteps | 3630975 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.192 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 150 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3820 |\n", "| fps | 109 |\n", "| time_elapsed | 5796 |\n", "| total_timesteps | 3635945 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.102 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 155 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3824 |\n", "| fps | 109 |\n", "| time_elapsed | 5837 |\n", "| total_timesteps | 3640950 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.319 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 160 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3828 |\n", "| fps | 109 |\n", "| time_elapsed | 5877 |\n", "| total_timesteps | 3645750 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.162 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 158 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3832 |\n", "| fps | 109 |\n", "| time_elapsed | 5911 |\n", "| total_timesteps | 3649904 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.108 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 164 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3836 |\n", "| fps | 110 |\n", "| time_elapsed | 5945 |\n", "| total_timesteps | 3654031 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.933 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 164 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3840 |\n", "| fps | 110 |\n", "| time_elapsed | 5989 |\n", "| total_timesteps | 3659351 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0665 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 170 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3844 |\n", "| fps | 110 |\n", "| time_elapsed | 6031 |\n", "| total_timesteps | 3664461 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0967 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | 
|\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 168 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3848 |\n", "| fps | 110 |\n", "| time_elapsed | 6060 |\n", "| total_timesteps | 3668049 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.134 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 169 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3852 |\n", "| fps | 110 |\n", "| time_elapsed | 6095 |\n", "| total_timesteps | 3672283 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.598 |\n", "----------------------------------\n", "Eval num_timesteps=3675000, episode_reward=336.20 +/- 65.54\n", "Episode length: 1168.00 +/- 256.37\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.17e+03 |\n", "| mean_reward | 336 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 3675000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.873 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 172 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3856 |\n", "| fps | 109 |\n", "| time_elapsed | 6162 |\n", "| total_timesteps | 3676755 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.192 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 175 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3860 |\n", "| fps | 109 |\n", "| time_elapsed | 6197 |\n", "| total_timesteps | 3680979 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.27 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 174 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3864 |\n", "| fps | 109 |\n", "| time_elapsed | 6235 |\n", "| total_timesteps | 3685617 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.104 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 176 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3868 |\n", "| fps | 110 |\n", "| time_elapsed | 6278 |\n", "| total_timesteps | 3690768 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.203 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 178 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3872 |\n", "| fps | 110 |\n", "| time_elapsed | 6314 |\n", "| total_timesteps | 3695117 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.196 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 181 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3876 |\n", "| fps | 110 |\n", "| time_elapsed | 6355 |\n", "| total_timesteps | 3700145 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.16 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 187 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3880 |\n", "| fps | 110 |\n", "| time_elapsed | 6391 |\n", "| total_timesteps | 3704529 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.107 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", 
"| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 186 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3884 |\n", "| fps | 110 |\n", "| time_elapsed | 6433 |\n", "| total_timesteps | 3709595 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0936 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 188 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3888 |\n", "| fps | 110 |\n", "| time_elapsed | 6478 |\n", "| total_timesteps | 3715161 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.257 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 188 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3892 |\n", "| fps | 110 |\n", "| time_elapsed | 6527 |\n", "| total_timesteps | 3721086 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.049 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 192 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3896 |\n", "| fps | 110 |\n", "| time_elapsed | 6571 |\n", "| total_timesteps | 3726448 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.314 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 198 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3900 |\n", "| fps | 110 |\n", "| time_elapsed | 6609 |\n", "| total_timesteps | 3730992 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.127 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 198 |\n", 
"| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3904 |\n", "| fps | 110 |\n", "| time_elapsed | 6643 |\n", "| total_timesteps | 3735152 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.156 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 199 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3908 |\n", "| fps | 110 |\n", "| time_elapsed | 6685 |\n", "| total_timesteps | 3740213 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.478 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 198 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3912 |\n", "| fps | 110 |\n", "| time_elapsed | 6726 |\n", "| total_timesteps | 3745249 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0895 |\n", "----------------------------------\n", "Eval num_timesteps=3750000, episode_reward=178.30 +/- 80.17\n", "Episode length: 1179.00 +/- 108.40\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.18e+03 |\n", "| mean_reward | 178 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 3750000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0705 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 194 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3916 |\n", "| fps | 110 |\n", "| time_elapsed | 6798 |\n", "| total_timesteps | 3750130 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0779 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 193 |\n", "| 
exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3920 |\n", "| fps | 110 |\n", "| time_elapsed | 6847 |\n", "| total_timesteps | 3755350 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.54 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 190 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3924 |\n", "| fps | 110 |\n", "| time_elapsed | 6884 |\n", "| total_timesteps | 3759897 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.104 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 190 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3928 |\n", "| fps | 110 |\n", "| time_elapsed | 6924 |\n", "| total_timesteps | 3764727 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.14 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 195 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3932 |\n", "| fps | 110 |\n", "| time_elapsed | 6960 |\n", "| total_timesteps | 3769189 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.815 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 192 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3936 |\n", "| fps | 110 |\n", "| time_elapsed | 7000 |\n", "| total_timesteps | 3774035 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.139 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 187 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 
3940 |\n", "| fps | 110 |\n", "| time_elapsed | 7038 |\n", "| total_timesteps | 3778693 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.205 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 183 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3944 |\n", "| fps | 110 |\n", "| time_elapsed | 7071 |\n", "| total_timesteps | 3782819 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0804 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.45e+03 |\n", "| ep_rew_mean | 189 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3948 |\n", "| fps | 111 |\n", "| time_elapsed | 7321 |\n", "| total_timesteps | 3813422 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0872 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.45e+03 |\n", "| ep_rew_mean | 187 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3952 |\n", "| fps | 111 |\n", "| time_elapsed | 7355 |\n", "| total_timesteps | 3817624 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.167 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.45e+03 |\n", "| ep_rew_mean | 190 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3956 |\n", "| fps | 111 |\n", "| time_elapsed | 7392 |\n", "| total_timesteps | 3822080 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0481 |\n", "----------------------------------\n", "Eval num_timesteps=3825000, episode_reward=291.40 +/- 118.67\n", "Episode length: 1061.00 +/- 98.07\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.06e+03 |\n", "| mean_reward | 291 |\n", "| rollout/ | 
|\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 3825000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0784 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.45e+03 |\n", "| ep_rew_mean | 193 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3960 |\n", "| fps | 110 |\n", "| time_elapsed | 7452 |\n", "| total_timesteps | 3826228 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0676 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.45e+03 |\n", "| ep_rew_mean | 197 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3964 |\n", "| fps | 110 |\n", "| time_elapsed | 7488 |\n", "| total_timesteps | 3830620 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0738 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.45e+03 |\n", "| ep_rew_mean | 199 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3968 |\n", "| fps | 110 |\n", "| time_elapsed | 7527 |\n", "| total_timesteps | 3835389 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0534 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 199 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3972 |\n", "| fps | 111 |\n", "| time_elapsed | 7554 |\n", "| total_timesteps | 3838692 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.253 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 200 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3976 |\n", "| fps | 111 |\n", "| time_elapsed | 7590 |\n", 
"| total_timesteps | 3843094 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.176 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 199 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3980 |\n", "| fps | 111 |\n", "| time_elapsed | 7623 |\n", "| total_timesteps | 3847112 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.051 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 207 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3984 |\n", "| fps | 111 |\n", "| time_elapsed | 7667 |\n", "| total_timesteps | 3852617 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.146 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 207 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3988 |\n", "| fps | 111 |\n", "| time_elapsed | 7704 |\n", "| total_timesteps | 3857101 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.35 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 208 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3992 |\n", "| fps | 111 |\n", "| time_elapsed | 7746 |\n", "| total_timesteps | 3862315 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.12 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 212 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 3996 |\n", "| fps | 111 |\n", "| time_elapsed | 7783 |\n", "| total_timesteps | 3866815 |\n", "| train/ | |\n", "| 
learning_rate | 5e-05 |\n", "| loss | 0.0721 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 213 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4000 |\n", "| fps | 111 |\n", "| time_elapsed | 7827 |\n", "| total_timesteps | 3872207 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0977 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 213 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4004 |\n", "| fps | 111 |\n", "| time_elapsed | 7863 |\n", "| total_timesteps | 3876663 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0597 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 218 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4008 |\n", "| fps | 111 |\n", "| time_elapsed | 7902 |\n", "| total_timesteps | 3881433 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.136 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 221 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4012 |\n", "| fps | 111 |\n", "| time_elapsed | 7941 |\n", "| total_timesteps | 3886139 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.338 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 226 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4016 |\n", "| fps | 111 |\n", "| time_elapsed | 7979 |\n", "| total_timesteps | 3890864 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0728 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 221 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4020 |\n", "| fps | 111 |\n", "| time_elapsed | 8017 |\n", "| total_timesteps | 3895462 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.198 |\n", "----------------------------------\n", "Eval num_timesteps=3900000, episode_reward=237.40 +/- 97.81\n", "Episode length: 1250.00 +/- 235.18\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.25e+03 |\n", "| mean_reward | 237 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 3900000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.313 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 223 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4024 |\n", "| fps | 111 |\n", "| time_elapsed | 8088 |\n", "| total_timesteps | 3900544 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.427 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 225 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4028 |\n", "| fps | 111 |\n", "| time_elapsed | 8122 |\n", "| total_timesteps | 3904683 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.214 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 225 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4032 |\n", "| fps | 111 |\n", "| time_elapsed | 8158 |\n", "| total_timesteps | 3909020 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.172 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 229 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4036 |\n", "| fps | 111 |\n", "| time_elapsed | 8200 |\n", "| total_timesteps | 3914174 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.246 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 231 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4040 |\n", "| fps | 111 |\n", "| time_elapsed | 8233 |\n", "| total_timesteps | 3918200 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.179 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 234 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4044 |\n", "| fps | 111 |\n", "| time_elapsed | 8264 |\n", "| total_timesteps | 3921982 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0812 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 233 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4048 |\n", "| fps | 111 |\n", "| time_elapsed | 8303 |\n", "| total_timesteps | 3926817 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.667 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 235 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4052 |\n", "| fps | 111 |\n", "| time_elapsed | 8337 |\n", "| total_timesteps | 3931017 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0873 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 236 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4056 |\n", "| fps | 111 |\n", "| time_elapsed | 8378 |\n", "| total_timesteps | 3935997 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0962 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 235 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4060 |\n", "| fps | 111 |\n", "| time_elapsed | 8415 |\n", "| total_timesteps | 3940503 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.117 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 235 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4064 |\n", "| fps | 111 |\n", "| time_elapsed | 8454 |\n", "| total_timesteps | 3945308 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.421 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 232 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4068 |\n", "| fps | 111 |\n", "| time_elapsed | 8487 |\n", "| total_timesteps | 3949294 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.105 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 237 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4072 |\n", "| fps | 111 |\n", "| time_elapsed | 8531 |\n", "| total_timesteps | 3954694 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.88 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", 
"| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 235 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4076 |\n", "| fps | 111 |\n", "| time_elapsed | 8571 |\n", "| total_timesteps | 3959620 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.136 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 241 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4080 |\n", "| fps | 112 |\n", "| time_elapsed | 8613 |\n", "| total_timesteps | 3964690 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.271 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 230 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4084 |\n", "| fps | 112 |\n", "| time_elapsed | 8649 |\n", "| total_timesteps | 3969162 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.132 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 230 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4088 |\n", "| fps | 112 |\n", "| time_elapsed | 8690 |\n", "| total_timesteps | 3974190 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.104 |\n", "----------------------------------\n", "Eval num_timesteps=3975000, episode_reward=66.50 +/- 43.81\n", "Episode length: 831.00 +/- 78.18\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 831 |\n", "| mean_reward | 66.5 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 3975000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.104 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean 
| 1.16e+03 |\n", "| ep_rew_mean | 231 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4092 |\n", "| fps | 111 |\n", "| time_elapsed | 8748 |\n", "| total_timesteps | 3978793 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.167 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 229 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4096 |\n", "| fps | 111 |\n", "| time_elapsed | 8787 |\n", "| total_timesteps | 3983603 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.142 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 223 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4100 |\n", "| fps | 111 |\n", "| time_elapsed | 8826 |\n", "| total_timesteps | 3988305 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.157 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 218 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4104 |\n", "| fps | 112 |\n", "| time_elapsed | 8862 |\n", "| total_timesteps | 3992719 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.682 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 215 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4108 |\n", "| fps | 112 |\n", "| time_elapsed | 8904 |\n", "| total_timesteps | 3997887 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.293 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 213 |\n", "| 
exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4112 |\n", "| fps | 112 |\n", "| time_elapsed | 8947 |\n", "| total_timesteps | 4003137 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.184 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 212 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4116 |\n", "| fps | 112 |\n", "| time_elapsed | 8984 |\n", "| total_timesteps | 4007658 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.111 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 215 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4120 |\n", "| fps | 112 |\n", "| time_elapsed | 9027 |\n", "| total_timesteps | 4012930 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.133 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 218 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4124 |\n", "| fps | 112 |\n", "| time_elapsed | 9071 |\n", "| total_timesteps | 4018316 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.171 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 221 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4128 |\n", "| fps | 112 |\n", "| time_elapsed | 9108 |\n", "| total_timesteps | 4022777 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.146 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 216 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes 
| 4132 |\n", "| fps | 112 |\n", "| time_elapsed | 9140 |\n", "| total_timesteps | 4026671 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.523 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 208 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4136 |\n", "| fps | 112 |\n", "| time_elapsed | 9168 |\n", "| total_timesteps | 4030087 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.451 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 206 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4140 |\n", "| fps | 112 |\n", "| time_elapsed | 9201 |\n", "| total_timesteps | 4034102 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.251 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 200 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4144 |\n", "| fps | 112 |\n", "| time_elapsed | 9241 |\n", "| total_timesteps | 4039008 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.121 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 195 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4148 |\n", "| fps | 112 |\n", "| time_elapsed | 9278 |\n", "| total_timesteps | 4043596 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.054 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 192 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4152 |\n", "| fps | 112 |\n", "| time_elapsed | 9311 
|\n", "| total_timesteps | 4047666 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.12 |\n", "----------------------------------\n", "Eval num_timesteps=4050000, episode_reward=103.20 +/- 95.76\n", "Episode length: 1099.80 +/- 209.71\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.1e+03 |\n", "| mean_reward | 103 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 4050000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.268 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 186 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4156 |\n", "| fps | 112 |\n", "| time_elapsed | 9376 |\n", "| total_timesteps | 4052322 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.101 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 181 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4160 |\n", "| fps | 112 |\n", "| time_elapsed | 9412 |\n", "| total_timesteps | 4056758 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.282 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 176 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4164 |\n", "| fps | 112 |\n", "| time_elapsed | 9445 |\n", "| total_timesteps | 4060744 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0935 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 174 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4168 |\n", "| fps | 112 |\n", "| time_elapsed | 9484 |\n", "| 
total_timesteps | 4065502 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.101 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 166 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4172 |\n", "| fps | 112 |\n", "| time_elapsed | 9513 |\n", "| total_timesteps | 4069074 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.178 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 166 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4176 |\n", "| fps | 112 |\n", "| time_elapsed | 9552 |\n", "| total_timesteps | 4073820 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.12 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 163 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4180 |\n", "| fps | 112 |\n", "| time_elapsed | 9590 |\n", "| total_timesteps | 4078474 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0574 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 169 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4184 |\n", "| fps | 112 |\n", "| time_elapsed | 9628 |\n", "| total_timesteps | 4083179 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.128 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 169 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4188 |\n", "| fps | 112 |\n", "| time_elapsed | 9666 |\n", "| total_timesteps | 4087717 |\n", "| train/ | |\n", "| 
learning_rate | 5e-05 |\n", "| loss | 0.0583 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 168 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4192 |\n", "| fps | 112 |\n", "| time_elapsed | 9702 |\n", "| total_timesteps | 4092159 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.27 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 169 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4196 |\n", "| fps | 112 |\n", "| time_elapsed | 9749 |\n", "| total_timesteps | 4097933 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0987 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 170 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4200 |\n", "| fps | 112 |\n", "| time_elapsed | 9787 |\n", "| total_timesteps | 4102585 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.208 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 174 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4204 |\n", "| fps | 112 |\n", "| time_elapsed | 9828 |\n", "| total_timesteps | 4107604 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.171 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 173 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4208 |\n", "| fps | 112 |\n", "| time_elapsed | 9870 |\n", "| total_timesteps | 4112749 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.135 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 176 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4212 |\n", "| fps | 112 |\n", "| time_elapsed | 9911 |\n", "| total_timesteps | 4117694 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0774 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 176 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4216 |\n", "| fps | 112 |\n", "| time_elapsed | 9953 |\n", "| total_timesteps | 4122898 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0458 |\n", "----------------------------------\n", "Eval num_timesteps=4125000, episode_reward=242.90 +/- 82.48\n", "Episode length: 1305.50 +/- 195.95\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.31e+03 |\n", "| mean_reward | 243 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 4125000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.229 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 173 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4220 |\n", "| fps | 112 |\n", "| time_elapsed | 10028 |\n", "| total_timesteps | 4127816 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.137 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 171 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4224 |\n", "| fps | 112 |\n", "| time_elapsed | 10068 |\n", "| total_timesteps | 4131971 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.114 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 166 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4228 |\n", "| fps | 112 |\n", "| time_elapsed | 10108 |\n", "| total_timesteps | 4136886 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.104 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 168 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4232 |\n", "| fps | 112 |\n", "| time_elapsed | 10144 |\n", "| total_timesteps | 4141296 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.157 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 176 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4236 |\n", "| fps | 112 |\n", "| time_elapsed | 10184 |\n", "| total_timesteps | 4146198 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0935 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 179 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4240 |\n", "| fps | 112 |\n", "| time_elapsed | 10223 |\n", "| total_timesteps | 4150914 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.117 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 184 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4244 |\n", "| fps | 112 |\n", "| time_elapsed | 10261 |\n", "| total_timesteps | 4155542 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.14 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 187 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4248 |\n", "| fps | 112 |\n", "| time_elapsed | 10295 |\n", "| total_timesteps | 4159724 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.28 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 189 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4252 |\n", "| fps | 112 |\n", "| time_elapsed | 10329 |\n", "| total_timesteps | 4164000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0635 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 196 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4256 |\n", "| fps | 112 |\n", "| time_elapsed | 10364 |\n", "| total_timesteps | 4168266 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0836 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 198 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4260 |\n", "| fps | 112 |\n", "| time_elapsed | 10406 |\n", "| total_timesteps | 4173384 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.11 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 205 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4264 |\n", "| fps | 112 |\n", "| time_elapsed | 10446 |\n", "| total_timesteps | 4178297 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.195 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | 
|\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 210 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4268 |\n", "| fps | 112 |\n", "| time_elapsed | 10490 |\n", "| total_timesteps | 4183638 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.167 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 214 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4272 |\n", "| fps | 112 |\n", "| time_elapsed | 10527 |\n", "| total_timesteps | 4188130 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.346 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 213 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4276 |\n", "| fps | 112 |\n", "| time_elapsed | 10566 |\n", "| total_timesteps | 4192918 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0573 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 207 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4280 |\n", "| fps | 112 |\n", "| time_elapsed | 10605 |\n", "| total_timesteps | 4197588 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.575 |\n", "----------------------------------\n", "Eval num_timesteps=4200000, episode_reward=95.90 +/- 37.91\n", "Episode length: 1303.90 +/- 154.47\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.3e+03 |\n", "| mean_reward | 95.9 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 4200000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.153 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | 
|\n", "| ep_len_mean | 1.45e+03 |\n", "| ep_rew_mean | 213 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4284 |\n", "| fps | 112 |\n", "| time_elapsed | 10882 |\n", "| total_timesteps | 4227700 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.35 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 215 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4288 |\n", "| fps | 112 |\n", "| time_elapsed | 10917 |\n", "| total_timesteps | 4232038 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0596 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.45e+03 |\n", "| ep_rew_mean | 213 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4292 |\n", "| fps | 112 |\n", "| time_elapsed | 10955 |\n", "| total_timesteps | 4236698 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0933 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 214 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4296 |\n", "| fps | 112 |\n", "| time_elapsed | 10991 |\n", "| total_timesteps | 4241048 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0324 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 219 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4300 |\n", "| fps | 112 |\n", "| time_elapsed | 11028 |\n", "| total_timesteps | 4245586 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0444 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| 
ep_rew_mean | 226 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4304 |\n", "| fps | 112 |\n", "| time_elapsed | 11060 |\n", "| total_timesteps | 4249431 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0635 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 229 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4308 |\n", "| fps | 112 |\n", "| time_elapsed | 11089 |\n", "| total_timesteps | 4253022 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0392 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 228 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4312 |\n", "| fps | 113 |\n", "| time_elapsed | 11121 |\n", "| total_timesteps | 4256946 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.43 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 227 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4316 |\n", "| fps | 113 |\n", "| time_elapsed | 11154 |\n", "| total_timesteps | 4260876 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.17 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 226 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4320 |\n", "| fps | 113 |\n", "| time_elapsed | 11187 |\n", "| total_timesteps | 4264896 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0656 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 225 |\n", "| exploration_rate | 0.05 
|\n", "| time/ | |\n", "| episodes | 4324 |\n", "| fps | 113 |\n", "| time_elapsed | 11227 |\n", "| total_timesteps | 4269880 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.204 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 229 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4328 |\n", "| fps | 113 |\n", "| time_elapsed | 11268 |\n", "| total_timesteps | 4274896 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.67 |\n", "----------------------------------\n", "Eval num_timesteps=4275000, episode_reward=248.20 +/- 134.43\n", "Episode length: 3895.90 +/- 7701.30\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 3.9e+03 |\n", "| mean_reward | 248 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 4275000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.114 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 230 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4332 |\n", "| fps | 112 |\n", "| time_elapsed | 11391 |\n", "| total_timesteps | 4278465 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0533 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 230 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4336 |\n", "| fps | 112 |\n", "| time_elapsed | 11435 |\n", "| total_timesteps | 4283839 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.088 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 232 |\n", "| exploration_rate | 0.05 
|\n", "| time/ | |\n", "| episodes | 4340 |\n", "| fps | 112 |\n", "| time_elapsed | 11476 |\n", "| total_timesteps | 4288894 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0715 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 233 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4344 |\n", "| fps | 112 |\n", "| time_elapsed | 11512 |\n", "| total_timesteps | 4293289 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.263 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 236 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4348 |\n", "| fps | 112 |\n", "| time_elapsed | 11546 |\n", "| total_timesteps | 4297424 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0843 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 236 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4352 |\n", "| fps | 112 |\n", "| time_elapsed | 11587 |\n", "| total_timesteps | 4302342 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.873 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 232 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4356 |\n", "| fps | 112 |\n", "| time_elapsed | 11628 |\n", "| total_timesteps | 4307348 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0952 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 234 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4360 |\n", "| 
fps | 112 |\n", "| time_elapsed | 11663 |\n", "| total_timesteps | 4311572 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.126 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 233 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4364 |\n", "| fps | 112 |\n", "| time_elapsed | 11700 |\n", "| total_timesteps | 4316178 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0642 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 231 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4368 |\n", "| fps | 112 |\n", "| time_elapsed | 11736 |\n", "| total_timesteps | 4320485 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.146 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 233 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4372 |\n", "| fps | 112 |\n", "| time_elapsed | 11775 |\n", "| total_timesteps | 4325277 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.102 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 235 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4376 |\n", "| fps | 112 |\n", "| time_elapsed | 11808 |\n", "| total_timesteps | 4329365 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.183 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 237 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4380 |\n", "| fps | 112 |\n", "| time_elapsed | 11849 |\n", "| 
total_timesteps | 4334371 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0539 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 231 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4384 |\n", "| fps | 112 |\n", "| time_elapsed | 11889 |\n", "| total_timesteps | 4339247 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.146 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 229 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4388 |\n", "| fps | 112 |\n", "| time_elapsed | 11928 |\n", "| total_timesteps | 4343983 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.229 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 235 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4392 |\n", "| fps | 112 |\n", "| time_elapsed | 11971 |\n", "| total_timesteps | 4349241 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.695 |\n", "----------------------------------\n", "Eval num_timesteps=4350000, episode_reward=258.40 +/- 107.18\n", "Episode length: 1256.90 +/- 159.73\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.26e+03 |\n", "| mean_reward | 258 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 4350000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0545 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 236 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4396 |\n", "| fps | 112 |\n", "| time_elapsed | 12043 |\n", "| 
total_timesteps | 4354321 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.567 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 234 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4400 |\n", "| fps | 112 |\n", "| time_elapsed | 12086 |\n", "| total_timesteps | 4359611 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.253 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 228 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4404 |\n", "| fps | 112 |\n", "| time_elapsed | 12122 |\n", "| total_timesteps | 4364037 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.33 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 228 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4408 |\n", "| fps | 112 |\n", "| time_elapsed | 12162 |\n", "| total_timesteps | 4368905 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.366 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 224 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4412 |\n", "| fps | 112 |\n", "| time_elapsed | 12206 |\n", "| total_timesteps | 4374277 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.113 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 223 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4416 |\n", "| fps | 112 |\n", "| time_elapsed | 12249 |\n", "| total_timesteps | 4379587 |\n", "| train/ | |\n", "| 
learning_rate | 5e-05 |\n", "| loss | 0.0764 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 227 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4420 |\n", "| fps | 112 |\n", "| time_elapsed | 12284 |\n", "| total_timesteps | 4383842 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0869 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 229 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4424 |\n", "| fps | 112 |\n", "| time_elapsed | 12326 |\n", "| total_timesteps | 4388932 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0827 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 225 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4428 |\n", "| fps | 112 |\n", "| time_elapsed | 12362 |\n", "| total_timesteps | 4393406 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.114 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 229 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4432 |\n", "| fps | 112 |\n", "| time_elapsed | 12400 |\n", "| total_timesteps | 4398014 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0306 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 224 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4436 |\n", "| fps | 112 |\n", "| time_elapsed | 12435 |\n", "| total_timesteps | 4402280 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.35 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 223 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4440 |\n", "| fps | 112 |\n", "| time_elapsed | 12476 |\n", "| total_timesteps | 4407274 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0949 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 221 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4444 |\n", "| fps | 112 |\n", "| time_elapsed | 12518 |\n", "| total_timesteps | 4412386 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.1 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 223 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4448 |\n", "| fps | 112 |\n", "| time_elapsed | 12555 |\n", "| total_timesteps | 4416932 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0498 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 225 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4452 |\n", "| fps | 112 |\n", "| time_elapsed | 12596 |\n", "| total_timesteps | 4421926 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.121 |\n", "----------------------------------\n", "Eval num_timesteps=4425000, episode_reward=138.70 +/- 30.61\n", "Episode length: 1228.60 +/- 233.70\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.23e+03 |\n", "| mean_reward | 139 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 4425000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.194 |\n", 
"----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 221 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4456 |\n", "| fps | 112 |\n", "| time_elapsed | 12659 |\n", "| total_timesteps | 4426048 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.162 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 218 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4460 |\n", "| fps | 112 |\n", "| time_elapsed | 12695 |\n", "| total_timesteps | 4430468 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0603 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 223 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4464 |\n", "| fps | 112 |\n", "| time_elapsed | 12734 |\n", "| total_timesteps | 4435153 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0892 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 223 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4468 |\n", "| fps | 112 |\n", "| time_elapsed | 12773 |\n", "| total_timesteps | 4439905 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.238 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 224 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4472 |\n", "| fps | 112 |\n", "| time_elapsed | 12812 |\n", "| total_timesteps | 4444711 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0677 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 222 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4476 |\n", "| fps | 112 |\n", "| time_elapsed | 12850 |\n", "| total_timesteps | 4449369 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0522 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 225 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4480 |\n", "| fps | 112 |\n", "| time_elapsed | 12886 |\n", "| total_timesteps | 4453814 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.151 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 219 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4484 |\n", "| fps | 112 |\n", "| time_elapsed | 12913 |\n", "| total_timesteps | 4457108 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0978 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 214 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4488 |\n", "| fps | 112 |\n", "| time_elapsed | 12936 |\n", "| total_timesteps | 4459859 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0614 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 207 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4492 |\n", "| fps | 112 |\n", "| time_elapsed | 12968 |\n", "| total_timesteps | 4463729 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0981 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ 
| |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 206 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4496 |\n", "| fps | 112 |\n", "| time_elapsed | 13004 |\n", "| total_timesteps | 4468138 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.181 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 211 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4500 |\n", "| fps | 112 |\n", "| time_elapsed | 13037 |\n", "| total_timesteps | 4472184 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0693 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 212 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4504 |\n", "| fps | 112 |\n", "| time_elapsed | 13078 |\n", "| total_timesteps | 4477178 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.109 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 212 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4508 |\n", "| fps | 112 |\n", "| time_elapsed | 13113 |\n", "| total_timesteps | 4481382 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0498 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 215 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4512 |\n", "| fps | 112 |\n", "| time_elapsed | 13150 |\n", "| total_timesteps | 4486008 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0818 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| 
ep_rew_mean | 217 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4516 |\n", "| fps | 113 |\n", "| time_elapsed | 13187 |\n", "| total_timesteps | 4490476 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.271 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 214 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4520 |\n", "| fps | 113 |\n", "| time_elapsed | 13221 |\n", "| total_timesteps | 4494638 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.152 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 211 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4524 |\n", "| fps | 113 |\n", "| time_elapsed | 13256 |\n", "| total_timesteps | 4498933 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0756 |\n", "----------------------------------\n", "Eval num_timesteps=4500000, episode_reward=218.00 +/- 135.33\n", "Episode length: 1142.60 +/- 208.50\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.14e+03 |\n", "| mean_reward | 218 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 4500000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0991 |\n", "----------------------------------\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Train the model\n", "model.learn(total_timesteps=NUM_TIMESTEPS, \n", " callback=callback_list, \n", " tb_log_name=\"./tb/\", \n", " reset_num_timesteps=False)" ] }, { "cell_type": "code", "execution_count": 9, "id": "bda94f2d", "metadata": { "execution": { "iopub.execute_input": "2024-05-11T01:41:04.663130Z", "iopub.status.busy": 
"2024-05-11T01:41:04.662323Z", "iopub.status.idle": "2024-05-11T01:41:57.585374Z", "shell.execute_reply": "2024-05-11T01:41:57.584165Z" }, "papermill": { "duration": 52.966023, "end_time": "2024-05-11T01:41:57.588327", "exception": false, "start_time": "2024-05-11T01:41:04.622304", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Save the model, policy, and replay buffer for future loading and training\n", "model.save(MODEL_FILE_NAME)\n", "model.save_replay_buffer(BUFFER_FILE_NAME)\n", "model.policy.save(POLICY_FILE_NAME)" ] } ], "metadata": { "kaggle": { "accelerator": "nvidiaTeslaT4", "dataSources": [ { "sourceId": 176858961, "sourceType": "kernelVersion" } ], "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" }, "papermill": { "default_parameters": {}, "duration": 13561.860896, "end_time": "2024-05-11T01:42:00.659070", "environment_variables": {}, "exception": null, "input_path": "__notebook__.ipynb", "output_path": "__notebook__.ipynb", "parameters": {}, "start_time": "2024-05-10T21:55:58.798174", "version": "2.5.0" } }, "nbformat": 4, "nbformat_minor": 5 }