{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "f35cdef0", "metadata": { "execution": { "iopub.execute_input": "2024-05-09T17:59:29.851996Z", "iopub.status.busy": "2024-05-09T17:59:29.851730Z", "iopub.status.idle": "2024-05-09T18:00:30.414258Z", "shell.execute_reply": "2024-05-09T18:00:30.412981Z" }, "papermill": { "duration": 60.569531, "end_time": "2024-05-09T18:00:30.416976", "exception": false, "start_time": "2024-05-09T17:59:29.847445", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "%%capture\n", "!pip install stable-baselines3[extra]\n", "!pip install moviepy" ] }, { "cell_type": "code", "execution_count": 2, "id": "91b98e95", "metadata": { "execution": { "iopub.execute_input": "2024-05-09T18:00:30.425167Z", "iopub.status.busy": "2024-05-09T18:00:30.424863Z", "iopub.status.idle": "2024-05-09T18:00:46.167563Z", "shell.execute_reply": "2024-05-09T18:00:46.166782Z" }, "papermill": { "duration": 15.74916, "end_time": "2024-05-09T18:00:46.169847", "exception": false, "start_time": "2024-05-09T18:00:30.420687", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-05-09 18:00:36.767705: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-05-09 18:00:36.767830: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-05-09 18:00:36.891074: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n" ] } ], "source": [ "from stable_baselines3 import DQN\n", "from stable_baselines3.common.monitor import Monitor\n", "from stable_baselines3.common.callbacks import BaseCallback, EvalCallback, CallbackList\n", "from stable_baselines3.common.logger import Video, HParam, TensorBoardOutputFormat\n", "from stable_baselines3.common.evaluation import evaluate_policy\n", "\n", "from typing import Any, Dict\n", "\n", "import gymnasium as gym\n", "import torch as th\n", "import numpy as np\n", "\n", "# =====File names=====\n", "MODEL_FILE_NAME = \"ALE-Pacman-v5\"\n", "BUFFER_FILE_NAME = \"dqn_replay_buffer_pacman_v2\"\n", "POLICY_FILE_NAME = \"dqn_policy_pacman_v2\"\n", "\n", "# =====Model Config=====\n", "# Evaluate in tenths\n", "EVAL_CALLBACK_FREQ = 150_000\n", "# Record in quarters (the last one won't record, will have to do manually)\n", "VIDEO_CALLBACK_FREQ = 375_000\n", "FRAMESKIP = 4\n", "NUM_TIMESTEPS = 1_500_000\n", "\n", "# =====Hyperparams=====\n", "EXPLORATION_FRACTION = 0.3\n", "# Buffer size needs to be less than about 60k in order to save it in a Kaggle instance\n", "BUFFER_SIZE = 60_000\n", "BATCH_SIZE = 64\n", "LEARNING_STARTS = 50_000\n", "LEARNING_RATE = 0.0002\n", "GAMMA = 0.999\n", "FINAL_EPSILON = 0.1\n", "# Target Update Interval is set to 10k by default and looks like it is set to \n", "# 4 in the Nature paper. This is a large discrepency and makes me wonder if it \n", "# is something different or measured differently...\n", "TARGET_UPDATE_INTERVAL = 1_000" ] }, { "cell_type": "code", "execution_count": 3, "id": "a1428943", "metadata": { "execution": { "iopub.execute_input": "2024-05-09T18:00:46.178664Z", "iopub.status.busy": "2024-05-09T18:00:46.178125Z", "iopub.status.idle": "2024-05-09T18:00:46.188483Z", "shell.execute_reply": "2024-05-09T18:00:46.187683Z" }, "papermill": { "duration": 0.016817, "end_time": "2024-05-09T18:00:46.190322", "exception": false, "start_time": "2024-05-09T18:00:46.173505", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# VideoRecorderCallback\n", "# The VideoRecorderCallback should record a video of the agent in the evaluation environment\n", "# every render_freq timesteps. It will record one episode. It will also record one episode when\n", "# the training has been completed\n", "\n", "class VideoRecorderCallback(BaseCallback):\n", " def __init__(self, eval_env: gym.Env, render_freq: int, n_eval_episodes: int = 1, deterministic: bool = True):\n", " \"\"\"\n", " Records a video of an agent's trajectory traversing ``eval_env`` and logs it to TensorBoard.\n", " :param eval_env: A gym environment from which the trajectory is recorded\n", " :param render_freq: Render the agent's trajectory every eval_freq call of the callback.\n", " :param n_eval_episodes: Number of episodes to render\n", " :param deterministic: Whether to use deterministic or stochastic policy\n", " \"\"\"\n", " super().__init__()\n", " self._eval_env = eval_env\n", " self._render_freq = render_freq\n", " self._n_eval_episodes = n_eval_episodes\n", " self._deterministic = deterministic\n", "\n", " def _on_step(self) -> bool:\n", " if self.n_calls % self._render_freq == 0:\n", " screens = []\n", "\n", " def grab_screens(_locals: Dict[str, Any], _globals: Dict[str, Any]) -> None:\n", " \"\"\"\n", " Renders the environment in its current state, recording the screen in the captured `screens` list\n", " :param _locals: A dictionary containing all local variables of the callback's scope\n", " :param _globals: A dictionary containing all global variables of the callback's scope\n", " \"\"\"\n", " screen = self._eval_env.render()\n", " # PyTorch uses CxHxW vs HxWxC gym (and tensorflow) image convention\n", " screens.append(screen.transpose(2, 0, 1))\n", "\n", " evaluate_policy(\n", " self.model,\n", " self._eval_env,\n", " callback=grab_screens,\n", " n_eval_episodes=self._n_eval_episodes,\n", " deterministic=self._deterministic,\n", " )\n", " self.logger.record(\n", " \"trajectory/video\",\n", " Video(th.from_numpy(np.array([screens])), fps=60),\n", " exclude=(\"stdout\", \"log\", \"json\", \"csv\"),\n", " )\n", " return True" ] }, { "cell_type": "code", "execution_count": 4, "id": "0233aabc", "metadata": { "execution": { "iopub.execute_input": "2024-05-09T18:00:46.198096Z", "iopub.status.busy": "2024-05-09T18:00:46.197551Z", "iopub.status.idle": "2024-05-09T18:00:46.208900Z", "shell.execute_reply": "2024-05-09T18:00:46.208068Z" }, "papermill": { "duration": 0.017496, "end_time": "2024-05-09T18:00:46.210939", "exception": false, "start_time": "2024-05-09T18:00:46.193443", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# HParamCallback\n", "# This should log the hyperparameters specified and map the metrics that are logged to \n", "# the appropriate run.\n", "class HParamCallback(BaseCallback):\n", " \"\"\"\n", " Saves the hyperparameters and metrics at the start of the training, and logs them to TensorBoard.\n", " \"\"\" \n", " def __init__(self):\n", " super().__init__()\n", " \n", "\n", " def _on_training_start(self) -> None:\n", " \n", " hparam_dict = {\n", " \"algorithm\": self.model.__class__.__name__,\n", " \"policy\": self.model.policy.__class__.__name__,\n", " \"environment\": self.model.env.__class__.__name__,\n", " \"buffer_size\": self.model.buffer_size,\n", " \"batch_size\": self.model.batch_size,\n", " \"tau\": self.model.tau,\n", " \"gradient_steps\": self.model.gradient_steps,\n", " \"target_update_interval\": self.model.target_update_interval,\n", " \"exploration_fraction\": self.model.exploration_fraction,\n", " \"exploration_initial_eps\": self.model.exploration_initial_eps,\n", " \"exploration_final_eps\": self.model.exploration_final_eps,\n", " \"max_grad_norm\": self.model.max_grad_norm,\n", " \"tensorboard_log\": self.model.tensorboard_log,\n", " \"seed\": self.model.seed, \n", " \"learning rate\": self.model.learning_rate,\n", " \"gamma\": self.model.gamma, \n", " }\n", " # define the metrics that will appear in the `HPARAMS` Tensorboard tab by referencing their tag\n", " # Tensorbaord will find & display metrics from the `SCALARS` tab\n", " metric_dict = {\n", " \"eval/mean_ep_length\": 0,\n", " \"eval/mean_reward\": 0,\n", " \"rollout/ep_len_mean\": 0,\n", " \"rollout/ep_rew_mean\": 0,\n", " \"rollout/exploration_rate\": 0,\n", " \"time/_episode_num\": 0,\n", " \"time/fps\": 0,\n", " \"time/total_timesteps\": 0,\n", " \"train/learning_rate\": 0.0,\n", " \"train/loss\": 0.0,\n", " \"train/n_updates\": 0.0,\n", " \"locals/rewards\": 0.0,\n", " \"locals/infos_0_lives\": 0.0,\n", " \"locals/num_collected_steps\": 0.0,\n", " \"locals/num_collected_episodes\": 0.0\n", " }\n", " \n", " self.logger.record(\n", " \"hparams\",\n", " HParam(hparam_dict, metric_dict),\n", " exclude=(\"stdout\", \"log\", \"json\", \"csv\"),\n", " )\n", " \n", " def _on_step(self) -> bool:\n", " return True" ] }, { "cell_type": "code", "execution_count": 5, "id": "73547a8c", "metadata": { "execution": { "iopub.execute_input": "2024-05-09T18:00:46.218546Z", "iopub.status.busy": "2024-05-09T18:00:46.218198Z", "iopub.status.idle": "2024-05-09T18:00:46.233390Z", "shell.execute_reply": "2024-05-09T18:00:46.232561Z" }, "papermill": { "duration": 0.021169, "end_time": "2024-05-09T18:00:46.235171", "exception": false, "start_time": "2024-05-09T18:00:46.214002", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# PlotTensorboardValuesCallback\n", "# This callback should log values to tensorboard on every step. \n", "# The self.logger class should plot a new scalar value when recording.\n", "\n", "class PlotTensorboardValuesCallback(BaseCallback):\n", " \"\"\"\n", " Custom callback for plotting additional values in tensorboard.\n", " \"\"\"\n", " def __init__(self, eval_env: gym.Env, train_env: gym.Env, model: DQN, verbose=0):\n", " super().__init__(verbose)\n", " self._eval_env = eval_env\n", " self._train_env = train_env\n", " self._model = model\n", "\n", " def _on_training_start(self) -> None:\n", " output_formats = self.logger.output_formats\n", " # Save reference to tensorboard formatter object\n", " # note: the failure case (not formatter found) is not handled here, should be done with try/except.\n", " try:\n", " self.tb_formatter = next(formatter for formatter in output_formats if isinstance(formatter, TensorBoardOutputFormat))\n", " except:\n", " print(\"Exception thrown in tb_formatter initialization.\") \n", " \n", " self.tb_formatter.writer.add_text(\"metadata/eval_env\", str(self._eval_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"metadata/train_env\", str(self._train_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net\", str(self._model.q_net), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net_target\", str(self._model.q_net_target), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", "\n", " def _on_step(self) -> bool:\n", " self.logger.record(\"time/_episode_num\", self.model._episode_num, exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"train/n_updates\", self.model._n_updates, exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/rewards\", self.locals[\"rewards\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/infos_0_lives\", self.locals[\"infos\"][0][\"lives\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/num_collected_steps\", self.locals[\"num_collected_steps\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/num_collected_episodes\", self.locals[\"num_collected_episodes\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " \n", " return True\n", " \n", " def _on_training_end(self) -> None:\n", " self.tb_formatter.writer.add_text(\"metadata/eval_env\", str(self._eval_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"metadata/train_env\", str(self._train_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net\", str(self._model.q_net), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net_target\", str(self._model.q_net_target), self.num_timesteps)\n", " self.tb_formatter.writer.flush()" ] }, { "cell_type": "code", "execution_count": 6, "id": "6646d20b", "metadata": { "execution": { "iopub.execute_input": "2024-05-09T18:00:46.242691Z", "iopub.status.busy": "2024-05-09T18:00:46.242160Z", "iopub.status.idle": "2024-05-09T18:00:48.261901Z", "shell.execute_reply": "2024-05-09T18:00:48.261077Z" }, "papermill": { "duration": 2.026055, "end_time": "2024-05-09T18:00:48.264373", "exception": false, "start_time": "2024-05-09T18:00:46.238318", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)\n", "[Powered by Stella]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Using cuda device\n", "Wrapping the env with a `Monitor` wrapper\n", "Wrapping the env in a DummyVecEnv.\n", "Wrapping the env in a VecTransposeImage.\n" ] } ], "source": [ "# make the training and evaluation environments\n", "eval_env = Monitor(gym.make(\"ALE/Pacman-v5\", render_mode=\"rgb_array\", frameskip=FRAMESKIP))\n", "train_env = gym.make(\"ALE/Pacman-v5\", render_mode=\"rgb_array\", frameskip=FRAMESKIP)\n", "\n", "# Make the model with specified hyperparams\n", "model = DQN(\n", " \"CnnPolicy\",\n", " train_env,\n", " verbose=1,\n", " buffer_size=BUFFER_SIZE,\n", " exploration_fraction = EXPLORATION_FRACTION,\n", " batch_size=BATCH_SIZE,\n", " exploration_final_eps=FINAL_EPSILON,\n", " gamma=GAMMA,\n", " learning_starts=LEARNING_STARTS,\n", " learning_rate=LEARNING_RATE,\n", " target_update_interval=TARGET_UPDATE_INTERVAL,\n", " tensorboard_log=\"./\",\n", " )" ] }, { "cell_type": "code", "execution_count": 7, "id": "4947ec60", "metadata": { "execution": { "iopub.execute_input": "2024-05-09T18:00:48.273307Z", "iopub.status.busy": "2024-05-09T18:00:48.272717Z", "iopub.status.idle": "2024-05-09T18:00:48.278547Z", "shell.execute_reply": "2024-05-09T18:00:48.277676Z" }, "papermill": { "duration": 0.012273, "end_time": "2024-05-09T18:00:48.280346", "exception": false, "start_time": "2024-05-09T18:00:48.268073", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Define the callbacks and put them in a list\n", "eval_callback = EvalCallback(\n", " eval_env,\n", " best_model_save_path=\"./best_model/\",\n", " log_path=\"./evals/\",\n", " eval_freq=EVAL_CALLBACK_FREQ,\n", " n_eval_episodes=10,\n", " deterministic=True,\n", " render=False)\n", "\n", "tbplot_callback = PlotTensorboardValuesCallback(eval_env=eval_env, train_env=train_env, model=model)\n", "video_callback = VideoRecorderCallback(eval_env, render_freq=VIDEO_CALLBACK_FREQ)\n", "hparam_callback = HParamCallback()\n", "\n", "callback_list = CallbackList([hparam_callback, eval_callback, video_callback, tbplot_callback])" ] }, { "cell_type": "code", "execution_count": 8, "id": "10f3f9d3", "metadata": { "execution": { "iopub.execute_input": "2024-05-09T18:00:48.288666Z", "iopub.status.busy": "2024-05-09T18:00:48.288031Z", "iopub.status.idle": "2024-05-09T21:21:21.979504Z", "shell.execute_reply": "2024-05-09T21:21:21.978524Z" }, "papermill": { "duration": 12033.698017, "end_time": "2024-05-09T21:21:21.981863", "exception": false, "start_time": "2024-05-09T18:00:48.283846", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Logging to ././tb/_1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/stable_baselines3/common/callbacks.py:403: UserWarning: Training and eval env are not of the same type != \n", " warnings.warn(\"Training and eval env are not of the same type\" f\"{self.training_env} != {self.eval_env}\")\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 375 |\n", "| ep_rew_mean | 13 |\n", "| exploration_rate | 0.997 |\n", "| time/ | |\n", "| episodes | 4 |\n", "| fps | 658 |\n", "| time_elapsed | 2 |\n", "| total_timesteps | 1500 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 421 |\n", "| ep_rew_mean | 15.6 |\n", "| exploration_rate | 0.993 |\n", "| time/ | |\n", "| episodes | 8 |\n", "| fps | 670 |\n", "| time_elapsed | 5 |\n", "| total_timesteps | 3368 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 426 |\n", "| ep_rew_mean | 15.2 |\n", "| exploration_rate | 0.99 |\n", "| time/ | |\n", "| episodes | 12 |\n", "| fps | 675 |\n", "| time_elapsed | 7 |\n", "| total_timesteps | 5114 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 439 |\n", "| ep_rew_mean | 16.4 |\n", "| exploration_rate | 0.986 |\n", "| time/ | |\n", "| episodes | 16 |\n", "| fps | 677 |\n", "| time_elapsed | 10 |\n", "| total_timesteps | 7018 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 434 |\n", "| ep_rew_mean | 15.7 |\n", "| exploration_rate | 0.983 |\n", "| time/ | |\n", "| episodes | 20 |\n", "| fps | 680 |\n", "| time_elapsed | 12 |\n", "| total_timesteps | 8676 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 432 |\n", "| ep_rew_mean | 15.9 |\n", "| exploration_rate | 0.979 |\n", "| time/ | |\n", "| episodes | 24 |\n", "| fps | 676 |\n", "| time_elapsed | 15 |\n", "| total_timesteps | 10372 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 428 |\n", "| ep_rew_mean | 15.5 |\n", "| exploration_rate | 0.976 |\n", "| time/ | |\n", "| episodes | 28 |\n", "| fps | 674 |\n", "| time_elapsed | 17 |\n", "| total_timesteps | 11984 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 427 |\n", "| ep_rew_mean | 15.1 |\n", "| exploration_rate | 0.973 |\n", "| time/ | |\n", "| episodes | 32 |\n", "| fps | 676 |\n", "| time_elapsed | 20 |\n", "| total_timesteps | 13650 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 423 |\n", "| ep_rew_mean | 15.2 |\n", "| exploration_rate | 0.97 |\n", "| time/ | |\n", "| episodes | 36 |\n", "| fps | 677 |\n", "| time_elapsed | 22 |\n", "| total_timesteps | 15228 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 425 |\n", "| ep_rew_mean | 15.7 |\n", "| exploration_rate | 0.966 |\n", "| time/ | |\n", "| episodes | 40 |\n", "| fps | 678 |\n", "| time_elapsed | 25 |\n", "| total_timesteps | 16992 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 425 |\n", "| ep_rew_mean | 15.7 |\n", "| exploration_rate | 0.963 |\n", "| time/ | |\n", "| episodes | 44 |\n", "| fps | 678 |\n", "| time_elapsed | 27 |\n", "| total_timesteps | 18716 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 427 |\n", "| ep_rew_mean | 15.9 |\n", "| exploration_rate | 0.959 |\n", "| time/ | |\n", "| episodes | 48 |\n", "| fps | 679 |\n", "| time_elapsed | 30 |\n", "| total_timesteps | 20492 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 425 |\n", "| ep_rew_mean | 15.8 |\n", "| exploration_rate | 0.956 |\n", "| time/ | |\n", "| episodes | 52 |\n", "| fps | 679 |\n", "| time_elapsed | 32 |\n", "| total_timesteps | 22080 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 426 |\n", "| ep_rew_mean | 16.9 |\n", "| exploration_rate | 0.952 |\n", "| time/ | |\n", "| episodes | 56 |\n", "| fps | 680 |\n", "| time_elapsed | 35 |\n", "| total_timesteps | 23878 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 424 |\n", "| ep_rew_mean | 16.5 |\n", "| exploration_rate | 0.949 |\n", "| time/ | |\n", "| episodes | 60 |\n", "| fps | 680 |\n", "| time_elapsed | 37 |\n", "| total_timesteps | 25442 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 423 |\n", "| ep_rew_mean | 16.4 |\n", "| exploration_rate | 0.946 |\n", "| time/ | |\n", "| episodes | 64 |\n", "| fps | 679 |\n", "| time_elapsed | 39 |\n", "| total_timesteps | 27076 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 422 |\n", "| ep_rew_mean | 16.2 |\n", "| exploration_rate | 0.943 |\n", "| time/ | |\n", "| episodes | 68 |\n", "| fps | 680 |\n", "| time_elapsed | 42 |\n", "| total_timesteps | 28728 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 422 |\n", "| ep_rew_mean | 16 |\n", "| exploration_rate | 0.939 |\n", "| time/ | |\n", "| episodes | 72 |\n", "| fps | 680 |\n", "| time_elapsed | 44 |\n", "| total_timesteps | 30374 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 423 |\n", "| ep_rew_mean | 15.9 |\n", "| exploration_rate | 0.936 |\n", "| time/ | |\n", "| episodes | 76 |\n", "| fps | 681 |\n", "| time_elapsed | 47 |\n", "| total_timesteps | 32126 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 421 |\n", "| ep_rew_mean | 15.6 |\n", "| exploration_rate | 0.933 |\n", "| time/ | |\n", "| episodes | 80 |\n", "| fps | 680 |\n", "| time_elapsed | 49 |\n", "| total_timesteps | 33692 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 422 |\n", "| ep_rew_mean | 15.7 |\n", "| exploration_rate | 0.929 |\n", "| time/ | |\n", "| episodes | 84 |\n", "| fps | 681 |\n", "| time_elapsed | 52 |\n", "| total_timesteps | 35449 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 420 |\n", "| ep_rew_mean | 15.4 |\n", "| exploration_rate | 0.926 |\n", "| time/ | |\n", "| episodes | 88 |\n", "| fps | 682 |\n", "| time_elapsed | 54 |\n", "| total_timesteps | 36931 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 426 |\n", "| ep_rew_mean | 15.8 |\n", "| exploration_rate | 0.922 |\n", "| time/ | |\n", "| episodes | 92 |\n", "| fps | 682 |\n", "| time_elapsed | 57 |\n", "| total_timesteps | 39155 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 430 |\n", "| ep_rew_mean | 15.6 |\n", "| exploration_rate | 0.917 |\n", "| time/ | |\n", "| episodes | 96 |\n", "| fps | 683 |\n", "| time_elapsed | 60 |\n", "| total_timesteps | 41293 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 429 |\n", "| ep_rew_mean | 15.4 |\n", "| exploration_rate | 0.914 |\n", "| time/ | |\n", "| episodes | 100 |\n", "| fps | 683 |\n", "| time_elapsed | 62 |\n", "| total_timesteps | 42863 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 429 |\n", "| ep_rew_mean | 15.3 |\n", "| exploration_rate | 0.911 |\n", "| time/ | |\n", "| episodes | 104 |\n", "| fps | 683 |\n", "| time_elapsed | 64 |\n", "| total_timesteps | 44367 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 428 |\n", "| ep_rew_mean | 15.4 |\n", "| exploration_rate | 0.908 |\n", "| time/ | |\n", "| episodes | 108 |\n", "| fps | 683 |\n", "| time_elapsed | 67 |\n", "| total_timesteps | 46169 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 430 |\n", "| ep_rew_mean | 15.5 |\n", "| exploration_rate | 0.904 |\n", "| time/ | |\n", "| episodes | 112 |\n", "| fps | 683 |\n", "| time_elapsed | 70 |\n", "| total_timesteps | 48081 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 429 |\n", "| ep_rew_mean | 15.4 |\n", "| exploration_rate | 0.9 |\n", "| time/ | |\n", "| episodes | 116 |\n", "| fps | 683 |\n", "| time_elapsed | 73 |\n", "| total_timesteps | 49945 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 429 |\n", "| ep_rew_mean | 15.5 |\n", "| exploration_rate | 0.897 |\n", "| time/ | |\n", "| episodes | 120 |\n", "| fps | 611 |\n", "| time_elapsed | 84 |\n", "| total_timesteps | 51551 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0224 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 428 |\n", "| ep_rew_mean | 15.3 |\n", "| exploration_rate | 0.894 |\n", "| time/ | |\n", "| episodes | 124 |\n", "| fps | 563 |\n", "| time_elapsed | 94 |\n", "| total_timesteps | 53135 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0156 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 430 |\n", "| ep_rew_mean | 15.4 |\n", "| exploration_rate | 0.89 |\n", "| time/ | |\n", "| episodes | 128 |\n", "| fps | 517 |\n", "| time_elapsed | 106 |\n", "| total_timesteps | 54991 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0195 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 430 |\n", "| ep_rew_mean | 15.5 |\n", "| exploration_rate | 0.887 |\n", "| time/ | |\n", "| episodes | 132 |\n", "| fps | 484 |\n", "| time_elapsed | 116 |\n", "| total_timesteps | 56619 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0079 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 432 |\n", "| ep_rew_mean | 15.7 |\n", "| exploration_rate | 0.883 |\n", "| time/ | |\n", "| episodes | 136 |\n", "| fps | 453 |\n", "| time_elapsed | 128 |\n", "| total_timesteps | 58441 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.00742 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 434 |\n", "| ep_rew_mean | 15.5 |\n", "| exploration_rate | 0.879 |\n", "| time/ | |\n", "| episodes | 140 |\n", "| fps | 426 |\n", "| time_elapsed | 141 |\n", "| total_timesteps | 60409 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0198 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 435 |\n", "| ep_rew_mean | 15.5 |\n", "| exploration_rate | 0.876 |\n", "| time/ | |\n", "| episodes | 144 |\n", "| fps | 405 |\n", "| time_elapsed | 153 |\n", "| total_timesteps | 62195 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0106 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 434 |\n", "| ep_rew_mean | 15.5 |\n", "| exploration_rate | 0.872 |\n", "| time/ | |\n", "| episodes | 148 |\n", "| fps | 388 |\n", "| time_elapsed | 164 |\n", "| total_timesteps | 63845 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0138 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 435 |\n", "| ep_rew_mean | 15.6 |\n", "| exploration_rate | 0.869 |\n", "| time/ | |\n", "| episodes | 152 |\n", "| fps | 373 |\n", "| time_elapsed | 175 |\n", "| total_timesteps | 65545 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0195 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 435 |\n", "| ep_rew_mean | 15.3 |\n", "| exploration_rate | 0.865 |\n", "| time/ | |\n", "| episodes | 156 |\n", "| fps | 359 |\n", "| time_elapsed | 187 |\n", "| total_timesteps | 67375 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0504 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 436 |\n", "| ep_rew_mean | 15.5 |\n", "| exploration_rate | 0.862 |\n", "| time/ | |\n", "| episodes | 160 |\n", "| fps | 347 |\n", "| time_elapsed | 198 |\n", "| total_timesteps | 69047 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0271 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 438 |\n", "| ep_rew_mean | 15.7 |\n", "| exploration_rate | 0.858 |\n", "| time/ | |\n", "| episodes | 164 |\n", "| fps | 336 |\n", "| time_elapsed | 210 |\n", "| total_timesteps | 70873 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0203 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 438 |\n", "| ep_rew_mean | 15.8 |\n", "| exploration_rate | 0.855 |\n", "| time/ | |\n", "| episodes | 168 |\n", "| fps | 327 |\n", "| time_elapsed | 221 |\n", "| total_timesteps | 72511 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0854 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 439 |\n", "| ep_rew_mean | 16 |\n", "| exploration_rate | 0.851 |\n", "| time/ | |\n", "| episodes | 172 |\n", "| fps | 318 |\n", "| time_elapsed | 233 |\n", "| total_timesteps | 74293 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.106 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 439 |\n", "| ep_rew_mean | 16.2 |\n", "| exploration_rate | 0.848 |\n", "| time/ | |\n", "| episodes | 176 |\n", "| fps | 310 |\n", "| time_elapsed | 244 |\n", "| total_timesteps | 76045 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.108 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 440 |\n", "| ep_rew_mean | 16.5 |\n", "| exploration_rate | 0.845 |\n", "| time/ | |\n", "| episodes | 180 |\n", "| fps | 303 |\n", "| time_elapsed | 255 |\n", "| total_timesteps | 77741 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0668 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 441 |\n", "| ep_rew_mean | 16.6 |\n", "| exploration_rate | 0.841 |\n", "| time/ | |\n", "| episodes | 184 |\n", "| fps | 296 |\n", "| time_elapsed | 267 |\n", "| total_timesteps | 79529 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0254 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 447 |\n", "| ep_rew_mean | 17.2 |\n", "| exploration_rate | 0.837 |\n", "| time/ | |\n", "| episodes | 188 |\n", "| fps | 289 |\n", "| time_elapsed | 281 |\n", "| total_timesteps | 81669 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0405 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 444 |\n", "| ep_rew_mean | 17 |\n", "| exploration_rate | 0.833 |\n", "| time/ | |\n", "| episodes | 192 |\n", "| fps | 283 |\n", "| time_elapsed | 294 |\n", "| total_timesteps | 83593 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0573 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 440 |\n", "| ep_rew_mean | 17.3 |\n", "| exploration_rate | 0.829 |\n", "| time/ | |\n", "| episodes | 196 |\n", "| fps | 278 |\n", "| time_elapsed | 306 |\n", "| total_timesteps | 85285 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.133 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 444 |\n", "| ep_rew_mean | 17.9 |\n", "| exploration_rate | 0.825 |\n", "| time/ | |\n", "| episodes | 200 |\n", "| fps | 273 |\n", "| time_elapsed | 319 |\n", "| total_timesteps | 87271 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0529 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 446 |\n", "| ep_rew_mean | 18.3 |\n", "| exploration_rate | 0.822 |\n", "| time/ | |\n", "| episodes | 204 |\n", "| fps | 268 |\n", "| time_elapsed | 330 |\n", "| total_timesteps | 88951 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0403 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 445 |\n", "| ep_rew_mean | 18.2 |\n", "| exploration_rate | 0.819 |\n", "| time/ | |\n", "| episodes | 208 |\n", "| fps | 264 |\n", "| time_elapsed | 342 |\n", "| total_timesteps | 90691 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.134 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 443 |\n", "| ep_rew_mean | 18.2 |\n", "| exploration_rate | 0.815 |\n", "| time/ | |\n", "| episodes | 212 |\n", "| fps | 260 |\n", "| time_elapsed | 354 |\n", "| total_timesteps | 92363 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0602 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 441 |\n", "| ep_rew_mean | 18.2 |\n", "| exploration_rate | 0.812 |\n", "| time/ | |\n", "| episodes | 216 |\n", "| fps | 257 |\n", "| time_elapsed | 365 |\n", "| total_timesteps | 94023 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.414 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 444 |\n", "| ep_rew_mean | 18.3 |\n", "| exploration_rate | 0.808 |\n", "| time/ | |\n", "| episodes | 220 |\n", "| fps | 253 |\n", "| time_elapsed | 378 |\n", "| total_timesteps | 95963 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.157 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 444 |\n", "| ep_rew_mean | 18.4 |\n", "| exploration_rate | 0.805 |\n", "| time/ | |\n", "| episodes | 224 |\n", "| fps | 250 |\n", "| time_elapsed | 389 |\n", "| total_timesteps | 97559 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.235 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 447 |\n", "| ep_rew_mean | 18.6 |\n", "| exploration_rate | 0.801 |\n", "| time/ | |\n", "| episodes | 228 |\n", "| fps | 246 |\n", "| time_elapsed | 403 |\n", "| total_timesteps | 99642 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.124 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 447 |\n", "| ep_rew_mean | 18.7 |\n", "| exploration_rate | 0.797 |\n", "| time/ | |\n", "| episodes | 232 |\n", "| fps | 244 |\n", "| time_elapsed | 415 |\n", "| total_timesteps | 101354 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.159 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 449 |\n", "| ep_rew_mean | 18.8 |\n", "| exploration_rate | 0.793 |\n", "| time/ | |\n", "| episodes | 236 |\n", "| fps | 241 |\n", "| time_elapsed | 428 |\n", "| total_timesteps | 103302 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.165 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 443 |\n", "| ep_rew_mean | 18.7 |\n", "| exploration_rate | 0.79 |\n", "| time/ | |\n", "| episodes | 240 |\n", "| fps | 238 |\n", "| time_elapsed | 438 |\n", "| total_timesteps | 104758 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0463 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 444 |\n", "| ep_rew_mean | 18.7 |\n", "| exploration_rate | 0.787 |\n", "| time/ | |\n", "| episodes | 244 |\n", "| fps | 236 |\n", "| time_elapsed | 450 |\n", "| total_timesteps | 106554 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.216 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 450 |\n", "| ep_rew_mean | 19.2 |\n", "| exploration_rate | 0.782 |\n", "| time/ | |\n", "| episodes | 248 |\n", "| fps | 233 |\n", "| time_elapsed | 466 |\n", "| total_timesteps | 108824 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.421 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 450 |\n", "| ep_rew_mean | 19.1 |\n", "| exploration_rate | 0.779 |\n", "| time/ | |\n", "| episodes | 252 |\n", "| fps | 230 |\n", "| time_elapsed | 478 |\n", "| total_timesteps | 110594 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.268 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 450 |\n", "| ep_rew_mean | 18.9 |\n", "| exploration_rate | 0.775 |\n", "| time/ | |\n", "| episodes | 256 |\n", "| fps | 228 |\n", "| time_elapsed | 491 |\n", "| total_timesteps | 112354 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.41 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 452 |\n", "| ep_rew_mean | 19 |\n", "| exploration_rate | 0.772 |\n", "| time/ | |\n", "| episodes | 260 |\n", "| fps | 226 |\n", "| time_elapsed | 504 |\n", "| total_timesteps | 114236 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.12 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 452 |\n", "| ep_rew_mean | 18.9 |\n", "| exploration_rate | 0.768 |\n", "| time/ | |\n", "| episodes | 264 |\n", "| fps | 224 |\n", "| time_elapsed | 516 |\n", "| total_timesteps | 116026 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0686 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 454 |\n", "| ep_rew_mean | 19.1 |\n", "| exploration_rate | 0.764 |\n", "| time/ | |\n", "| episodes | 268 |\n", "| fps | 222 |\n", "| time_elapsed | 529 |\n", "| total_timesteps | 117894 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.229 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 455 |\n", "| ep_rew_mean | 19.1 |\n", "| exploration_rate | 0.76 |\n", "| time/ | |\n", "| episodes | 272 |\n", "| fps | 220 |\n", "| time_elapsed | 542 |\n", "| total_timesteps | 119784 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.1 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 458 |\n", "| ep_rew_mean | 19.5 |\n", "| exploration_rate | 0.756 |\n", "| time/ | |\n", "| episodes | 276 |\n", "| fps | 218 |\n", "| time_elapsed | 557 |\n", "| total_timesteps | 121860 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.191 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 459 |\n", "| ep_rew_mean | 19.7 |\n", "| exploration_rate | 0.753 |\n", "| time/ | |\n", "| episodes | 280 |\n", "| fps | 216 |\n", "| time_elapsed | 569 |\n", "| total_timesteps | 123620 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.137 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 458 |\n", "| ep_rew_mean | 19.5 |\n", "| exploration_rate | 0.749 |\n", "| time/ | |\n", "| episodes | 284 |\n", "| fps | 215 |\n", "| time_elapsed | 582 |\n", "| total_timesteps | 125376 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0943 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 455 |\n", "| ep_rew_mean | 19.2 |\n", "| exploration_rate | 0.746 |\n", "| time/ | |\n", "| episodes | 288 |\n", "| fps | 213 |\n", "| time_elapsed | 594 |\n", "| total_timesteps | 127170 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0892 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 456 |\n", "| ep_rew_mean | 19.5 |\n", "| exploration_rate | 0.742 |\n", "| time/ | |\n", "| episodes | 292 |\n", "| fps | 212 |\n", "| time_elapsed | 609 |\n", "| total_timesteps | 129184 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.281 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 460 |\n", "| ep_rew_mean | 19.9 |\n", "| exploration_rate | 0.737 |\n", "| time/ | |\n", "| episodes | 296 |\n", "| fps | 210 |\n", "| time_elapsed | 624 |\n", "| total_timesteps | 131276 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.163 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 458 |\n", "| ep_rew_mean | 19.6 |\n", "| exploration_rate | 0.734 |\n", "| time/ | |\n", "| episodes | 300 |\n", "| fps | 208 |\n", "| time_elapsed | 636 |\n", "| total_timesteps | 133072 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.16 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 460 |\n", "| ep_rew_mean | 19.6 |\n", "| exploration_rate | 0.73 |\n", "| time/ | |\n", "| episodes | 304 |\n", "| fps | 207 |\n", "| time_elapsed | 650 |\n", "| total_timesteps | 134942 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.439 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 458 |\n", "| ep_rew_mean | 19.4 |\n", "| exploration_rate | 0.727 |\n", "| time/ | |\n", "| episodes | 308 |\n", "| fps | 206 |\n", "| time_elapsed | 660 |\n", "| total_timesteps | 136486 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.37 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 460 |\n", "| ep_rew_mean | 19.7 |\n", "| exploration_rate | 0.723 |\n", "| time/ | |\n", "| episodes | 312 |\n", "| fps | 205 |\n", "| time_elapsed | 674 |\n", "| total_timesteps | 138342 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.522 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 466 |\n", "| ep_rew_mean | 20.1 |\n", "| exploration_rate | 0.719 |\n", "| time/ | |\n", "| episodes | 316 |\n", "| fps | 203 |\n", "| time_elapsed | 690 |\n", "| total_timesteps | 140582 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.524 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 466 |\n", "| ep_rew_mean | 20.2 |\n", "| exploration_rate | 0.715 |\n", "| time/ | |\n", "| episodes | 320 |\n", "| fps | 202 |\n", "| time_elapsed | 704 |\n", "| total_timesteps | 142574 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.165 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 468 |\n", "| ep_rew_mean | 20.6 |\n", "| exploration_rate | 0.711 |\n", "| time/ | |\n", "| episodes | 324 |\n", "| fps | 201 |\n", "| time_elapsed | 717 |\n", "| total_timesteps | 144404 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.166 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 466 |\n", "| ep_rew_mean | 20.6 |\n", "| exploration_rate | 0.707 |\n", "| time/ | |\n", "| episodes | 328 |\n", "| fps | 200 |\n", "| time_elapsed | 731 |\n", "| total_timesteps | 146288 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.356 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 469 |\n", "| ep_rew_mean | 20.6 |\n", "| exploration_rate | 0.704 |\n", "| time/ | |\n", "| episodes | 332 |\n", "| fps | 198 |\n", "| time_elapsed | 745 |\n", "| total_timesteps | 148242 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.471 |\n", "----------------------------------\n", "Eval num_timesteps=150000, episode_reward=30.80 +/- 4.31\n", "Episode length: 490.80 +/- 49.64\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 491 |\n", "| mean_reward | 30.8 |\n", "| rollout/ | |\n", "| exploration_rate | 0.7 |\n", "| time/ | |\n", "| total_timesteps | 150000 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.465 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 469 |\n", "| ep_rew_mean | 20.6 |\n", "| exploration_rate | 0.7 |\n", "| time/ | |\n", "| episodes | 336 |\n", "| fps | 194 |\n", "| time_elapsed | 771 |\n", "| total_timesteps | 150186 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.16 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 471 |\n", "| ep_rew_mean | 20.8 |\n", "| exploration_rate | 0.696 |\n", "| time/ | |\n", "| episodes | 340 |\n", "| fps | 193 |\n", "| time_elapsed | 783 |\n", "| total_timesteps | 151846 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.291 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 469 |\n", "| ep_rew_mean | 20.9 |\n", "| exploration_rate | 0.693 |\n", "| time/ | |\n", "| episodes | 344 |\n", "| fps | 192 |\n", "| time_elapsed | 795 |\n", "| total_timesteps | 153466 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.232 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 466 |\n", "| ep_rew_mean | 20.5 |\n", "| exploration_rate | 0.689 |\n", "| time/ | |\n", "| episodes | 348 |\n", "| fps | 192 |\n", "| time_elapsed | 809 |\n", "| total_timesteps | 155432 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.459 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 469 |\n", "| ep_rew_mean | 20.8 |\n", "| exploration_rate | 0.685 |\n", "| time/ | |\n", "| episodes | 352 |\n", "| fps | 191 |\n", "| time_elapsed | 823 |\n", "| total_timesteps | 157462 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.249 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 471 |\n", "| ep_rew_mean | 21 |\n", "| exploration_rate | 0.681 |\n", "| time/ | |\n", "| episodes | 356 |\n", "| fps | 190 |\n", "| time_elapsed | 838 |\n", "| total_timesteps | 159410 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.299 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 472 |\n", "| ep_rew_mean | 21.6 |\n", "| exploration_rate | 0.677 |\n", "| time/ | |\n", "| episodes | 360 |\n", "| fps | 189 |\n", "| time_elapsed | 852 |\n", "| total_timesteps | 161434 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.994 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 471 |\n", "| ep_rew_mean | 21.8 |\n", "| exploration_rate | 0.674 |\n", "| time/ | |\n", "| episodes | 364 |\n", "| fps | 188 |\n", "| time_elapsed | 864 |\n", "| total_timesteps | 163144 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.827 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 478 |\n", "| ep_rew_mean | 22.5 |\n", "| exploration_rate | 0.669 |\n", "| time/ | |\n", "| episodes | 368 |\n", "| fps | 187 |\n", "| time_elapsed | 883 |\n", "| total_timesteps | 165654 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.155 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 477 |\n", "| ep_rew_mean | 22.6 |\n", "| exploration_rate | 0.665 |\n", "| time/ | |\n", "| episodes | 372 |\n", "| fps | 186 |\n", "| time_elapsed | 895 |\n", "| total_timesteps | 167436 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.374 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 474 |\n", "| ep_rew_mean | 22.4 |\n", "| exploration_rate | 0.661 |\n", "| time/ | |\n", "| episodes | 376 |\n", "| fps | 186 |\n", "| time_elapsed | 909 |\n", "| total_timesteps | 169278 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.627 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 477 |\n", "| ep_rew_mean | 22.7 |\n", "| exploration_rate | 0.657 |\n", "| time/ | |\n", "| episodes | 380 |\n", "| fps | 185 |\n", "| time_elapsed | 924 |\n", "| total_timesteps | 171346 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.318 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 479 |\n", "| ep_rew_mean | 23 |\n", "| exploration_rate | 0.654 |\n", "| time/ | |\n", "| episodes | 384 |\n", "| fps | 184 |\n", "| time_elapsed | 937 |\n", "| total_timesteps | 173230 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.875 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 479 |\n", "| ep_rew_mean | 23.3 |\n", "| exploration_rate | 0.65 |\n", "| time/ | |\n", "| episodes | 388 |\n", "| fps | 184 |\n", "| time_elapsed | 951 |\n", "| total_timesteps | 175106 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.141 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 476 |\n", "| ep_rew_mean | 23.1 |\n", "| exploration_rate | 0.646 |\n", "| time/ | |\n", "| episodes | 392 |\n", "| fps | 183 |\n", "| time_elapsed | 963 |\n", "| total_timesteps | 176772 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.73 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 472 |\n", "| ep_rew_mean | 22.8 |\n", "| exploration_rate | 0.643 |\n", "| time/ | |\n", "| episodes | 396 |\n", "| fps | 182 |\n", "| time_elapsed | 976 |\n", "| total_timesteps | 178494 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.157 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 472 |\n", "| ep_rew_mean | 23.1 |\n", "| exploration_rate | 0.639 |\n", "| time/ | |\n", "| episodes | 400 |\n", "| fps | 182 |\n", "| time_elapsed | 989 |\n", "| total_timesteps | 180292 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.309 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 474 |\n", "| ep_rew_mean | 23.5 |\n", "| exploration_rate | 0.635 |\n", "| time/ | |\n", "| episodes | 404 |\n", "| fps | 181 |\n", "| time_elapsed | 1004 |\n", "| total_timesteps | 182304 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.383 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 477 |\n", "| ep_rew_mean | 23.8 |\n", "| exploration_rate | 0.632 |\n", "| time/ | |\n", "| episodes | 408 |\n", "| fps | 180 |\n", "| time_elapsed | 1018 |\n", "| total_timesteps | 184180 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.38 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 476 |\n", "| ep_rew_mean | 23.8 |\n", "| exploration_rate | 0.628 |\n", "| time/ | |\n", "| episodes | 412 |\n", "| fps | 180 |\n", "| time_elapsed | 1031 |\n", "| total_timesteps | 185964 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.224 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 472 |\n", "| ep_rew_mean | 23.6 |\n", "| exploration_rate | 0.625 |\n", "| time/ | |\n", "| episodes | 416 |\n", "| fps | 179 |\n", "| time_elapsed | 1044 |\n", "| total_timesteps | 187738 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.736 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 472 |\n", "| ep_rew_mean | 23.8 |\n", "| exploration_rate | 0.62 |\n", "| time/ | |\n", "| episodes | 420 |\n", "| fps | 179 |\n", "| time_elapsed | 1059 |\n", "| total_timesteps | 189764 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.01 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 472 |\n", "| ep_rew_mean | 23.9 |\n", "| exploration_rate | 0.617 |\n", "| time/ | |\n", "| episodes | 424 |\n", "| fps | 178 |\n", "| time_elapsed | 1073 |\n", "| total_timesteps | 191648 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.591 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 473 |\n", "| ep_rew_mean | 24.2 |\n", "| exploration_rate | 0.613 |\n", "| time/ | |\n", "| episodes | 428 |\n", "| fps | 177 |\n", "| time_elapsed | 1088 |\n", "| total_timesteps | 193611 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.395 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 476 |\n", "| ep_rew_mean | 24.5 |\n", "| exploration_rate | 0.608 |\n", "| time/ | |\n", "| episodes | 432 |\n", "| fps | 177 |\n", "| time_elapsed | 1104 |\n", "| total_timesteps | 195833 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.972 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 478 |\n", "| ep_rew_mean | 24.9 |\n", "| exploration_rate | 0.604 |\n", "| time/ | |\n", "| episodes | 436 |\n", "| fps | 176 |\n", "| time_elapsed | 1120 |\n", "| total_timesteps | 198015 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.11 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 483 |\n", "| ep_rew_mean | 25.6 |\n", "| exploration_rate | 0.6 |\n", "| time/ | |\n", "| episodes | 440 |\n", "| fps | 176 |\n", "| time_elapsed | 1137 |\n", "| total_timesteps | 200185 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.719 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 487 |\n", "| ep_rew_mean | 25.9 |\n", "| exploration_rate | 0.596 |\n", "| time/ | |\n", "| episodes | 444 |\n", "| fps | 175 |\n", "| time_elapsed | 1152 |\n", "| total_timesteps | 202183 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.63 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 487 |\n", "| ep_rew_mean | 26.2 |\n", "| exploration_rate | 0.592 |\n", "| time/ | |\n", "| episodes | 448 |\n", "| fps | 174 |\n", "| time_elapsed | 1166 |\n", "| total_timesteps | 204113 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.712 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 487 |\n", "| ep_rew_mean | 26.2 |\n", "| exploration_rate | 0.588 |\n", "| time/ | |\n", "| episodes | 452 |\n", "| fps | 174 |\n", "| time_elapsed | 1181 |\n", "| total_timesteps | 206133 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.536 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 486 |\n", "| ep_rew_mean | 26.1 |\n", "| exploration_rate | 0.584 |\n", "| time/ | |\n", "| episodes | 456 |\n", "| fps | 173 |\n", "| time_elapsed | 1195 |\n", "| total_timesteps | 208021 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.504 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 485 |\n", "| ep_rew_mean | 25.7 |\n", "| exploration_rate | 0.58 |\n", "| time/ | |\n", "| episodes | 460 |\n", "| fps | 173 |\n", "| time_elapsed | 1210 |\n", "| total_timesteps | 209971 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.625 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 489 |\n", "| ep_rew_mean | 25.9 |\n", "| exploration_rate | 0.576 |\n", "| time/ | |\n", "| episodes | 464 |\n", "| fps | 172 |\n", "| time_elapsed | 1226 |\n", "| total_timesteps | 212049 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.279 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 484 |\n", "| ep_rew_mean | 25.9 |\n", "| exploration_rate | 0.572 |\n", "| time/ | |\n", "| episodes | 468 |\n", "| fps | 172 |\n", "| time_elapsed | 1241 |\n", "| total_timesteps | 214006 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.769 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 491 |\n", "| ep_rew_mean | 26.5 |\n", "| exploration_rate | 0.567 |\n", "| time/ | |\n", "| episodes | 472 |\n", "| fps | 171 |\n", "| time_elapsed | 1260 |\n", "| total_timesteps | 216528 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.22 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 494 |\n", "| ep_rew_mean | 26.7 |\n", "| exploration_rate | 0.563 |\n", "| time/ | |\n", "| episodes | 476 |\n", "| fps | 171 |\n", "| time_elapsed | 1276 |\n", "| total_timesteps | 218678 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.845 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 492 |\n", "| ep_rew_mean | 26.5 |\n", "| exploration_rate | 0.559 |\n", "| time/ | |\n", "| episodes | 480 |\n", "| fps | 170 |\n", "| time_elapsed | 1290 |\n", "| total_timesteps | 220572 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.361 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 492 |\n", "| ep_rew_mean | 26.6 |\n", "| exploration_rate | 0.555 |\n", "| time/ | |\n", "| episodes | 484 |\n", "| fps | 170 |\n", "| time_elapsed | 1305 |\n", "| total_timesteps | 222448 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.719 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 500 |\n", "| ep_rew_mean | 27.7 |\n", "| exploration_rate | 0.55 |\n", "| time/ | |\n", "| episodes | 488 |\n", "| fps | 169 |\n", "| time_elapsed | 1325 |\n", "| total_timesteps | 225124 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.408 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 502 |\n", "| ep_rew_mean | 27.9 |\n", "| exploration_rate | 0.546 |\n", "| time/ | |\n", "| episodes | 492 |\n", "| fps | 169 |\n", "| time_elapsed | 1339 |\n", "| total_timesteps | 226994 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.17 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 506 |\n", "| ep_rew_mean | 27.9 |\n", "| exploration_rate | 0.542 |\n", "| time/ | |\n", "| episodes | 496 |\n", "| fps | 168 |\n", "| time_elapsed | 1355 |\n", "| total_timesteps | 229094 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.13 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 510 |\n", "| ep_rew_mean | 28.5 |\n", "| exploration_rate | 0.537 |\n", "| time/ | |\n", "| episodes | 500 |\n", "| fps | 168 |\n", "| time_elapsed | 1372 |\n", "| total_timesteps | 231316 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.91 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 507 |\n", "| ep_rew_mean | 28 |\n", "| exploration_rate | 0.534 |\n", "| time/ | |\n", "| episodes | 504 |\n", "| fps | 168 |\n", "| time_elapsed | 1385 |\n", "| total_timesteps | 232988 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.614 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 509 |\n", "| ep_rew_mean | 28.6 |\n", "| exploration_rate | 0.53 |\n", "| time/ | |\n", "| episodes | 508 |\n", "| fps | 167 |\n", "| time_elapsed | 1401 |\n", "| total_timesteps | 235128 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.482 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 512 |\n", "| ep_rew_mean | 28.6 |\n", "| exploration_rate | 0.526 |\n", "| time/ | |\n", "| episodes | 512 |\n", "| fps | 167 |\n", "| time_elapsed | 1417 |\n", "| total_timesteps | 237172 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.503 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 516 |\n", "| ep_rew_mean | 28.9 |\n", "| exploration_rate | 0.521 |\n", "| time/ | |\n", "| episodes | 516 |\n", "| fps | 166 |\n", "| time_elapsed | 1434 |\n", "| total_timesteps | 239354 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.782 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 513 |\n", "| ep_rew_mean | 28.6 |\n", "| exploration_rate | 0.518 |\n", "| time/ | |\n", "| episodes | 520 |\n", "| fps | 166 |\n", "| time_elapsed | 1447 |\n", "| total_timesteps | 241112 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 2.78 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 516 |\n", "| ep_rew_mean | 28.7 |\n", "| exploration_rate | 0.513 |\n", "| time/ | |\n", "| episodes | 524 |\n", "| fps | 166 |\n", "| time_elapsed | 1464 |\n", "| total_timesteps | 243260 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.26 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 519 |\n", "| ep_rew_mean | 28.8 |\n", "| exploration_rate | 0.509 |\n", "| time/ | |\n", "| episodes | 528 |\n", "| fps | 165 |\n", "| time_elapsed | 1481 |\n", "| total_timesteps | 245494 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.938 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 517 |\n", "| ep_rew_mean | 28.6 |\n", "| exploration_rate | 0.505 |\n", "| time/ | |\n", "| episodes | 532 |\n", "| fps | 165 |\n", "| time_elapsed | 1497 |\n", "| total_timesteps | 247519 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.53 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 516 |\n", "| ep_rew_mean | 28.3 |\n", "| exploration_rate | 0.501 |\n", "| time/ | |\n", "| episodes | 536 |\n", "| fps | 164 |\n", "| time_elapsed | 1513 |\n", "| total_timesteps | 249629 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.704 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 514 |\n", "| ep_rew_mean | 28.1 |\n", "| exploration_rate | 0.497 |\n", "| time/ | |\n", "| episodes | 540 |\n", "| fps | 164 |\n", "| time_elapsed | 1528 |\n", "| total_timesteps | 251575 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.442 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 513 |\n", "| ep_rew_mean | 27.9 |\n", "| exploration_rate | 0.493 |\n", "| time/ | |\n", "| episodes | 544 |\n", "| fps | 164 |\n", "| time_elapsed | 1543 |\n", "| total_timesteps | 253525 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.511 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 516 |\n", "| ep_rew_mean | 27.8 |\n", "| exploration_rate | 0.489 |\n", "| time/ | |\n", "| episodes | 548 |\n", "| fps | 163 |\n", "| time_elapsed | 1560 |\n", "| total_timesteps | 255685 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.653 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 517 |\n", "| ep_rew_mean | 28.1 |\n", "| exploration_rate | 0.484 |\n", "| time/ | |\n", "| episodes | 552 |\n", "| fps | 163 |\n", "| time_elapsed | 1577 |\n", "| total_timesteps | 257823 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.12 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 520 |\n", "| ep_rew_mean | 28.5 |\n", "| exploration_rate | 0.48 |\n", "| time/ | |\n", "| episodes | 556 |\n", "| fps | 163 |\n", "| time_elapsed | 1594 |\n", "| total_timesteps | 260027 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.881 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 522 |\n", "| ep_rew_mean | 28.6 |\n", "| exploration_rate | 0.476 |\n", "| time/ | |\n", "| episodes | 560 |\n", "| fps | 162 |\n", "| time_elapsed | 1611 |\n", "| total_timesteps | 262187 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.844 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 521 |\n", "| ep_rew_mean | 28.5 |\n", "| exploration_rate | 0.472 |\n", "| time/ | |\n", "| episodes | 564 |\n", "| fps | 162 |\n", "| time_elapsed | 1626 |\n", "| total_timesteps | 264165 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.52 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 519 |\n", "| ep_rew_mean | 27.8 |\n", "| exploration_rate | 0.468 |\n", "| time/ | |\n", "| episodes | 568 |\n", "| fps | 162 |\n", "| time_elapsed | 1640 |\n", "| total_timesteps | 265893 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.79 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 512 |\n", "| ep_rew_mean | 27 |\n", "| exploration_rate | 0.465 |\n", "| time/ | |\n", "| episodes | 572 |\n", "| fps | 161 |\n", "| time_elapsed | 1654 |\n", "| total_timesteps | 267687 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.535 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 509 |\n", "| ep_rew_mean | 26.8 |\n", "| exploration_rate | 0.461 |\n", "| time/ | |\n", "| episodes | 576 |\n", "| fps | 161 |\n", "| time_elapsed | 1669 |\n", "| total_timesteps | 269601 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.61 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 512 |\n", "| ep_rew_mean | 27.1 |\n", "| exploration_rate | 0.456 |\n", "| time/ | |\n", "| episodes | 580 |\n", "| fps | 161 |\n", "| time_elapsed | 1686 |\n", "| total_timesteps | 271793 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.45 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 516 |\n", "| ep_rew_mean | 27.2 |\n", "| exploration_rate | 0.452 |\n", "| time/ | |\n", "| episodes | 584 |\n", "| fps | 160 |\n", "| time_elapsed | 1703 |\n", "| total_timesteps | 274035 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.04 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 512 |\n", "| ep_rew_mean | 26.7 |\n", "| exploration_rate | 0.447 |\n", "| time/ | |\n", "| episodes | 588 |\n", "| fps | 160 |\n", "| time_elapsed | 1721 |\n", "| total_timesteps | 276331 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.923 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 513 |\n", "| ep_rew_mean | 26.8 |\n", "| exploration_rate | 0.443 |\n", "| time/ | |\n", "| episodes | 592 |\n", "| fps | 160 |\n", "| time_elapsed | 1736 |\n", "| total_timesteps | 278277 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.45 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 512 |\n", "| ep_rew_mean | 26.8 |\n", "| exploration_rate | 0.439 |\n", "| time/ | |\n", "| episodes | 596 |\n", "| fps | 159 |\n", "| time_elapsed | 1752 |\n", "| total_timesteps | 280273 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.566 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 510 |\n", "| ep_rew_mean | 26.1 |\n", "| exploration_rate | 0.435 |\n", "| time/ | |\n", "| episodes | 600 |\n", "| fps | 159 |\n", "| time_elapsed | 1768 |\n", "| total_timesteps | 282323 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 2.24 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 511 |\n", "| ep_rew_mean | 26.2 |\n", "| exploration_rate | 0.432 |\n", "| time/ | |\n", "| episodes | 604 |\n", "| fps | 159 |\n", "| time_elapsed | 1782 |\n", "| total_timesteps | 284085 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.82 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 509 |\n", "| ep_rew_mean | 25.6 |\n", "| exploration_rate | 0.428 |\n", "| time/ | |\n", "| episodes | 608 |\n", "| fps | 159 |\n", "| time_elapsed | 1797 |\n", "| total_timesteps | 286001 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.5 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 510 |\n", "| ep_rew_mean | 27 |\n", "| exploration_rate | 0.424 |\n", "| time/ | |\n", "| episodes | 612 |\n", "| fps | 158 |\n", "| time_elapsed | 1815 |\n", "| total_timesteps | 288191 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.93 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 512 |\n", "| ep_rew_mean | 26.8 |\n", "| exploration_rate | 0.419 |\n", "| time/ | |\n", "| episodes | 616 |\n", "| fps | 158 |\n", "| time_elapsed | 1833 |\n", "| total_timesteps | 290525 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 2.62 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 514 |\n", "| ep_rew_mean | 27.1 |\n", "| exploration_rate | 0.415 |\n", "| time/ | |\n", "| episodes | 620 |\n", "| fps | 158 |\n", "| time_elapsed | 1848 |\n", "| total_timesteps | 292485 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.906 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 517 |\n", "| ep_rew_mean | 27.4 |\n", "| exploration_rate | 0.41 |\n", "| time/ | |\n", "| episodes | 624 |\n", "| fps | 157 |\n", "| time_elapsed | 1868 |\n", "| total_timesteps | 294953 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.867 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 518 |\n", "| ep_rew_mean | 27.4 |\n", "| exploration_rate | 0.405 |\n", "| time/ | |\n", "| episodes | 628 |\n", "| fps | 157 |\n", "| time_elapsed | 1887 |\n", "| total_timesteps | 297265 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.84 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 517 |\n", "| ep_rew_mean | 27.4 |\n", "| exploration_rate | 0.401 |\n", "| time/ | |\n", "| episodes | 632 |\n", "| fps | 157 |\n", "| time_elapsed | 1902 |\n", "| total_timesteps | 299251 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.768 |\n", "----------------------------------\n", "Eval num_timesteps=300000, episode_reward=21.10 +/- 4.16\n", "Episode length: 571.80 +/- 77.82\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 572 |\n", "| mean_reward | 21.1 |\n", "| rollout/ | |\n", "| exploration_rate | 0.4 |\n", "| time/ | |\n", "| total_timesteps | 300000 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.42 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 517 |\n", "| ep_rew_mean | 27.5 |\n", "| exploration_rate | 0.397 |\n", "| time/ | |\n", "| episodes | 636 |\n", "| fps | 155 |\n", "| time_elapsed | 1933 |\n", "| total_timesteps | 301355 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.637 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 519 |\n", "| ep_rew_mean | 27.4 |\n", "| exploration_rate | 0.393 |\n", "| time/ | |\n", "| episodes | 640 |\n", "| fps | 155 |\n", "| time_elapsed | 1950 |\n", "| total_timesteps | 303437 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.799 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 518 |\n", "| ep_rew_mean | 27.5 |\n", "| exploration_rate | 0.389 |\n", "| time/ | |\n", "| episodes | 644 |\n", "| fps | 155 |\n", "| time_elapsed | 1965 |\n", "| total_timesteps | 305371 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.94 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 521 |\n", "| ep_rew_mean | 27.9 |\n", "| exploration_rate | 0.384 |\n", "| time/ | |\n", "| episodes | 648 |\n", "| fps | 155 |\n", "| time_elapsed | 1985 |\n", "| total_timesteps | 307809 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.607 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 523 |\n", "| ep_rew_mean | 27.7 |\n", "| exploration_rate | 0.38 |\n", "| time/ | |\n", "| episodes | 652 |\n", "| fps | 154 |\n", "| time_elapsed | 2003 |\n", "| total_timesteps | 310141 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.79 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 523 |\n", "| ep_rew_mean | 27.4 |\n", "| exploration_rate | 0.375 |\n", "| time/ | |\n", "| episodes | 656 |\n", "| fps | 154 |\n", "| time_elapsed | 2021 |\n", "| total_timesteps | 312303 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.589 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 522 |\n", "| ep_rew_mean | 27.6 |\n", "| exploration_rate | 0.371 |\n", "| time/ | |\n", "| episodes | 660 |\n", "| fps | 154 |\n", "| time_elapsed | 2037 |\n", "| total_timesteps | 314397 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.716 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 526 |\n", "| ep_rew_mean | 27.9 |\n", "| exploration_rate | 0.366 |\n", "| time/ | |\n", "| episodes | 664 |\n", "| fps | 153 |\n", "| time_elapsed | 2057 |\n", "| total_timesteps | 316771 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.489 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 530 |\n", "| ep_rew_mean | 28.3 |\n", "| exploration_rate | 0.362 |\n", "| time/ | |\n", "| episodes | 668 |\n", "| fps | 153 |\n", "| time_elapsed | 2074 |\n", "| total_timesteps | 318901 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.729 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 534 |\n", "| ep_rew_mean | 28.7 |\n", "| exploration_rate | 0.358 |\n", "| time/ | |\n", "| episodes | 672 |\n", "| fps | 153 |\n", "| time_elapsed | 2091 |\n", "| total_timesteps | 321051 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.614 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 536 |\n", "| ep_rew_mean | 28.7 |\n", "| exploration_rate | 0.354 |\n", "| time/ | |\n", "| episodes | 676 |\n", "| fps | 153 |\n", "| time_elapsed | 2108 |\n", "| total_timesteps | 323181 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.595 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 536 |\n", "| ep_rew_mean | 28.6 |\n", "| exploration_rate | 0.349 |\n", "| time/ | |\n", "| episodes | 680 |\n", "| fps | 153 |\n", "| time_elapsed | 2126 |\n", "| total_timesteps | 325405 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.83 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 535 |\n", "| ep_rew_mean | 28.4 |\n", "| exploration_rate | 0.345 |\n", "| time/ | |\n", "| episodes | 684 |\n", "| fps | 152 |\n", "| time_elapsed | 2143 |\n", "| total_timesteps | 327515 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.667 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 533 |\n", "| ep_rew_mean | 27.8 |\n", "| exploration_rate | 0.341 |\n", "| time/ | |\n", "| episodes | 688 |\n", "| fps | 152 |\n", "| time_elapsed | 2160 |\n", "| total_timesteps | 329655 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 2.26 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 534 |\n", "| ep_rew_mean | 27.8 |\n", "| exploration_rate | 0.337 |\n", "| time/ | |\n", "| episodes | 692 |\n", "| fps | 152 |\n", "| time_elapsed | 2176 |\n", "| total_timesteps | 331649 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.53 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 538 |\n", "| ep_rew_mean | 28.1 |\n", "| exploration_rate | 0.332 |\n", "| time/ | |\n", "| episodes | 696 |\n", "| fps | 152 |\n", "| time_elapsed | 2196 |\n", "| total_timesteps | 334049 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.709 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 540 |\n", "| ep_rew_mean | 28.3 |\n", "| exploration_rate | 0.327 |\n", "| time/ | |\n", "| episodes | 700 |\n", "| fps | 151 |\n", "| time_elapsed | 2214 |\n", "| total_timesteps | 336347 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.37 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 546 |\n", "| ep_rew_mean | 28.9 |\n", "| exploration_rate | 0.323 |\n", "| time/ | |\n", "| episodes | 704 |\n", "| fps | 151 |\n", "| time_elapsed | 2233 |\n", "| total_timesteps | 338649 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.516 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 551 |\n", "| ep_rew_mean | 28.7 |\n", "| exploration_rate | 0.318 |\n", "| time/ | |\n", "| episodes | 708 |\n", "| fps | 151 |\n", "| time_elapsed | 2253 |\n", "| total_timesteps | 341099 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.612 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 553 |\n", "| ep_rew_mean | 27.3 |\n", "| exploration_rate | 0.313 |\n", "| time/ | |\n", "| episodes | 712 |\n", "| fps | 151 |\n", "| time_elapsed | 2273 |\n", "| total_timesteps | 343501 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.435 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 553 |\n", "| ep_rew_mean | 27.4 |\n", "| exploration_rate | 0.308 |\n", "| time/ | |\n", "| episodes | 716 |\n", "| fps | 150 |\n", "| time_elapsed | 2292 |\n", "| total_timesteps | 345853 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.815 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 555 |\n", "| ep_rew_mean | 27.5 |\n", "| exploration_rate | 0.304 |\n", "| time/ | |\n", "| episodes | 720 |\n", "| fps | 150 |\n", "| time_elapsed | 2309 |\n", "| total_timesteps | 348008 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.592 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 554 |\n", "| ep_rew_mean | 27.1 |\n", "| exploration_rate | 0.299 |\n", "| time/ | |\n", "| episodes | 724 |\n", "| fps | 150 |\n", "| time_elapsed | 2328 |\n", "| total_timesteps | 350361 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.17 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 557 |\n", "| ep_rew_mean | 27.1 |\n", "| exploration_rate | 0.294 |\n", "| time/ | |\n", "| episodes | 728 |\n", "| fps | 150 |\n", "| time_elapsed | 2349 |\n", "| total_timesteps | 352953 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.34 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 559 |\n", "| ep_rew_mean | 27 |\n", "| exploration_rate | 0.29 |\n", "| time/ | |\n", "| episodes | 732 |\n", "| fps | 149 |\n", "| time_elapsed | 2367 |\n", "| total_timesteps | 355173 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.734 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 563 |\n", "| ep_rew_mean | 26.9 |\n", "| exploration_rate | 0.285 |\n", "| time/ | |\n", "| episodes | 736 |\n", "| fps | 149 |\n", "| time_elapsed | 2387 |\n", "| total_timesteps | 357609 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.68 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 565 |\n", "| ep_rew_mean | 26.9 |\n", "| exploration_rate | 0.28 |\n", "| time/ | |\n", "| episodes | 740 |\n", "| fps | 149 |\n", "| time_elapsed | 2406 |\n", "| total_timesteps | 359935 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.446 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 566 |\n", "| ep_rew_mean | 26.8 |\n", "| exploration_rate | 0.276 |\n", "| time/ | |\n", "| episodes | 744 |\n", "| fps | 149 |\n", "| time_elapsed | 2423 |\n", "| total_timesteps | 362009 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.324 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 564 |\n", "| ep_rew_mean | 26.8 |\n", "| exploration_rate | 0.272 |\n", "| time/ | |\n", "| episodes | 748 |\n", "| fps | 149 |\n", "| time_elapsed | 2442 |\n", "| total_timesteps | 364225 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.661 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 562 |\n", "| ep_rew_mean | 26.8 |\n", "| exploration_rate | 0.267 |\n", "| time/ | |\n", "| episodes | 752 |\n", "| fps | 148 |\n", "| time_elapsed | 2459 |\n", "| total_timesteps | 366343 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.496 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 566 |\n", "| ep_rew_mean | 28.6 |\n", "| exploration_rate | 0.262 |\n", "| time/ | |\n", "| episodes | 756 |\n", "| fps | 148 |\n", "| time_elapsed | 2480 |\n", "| total_timesteps | 368885 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.23 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 569 |\n", "| ep_rew_mean | 28.6 |\n", "| exploration_rate | 0.257 |\n", "| time/ | |\n", "| episodes | 760 |\n", "| fps | 148 |\n", "| time_elapsed | 2500 |\n", "| total_timesteps | 371307 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.56 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 566 |\n", "| ep_rew_mean | 28.5 |\n", "| exploration_rate | 0.253 |\n", "| time/ | |\n", "| episodes | 764 |\n", "| fps | 148 |\n", "| time_elapsed | 2518 |\n", "| total_timesteps | 373413 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.55 |\n", "----------------------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/gymnasium/utils/passive_env_checker.py:335: UserWarning: \u001b[33mWARN: No render fps was declared in the environment (env.metadata['render_fps'] is None or not defined), rendering may occur at inconsistent fps.\u001b[0m\n", " logger.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 567 |\n", "| ep_rew_mean | 28.4 |\n", "| exploration_rate | 0.249 |\n", "| time/ | |\n", "| episodes | 768 |\n", "| fps | 148 |\n", "| time_elapsed | 2537 |\n", "| total_timesteps | 375571 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.776 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 572 |\n", "| ep_rew_mean | 28.8 |\n", "| exploration_rate | 0.244 |\n", "| time/ | |\n", "| episodes | 772 |\n", "| fps | 147 |\n", "| time_elapsed | 2560 |\n", "| total_timesteps | 378215 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.848 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 575 |\n", "| ep_rew_mean | 29.2 |\n", "| exploration_rate | 0.239 |\n", "| time/ | |\n", "| episodes | 776 |\n", "| fps | 147 |\n", "| time_elapsed | 2580 |\n", "| total_timesteps | 380709 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.755 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 577 |\n", "| ep_rew_mean | 29.4 |\n", "| exploration_rate | 0.234 |\n", "| time/ | |\n", "| episodes | 780 |\n", "| fps | 147 |\n", "| time_elapsed | 2598 |\n", "| total_timesteps | 383115 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.625 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 581 |\n", "| ep_rew_mean | 29.6 |\n", "| exploration_rate | 0.229 |\n", "| time/ | |\n", "| episodes | 784 |\n", "| fps | 147 |\n", "| time_elapsed | 2617 |\n", "| total_timesteps | 385569 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.32 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 584 |\n", "| ep_rew_mean | 30 |\n", "| exploration_rate | 0.224 |\n", "| time/ | |\n", "| episodes | 788 |\n", "| fps | 147 |\n", "| time_elapsed | 2636 |\n", "| total_timesteps | 388055 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.609 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 587 |\n", "| ep_rew_mean | 30.3 |\n", "| exploration_rate | 0.219 |\n", "| time/ | |\n", "| episodes | 792 |\n", "| fps | 147 |\n", "| time_elapsed | 2654 |\n", "| total_timesteps | 390325 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.651 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 583 |\n", "| ep_rew_mean | 30.1 |\n", "| exploration_rate | 0.215 |\n", "| time/ | |\n", "| episodes | 796 |\n", "| fps | 146 |\n", "| time_elapsed | 2669 |\n", "| total_timesteps | 392341 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.05 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 581 |\n", "| ep_rew_mean | 29.9 |\n", "| exploration_rate | 0.211 |\n", "| time/ | |\n", "| episodes | 800 |\n", "| fps | 146 |\n", "| time_elapsed | 2685 |\n", "| total_timesteps | 394425 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 2.28 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 582 |\n", "| ep_rew_mean | 30.5 |\n", "| exploration_rate | 0.206 |\n", "| time/ | |\n", "| episodes | 804 |\n", "| fps | 146 |\n", "| time_elapsed | 2704 |\n", "| total_timesteps | 396893 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.47 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 580 |\n", "| ep_rew_mean | 31.1 |\n", "| exploration_rate | 0.202 |\n", "| time/ | |\n", "| episodes | 808 |\n", "| fps | 146 |\n", "| time_elapsed | 2721 |\n", "| total_timesteps | 399083 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.608 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 582 |\n", "| ep_rew_mean | 31.9 |\n", "| exploration_rate | 0.197 |\n", "| time/ | |\n", "| episodes | 812 |\n", "| fps | 146 |\n", "| time_elapsed | 2742 |\n", "| total_timesteps | 401717 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.64 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 580 |\n", "| ep_rew_mean | 31.8 |\n", "| exploration_rate | 0.192 |\n", "| time/ | |\n", "| episodes | 816 |\n", "| fps | 146 |\n", "| time_elapsed | 2758 |\n", "| total_timesteps | 403809 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.659 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 584 |\n", "| ep_rew_mean | 32.5 |\n", "| exploration_rate | 0.187 |\n", "| time/ | |\n", "| episodes | 820 |\n", "| fps | 146 |\n", "| time_elapsed | 2778 |\n", "| total_timesteps | 406397 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.67 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 582 |\n", "| ep_rew_mean | 32.6 |\n", "| exploration_rate | 0.183 |\n", "| time/ | |\n", "| episodes | 824 |\n", "| fps | 146 |\n", "| time_elapsed | 2795 |\n", "| total_timesteps | 408589 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.693 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 582 |\n", "| ep_rew_mean | 32.7 |\n", "| exploration_rate | 0.178 |\n", "| time/ | |\n", "| episodes | 828 |\n", "| fps | 146 |\n", "| time_elapsed | 2815 |\n", "| total_timesteps | 411155 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.14 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 591 |\n", "| ep_rew_mean | 34 |\n", "| exploration_rate | 0.171 |\n", "| time/ | |\n", "| episodes | 832 |\n", "| fps | 145 |\n", "| time_elapsed | 2840 |\n", "| total_timesteps | 414299 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.888 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 590 |\n", "| ep_rew_mean | 34.2 |\n", "| exploration_rate | 0.167 |\n", "| time/ | |\n", "| episodes | 836 |\n", "| fps | 145 |\n", "| time_elapsed | 2857 |\n", "| total_timesteps | 416581 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.43 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 587 |\n", "| ep_rew_mean | 34.3 |\n", "| exploration_rate | 0.163 |\n", "| time/ | |\n", "| episodes | 840 |\n", "| fps | 145 |\n", "| time_elapsed | 2874 |\n", "| total_timesteps | 418665 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.5 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 586 |\n", "| ep_rew_mean | 34.2 |\n", "| exploration_rate | 0.159 |\n", "| time/ | |\n", "| episodes | 844 |\n", "| fps | 145 |\n", "| time_elapsed | 2889 |\n", "| total_timesteps | 420655 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.78 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 588 |\n", "| ep_rew_mean | 34.4 |\n", "| exploration_rate | 0.154 |\n", "| time/ | |\n", "| episodes | 848 |\n", "| fps | 145 |\n", "| time_elapsed | 2907 |\n", "| total_timesteps | 422985 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.713 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 591 |\n", "| ep_rew_mean | 34.7 |\n", "| exploration_rate | 0.149 |\n", "| time/ | |\n", "| episodes | 852 |\n", "| fps | 145 |\n", "| time_elapsed | 2927 |\n", "| total_timesteps | 425461 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.815 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 591 |\n", "| ep_rew_mean | 33.6 |\n", "| exploration_rate | 0.144 |\n", "| time/ | |\n", "| episodes | 856 |\n", "| fps | 145 |\n", "| time_elapsed | 2946 |\n", "| total_timesteps | 427959 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.56 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 594 |\n", "| ep_rew_mean | 34.6 |\n", "| exploration_rate | 0.139 |\n", "| time/ | |\n", "| episodes | 860 |\n", "| fps | 145 |\n", "| time_elapsed | 2967 |\n", "| total_timesteps | 430671 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.469 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 595 |\n", "| ep_rew_mean | 34.5 |\n", "| exploration_rate | 0.134 |\n", "| time/ | |\n", "| episodes | 864 |\n", "| fps | 145 |\n", "| time_elapsed | 2985 |\n", "| total_timesteps | 432867 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.781 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 595 |\n", "| ep_rew_mean | 34.1 |\n", "| exploration_rate | 0.13 |\n", "| time/ | |\n", "| episodes | 868 |\n", "| fps | 144 |\n", "| time_elapsed | 3002 |\n", "| total_timesteps | 435075 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.72 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 589 |\n", "| ep_rew_mean | 33.7 |\n", "| exploration_rate | 0.126 |\n", "| time/ | |\n", "| episodes | 872 |\n", "| fps | 144 |\n", "| time_elapsed | 3018 |\n", "| total_timesteps | 437121 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.423 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 592 |\n", "| ep_rew_mean | 33.8 |\n", "| exploration_rate | 0.12 |\n", "| time/ | |\n", "| episodes | 876 |\n", "| fps | 144 |\n", "| time_elapsed | 3040 |\n", "| total_timesteps | 439865 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.63 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 588 |\n", "| ep_rew_mean | 33.7 |\n", "| exploration_rate | 0.116 |\n", "| time/ | |\n", "| episodes | 880 |\n", "| fps | 144 |\n", "| time_elapsed | 3056 |\n", "| total_timesteps | 441915 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.827 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 588 |\n", "| ep_rew_mean | 34 |\n", "| exploration_rate | 0.111 |\n", "| time/ | |\n", "| episodes | 884 |\n", "| fps | 144 |\n", "| time_elapsed | 3075 |\n", "| total_timesteps | 444357 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.93 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 586 |\n", "| ep_rew_mean | 34 |\n", "| exploration_rate | 0.107 |\n", "| time/ | |\n", "| episodes | 888 |\n", "| fps | 144 |\n", "| time_elapsed | 3093 |\n", "| total_timesteps | 446631 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.479 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 585 |\n", "| ep_rew_mean | 34 |\n", "| exploration_rate | 0.102 |\n", "| time/ | |\n", "| episodes | 892 |\n", "| fps | 144 |\n", "| time_elapsed | 3111 |\n", "| total_timesteps | 448837 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.487 |\n", "----------------------------------\n", "Eval num_timesteps=450000, episode_reward=33.60 +/- 8.44\n", "Episode length: 656.80 +/- 128.17\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 657 |\n", "| mean_reward | 33.6 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 450000 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.773 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 589 |\n", "| ep_rew_mean | 34.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 896 |\n", "| fps | 143 |\n", "| time_elapsed | 3146 |\n", "| total_timesteps | 451221 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.512 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 593 |\n", "| ep_rew_mean | 35 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 900 |\n", "| fps | 143 |\n", "| time_elapsed | 3166 |\n", "| total_timesteps | 453737 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.392 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 591 |\n", "| ep_rew_mean | 34.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 904 |\n", "| fps | 143 |\n", "| time_elapsed | 3184 |\n", "| total_timesteps | 456037 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.71 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 594 |\n", "| ep_rew_mean | 34.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 908 |\n", "| fps | 143 |\n", "| time_elapsed | 3203 |\n", "| total_timesteps | 458459 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.469 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 591 |\n", "| ep_rew_mean | 34.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 912 |\n", "| fps | 143 |\n", "| time_elapsed | 3222 |\n", "| total_timesteps | 460811 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.774 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 596 |\n", "| ep_rew_mean | 34.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 916 |\n", "| fps | 142 |\n", "| time_elapsed | 3242 |\n", "| total_timesteps | 463381 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.59 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 592 |\n", "| ep_rew_mean | 35.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 920 |\n", "| fps | 142 |\n", "| time_elapsed | 3260 |\n", "| total_timesteps | 465598 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.622 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 593 |\n", "| ep_rew_mean | 35.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 924 |\n", "| fps | 142 |\n", "| time_elapsed | 3278 |\n", "| total_timesteps | 467924 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.537 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 593 |\n", "| ep_rew_mean | 36.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 928 |\n", "| fps | 142 |\n", "| time_elapsed | 3299 |\n", "| total_timesteps | 470481 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.849 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 592 |\n", "| ep_rew_mean | 36.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 932 |\n", "| fps | 142 |\n", "| time_elapsed | 3322 |\n", "| total_timesteps | 473454 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.446 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 599 |\n", "| ep_rew_mean | 36.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 936 |\n", "| fps | 142 |\n", "| time_elapsed | 3346 |\n", "| total_timesteps | 476494 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.78 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 604 |\n", "| ep_rew_mean | 37.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 940 |\n", "| fps | 142 |\n", "| time_elapsed | 3367 |\n", "| total_timesteps | 479096 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.461 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 610 |\n", "| ep_rew_mean | 38 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 944 |\n", "| fps | 142 |\n", "| time_elapsed | 3387 |\n", "| total_timesteps | 481630 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.844 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 609 |\n", "| ep_rew_mean | 38 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 948 |\n", "| fps | 142 |\n", "| time_elapsed | 3405 |\n", "| total_timesteps | 483926 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.561 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 615 |\n", "| ep_rew_mean | 38.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 952 |\n", "| fps | 141 |\n", "| time_elapsed | 3429 |\n", "| total_timesteps | 486944 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.545 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 617 |\n", "| ep_rew_mean | 38.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 956 |\n", "| fps | 141 |\n", "| time_elapsed | 3450 |\n", "| total_timesteps | 489648 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.379 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 614 |\n", "| ep_rew_mean | 38.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 960 |\n", "| fps | 141 |\n", "| time_elapsed | 3470 |\n", "| total_timesteps | 492073 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.664 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 615 |\n", "| ep_rew_mean | 38.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 964 |\n", "| fps | 141 |\n", "| time_elapsed | 3488 |\n", "| total_timesteps | 494391 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.18 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 616 |\n", "| ep_rew_mean | 39.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 968 |\n", "| fps | 141 |\n", "| time_elapsed | 3506 |\n", "| total_timesteps | 496675 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.08 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 621 |\n", "| ep_rew_mean | 40.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 972 |\n", "| fps | 141 |\n", "| time_elapsed | 3527 |\n", "| total_timesteps | 499255 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.604 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 623 |\n", "| ep_rew_mean | 40.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 976 |\n", "| fps | 141 |\n", "| time_elapsed | 3550 |\n", "| total_timesteps | 502141 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.41 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 624 |\n", "| ep_rew_mean | 40.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 980 |\n", "| fps | 141 |\n", "| time_elapsed | 3567 |\n", "| total_timesteps | 504349 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.42 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 624 |\n", "| ep_rew_mean | 40.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 984 |\n", "| fps | 141 |\n", "| time_elapsed | 3586 |\n", "| total_timesteps | 506790 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.365 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 623 |\n", "| ep_rew_mean | 40.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 988 |\n", "| fps | 141 |\n", "| time_elapsed | 3604 |\n", "| total_timesteps | 508974 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.16 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 619 |\n", "| ep_rew_mean | 41.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 992 |\n", "| fps | 141 |\n", "| time_elapsed | 3618 |\n", "| total_timesteps | 510766 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 2.17 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 621 |\n", "| ep_rew_mean | 41.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 996 |\n", "| fps | 141 |\n", "| time_elapsed | 3639 |\n", "| total_timesteps | 513370 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.521 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 620 |\n", "| ep_rew_mean | 41.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1000 |\n", "| fps | 140 |\n", "| time_elapsed | 3657 |\n", "| total_timesteps | 515708 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.353 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 619 |\n", "| ep_rew_mean | 41.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1004 |\n", "| fps | 140 |\n", "| time_elapsed | 3674 |\n", "| total_timesteps | 517890 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.976 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 615 |\n", "| ep_rew_mean | 40.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1008 |\n", "| fps | 140 |\n", "| time_elapsed | 3691 |\n", "| total_timesteps | 519974 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.32 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 618 |\n", "| ep_rew_mean | 40.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1012 |\n", "| fps | 140 |\n", "| time_elapsed | 3711 |\n", "| total_timesteps | 522578 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.505 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 618 |\n", "| ep_rew_mean | 40.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1016 |\n", "| fps | 140 |\n", "| time_elapsed | 3732 |\n", "| total_timesteps | 525170 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.07 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 618 |\n", "| ep_rew_mean | 39.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1020 |\n", "| fps | 140 |\n", "| time_elapsed | 3750 |\n", "| total_timesteps | 527404 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.403 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 617 |\n", "| ep_rew_mean | 39.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1024 |\n", "| fps | 140 |\n", "| time_elapsed | 3767 |\n", "| total_timesteps | 529618 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 3.43 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 619 |\n", "| ep_rew_mean | 39.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1028 |\n", "| fps | 140 |\n", "| time_elapsed | 3789 |\n", "| total_timesteps | 532366 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.442 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 617 |\n", "| ep_rew_mean | 38.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1032 |\n", "| fps | 140 |\n", "| time_elapsed | 3811 |\n", "| total_timesteps | 535148 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.3 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 615 |\n", "| ep_rew_mean | 39 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1036 |\n", "| fps | 140 |\n", "| time_elapsed | 3834 |\n", "| total_timesteps | 538030 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.375 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 611 |\n", "| ep_rew_mean | 38.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1040 |\n", "| fps | 140 |\n", "| time_elapsed | 3851 |\n", "| total_timesteps | 540160 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.347 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 614 |\n", "| ep_rew_mean | 39.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1044 |\n", "| fps | 140 |\n", "| time_elapsed | 3874 |\n", "| total_timesteps | 543072 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.437 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 612 |\n", "| ep_rew_mean | 39.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1048 |\n", "| fps | 140 |\n", "| time_elapsed | 3890 |\n", "| total_timesteps | 545150 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.23 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 606 |\n", "| ep_rew_mean | 39.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1052 |\n", "| fps | 140 |\n", "| time_elapsed | 3910 |\n", "| total_timesteps | 547584 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.956 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 598 |\n", "| ep_rew_mean | 38.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1056 |\n", "| fps | 139 |\n", "| time_elapsed | 3925 |\n", "| total_timesteps | 549478 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 2.82 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 600 |\n", "| ep_rew_mean | 38.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1060 |\n", "| fps | 139 |\n", "| time_elapsed | 3945 |\n", "| total_timesteps | 552066 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.78 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 602 |\n", "| ep_rew_mean | 38.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1064 |\n", "| fps | 139 |\n", "| time_elapsed | 3965 |\n", "| total_timesteps | 554594 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.53 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 607 |\n", "| ep_rew_mean | 39.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1068 |\n", "| fps | 139 |\n", "| time_elapsed | 3987 |\n", "| total_timesteps | 557382 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.92 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 610 |\n", "| ep_rew_mean | 39.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1072 |\n", "| fps | 139 |\n", "| time_elapsed | 4010 |\n", "| total_timesteps | 560252 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.12 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 609 |\n", "| ep_rew_mean | 39.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1076 |\n", "| fps | 139 |\n", "| time_elapsed | 4032 |\n", "| total_timesteps | 563032 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.347 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 609 |\n", "| ep_rew_mean | 40.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1080 |\n", "| fps | 139 |\n", "| time_elapsed | 4050 |\n", "| total_timesteps | 565289 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.637 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 609 |\n", "| ep_rew_mean | 40.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1084 |\n", "| fps | 139 |\n", "| time_elapsed | 4069 |\n", "| total_timesteps | 567720 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.472 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 616 |\n", "| ep_rew_mean | 41.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1088 |\n", "| fps | 139 |\n", "| time_elapsed | 4092 |\n", "| total_timesteps | 570602 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.992 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 628 |\n", "| ep_rew_mean | 42 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1092 |\n", "| fps | 139 |\n", "| time_elapsed | 4115 |\n", "| total_timesteps | 573561 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.331 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 627 |\n", "| ep_rew_mean | 42 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1096 |\n", "| fps | 139 |\n", "| time_elapsed | 4135 |\n", "| total_timesteps | 576037 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.385 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 638 |\n", "| ep_rew_mean | 42.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1100 |\n", "| fps | 139 |\n", "| time_elapsed | 4163 |\n", "| total_timesteps | 579504 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.635 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 646 |\n", "| ep_rew_mean | 43.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1104 |\n", "| fps | 139 |\n", "| time_elapsed | 4186 |\n", "| total_timesteps | 582488 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.948 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 656 |\n", "| ep_rew_mean | 44.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1108 |\n", "| fps | 139 |\n", "| time_elapsed | 4211 |\n", "| total_timesteps | 585618 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.34 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 655 |\n", "| ep_rew_mean | 44.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1112 |\n", "| fps | 138 |\n", "| time_elapsed | 4231 |\n", "| total_timesteps | 588108 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.986 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 658 |\n", "| ep_rew_mean | 45.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1116 |\n", "| fps | 138 |\n", "| time_elapsed | 4254 |\n", "| total_timesteps | 590996 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.296 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 661 |\n", "| ep_rew_mean | 45.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1120 |\n", "| fps | 138 |\n", "| time_elapsed | 4274 |\n", "| total_timesteps | 593518 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.361 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 665 |\n", "| ep_rew_mean | 45.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1124 |\n", "| fps | 138 |\n", "| time_elapsed | 4295 |\n", "| total_timesteps | 596142 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.512 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 661 |\n", "| ep_rew_mean | 44.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1128 |\n", "| fps | 138 |\n", "| time_elapsed | 4313 |\n", "| total_timesteps | 598440 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 2.16 |\n", "----------------------------------\n", "Eval num_timesteps=600000, episode_reward=54.70 +/- 4.58\n", "Episode length: 776.00 +/- 44.56\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 776 |\n", "| mean_reward | 54.7 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 600000 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.69 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 665 |\n", "| ep_rew_mean | 44.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1132 |\n", "| fps | 138 |\n", "| time_elapsed | 4358 |\n", "| total_timesteps | 601653 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.552 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 667 |\n", "| ep_rew_mean | 44.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1136 |\n", "| fps | 137 |\n", "| time_elapsed | 4382 |\n", "| total_timesteps | 604709 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.462 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 676 |\n", "| ep_rew_mean | 45 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1140 |\n", "| fps | 137 |\n", "| time_elapsed | 4407 |\n", "| total_timesteps | 607781 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.377 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 678 |\n", "| ep_rew_mean | 44.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1144 |\n", "| fps | 137 |\n", "| time_elapsed | 4431 |\n", "| total_timesteps | 610878 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.36 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 692 |\n", "| ep_rew_mean | 46 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1148 |\n", "| fps | 137 |\n", "| time_elapsed | 4459 |\n", "| total_timesteps | 614330 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.27 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 695 |\n", "| ep_rew_mean | 45.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1152 |\n", "| fps | 137 |\n", "| time_elapsed | 4481 |\n", "| total_timesteps | 617102 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.215 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 706 |\n", "| ep_rew_mean | 47.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1156 |\n", "| fps | 137 |\n", "| time_elapsed | 4505 |\n", "| total_timesteps | 620120 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.57 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 709 |\n", "| ep_rew_mean | 46.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1160 |\n", "| fps | 137 |\n", "| time_elapsed | 4527 |\n", "| total_timesteps | 622956 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.485 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 712 |\n", "| ep_rew_mean | 46.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1164 |\n", "| fps | 137 |\n", "| time_elapsed | 4549 |\n", "| total_timesteps | 625752 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.357 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 711 |\n", "| ep_rew_mean | 45.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1168 |\n", "| fps | 137 |\n", "| time_elapsed | 4571 |\n", "| total_timesteps | 628515 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.171 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 713 |\n", "| ep_rew_mean | 46.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1172 |\n", "| fps | 137 |\n", "| time_elapsed | 4596 |\n", "| total_timesteps | 631601 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.877 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 712 |\n", "| ep_rew_mean | 46.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1176 |\n", "| fps | 137 |\n", "| time_elapsed | 4616 |\n", "| total_timesteps | 634191 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.21 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 712 |\n", "| ep_rew_mean | 44.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1180 |\n", "| fps | 137 |\n", "| time_elapsed | 4635 |\n", "| total_timesteps | 636501 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.196 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 716 |\n", "| ep_rew_mean | 44.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1184 |\n", "| fps | 137 |\n", "| time_elapsed | 4657 |\n", "| total_timesteps | 639368 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.964 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 718 |\n", "| ep_rew_mean | 44.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1188 |\n", "| fps | 137 |\n", "| time_elapsed | 4682 |\n", "| total_timesteps | 642442 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.951 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 721 |\n", "| ep_rew_mean | 45.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1192 |\n", "| fps | 137 |\n", "| time_elapsed | 4708 |\n", "| total_timesteps | 645697 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.505 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 737 |\n", "| ep_rew_mean | 46.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1196 |\n", "| fps | 137 |\n", "| time_elapsed | 4739 |\n", "| total_timesteps | 649703 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.275 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 729 |\n", "| ep_rew_mean | 45.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1200 |\n", "| fps | 137 |\n", "| time_elapsed | 4761 |\n", "| total_timesteps | 652441 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.367 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 736 |\n", "| ep_rew_mean | 46.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1204 |\n", "| fps | 136 |\n", "| time_elapsed | 4790 |\n", "| total_timesteps | 656045 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.353 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 45.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1208 |\n", "| fps | 136 |\n", "| time_elapsed | 4814 |\n", "| total_timesteps | 659127 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.938 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 744 |\n", "| ep_rew_mean | 45.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1212 |\n", "| fps | 136 |\n", "| time_elapsed | 4841 |\n", "| total_timesteps | 662477 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.348 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 745 |\n", "| ep_rew_mean | 46.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1216 |\n", "| fps | 136 |\n", "| time_elapsed | 4865 |\n", "| total_timesteps | 665520 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.304 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 744 |\n", "| ep_rew_mean | 46.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1220 |\n", "| fps | 136 |\n", "| time_elapsed | 4884 |\n", "| total_timesteps | 667950 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.232 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 744 |\n", "| ep_rew_mean | 46.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1224 |\n", "| fps | 136 |\n", "| time_elapsed | 4904 |\n", "| total_timesteps | 670498 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.351 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 753 |\n", "| ep_rew_mean | 47.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1228 |\n", "| fps | 136 |\n", "| time_elapsed | 4930 |\n", "| total_timesteps | 673778 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.206 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 751 |\n", "| ep_rew_mean | 48.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1232 |\n", "| fps | 136 |\n", "| time_elapsed | 4954 |\n", "| total_timesteps | 676770 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.304 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 753 |\n", "| ep_rew_mean | 48.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1236 |\n", "| fps | 136 |\n", "| time_elapsed | 4979 |\n", "| total_timesteps | 679984 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.297 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 753 |\n", "| ep_rew_mean | 48.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1240 |\n", "| fps | 136 |\n", "| time_elapsed | 5004 |\n", "| total_timesteps | 683128 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.216 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 751 |\n", "| ep_rew_mean | 48.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1244 |\n", "| fps | 136 |\n", "| time_elapsed | 5027 |\n", "| total_timesteps | 685990 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.304 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 747 |\n", "| ep_rew_mean | 47.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1248 |\n", "| fps | 136 |\n", "| time_elapsed | 5051 |\n", "| total_timesteps | 689009 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.434 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 755 |\n", "| ep_rew_mean | 48 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1252 |\n", "| fps | 136 |\n", "| time_elapsed | 5079 |\n", "| total_timesteps | 692595 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.387 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 760 |\n", "| ep_rew_mean | 48.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1256 |\n", "| fps | 136 |\n", "| time_elapsed | 5107 |\n", "| total_timesteps | 696111 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.274 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 763 |\n", "| ep_rew_mean | 48.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1260 |\n", "| fps | 136 |\n", "| time_elapsed | 5132 |\n", "| total_timesteps | 699214 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 2.59 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 765 |\n", "| ep_rew_mean | 49.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1264 |\n", "| fps | 136 |\n", "| time_elapsed | 5156 |\n", "| total_timesteps | 702248 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.35 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 773 |\n", "| ep_rew_mean | 50.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1268 |\n", "| fps | 136 |\n", "| time_elapsed | 5184 |\n", "| total_timesteps | 705848 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.305 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 777 |\n", "| ep_rew_mean | 50.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1272 |\n", "| fps | 136 |\n", "| time_elapsed | 5211 |\n", "| total_timesteps | 709306 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.229 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 785 |\n", "| ep_rew_mean | 50.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1276 |\n", "| fps | 136 |\n", "| time_elapsed | 5238 |\n", "| total_timesteps | 712708 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.23 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 793 |\n", "| ep_rew_mean | 51.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1280 |\n", "| fps | 135 |\n", "| time_elapsed | 5263 |\n", "| total_timesteps | 715820 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.154 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 797 |\n", "| ep_rew_mean | 53.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1284 |\n", "| fps | 135 |\n", "| time_elapsed | 5289 |\n", "| total_timesteps | 719093 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.387 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 796 |\n", "| ep_rew_mean | 53.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1288 |\n", "| fps | 135 |\n", "| time_elapsed | 5312 |\n", "| total_timesteps | 722003 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.136 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 796 |\n", "| ep_rew_mean | 53.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1292 |\n", "| fps | 135 |\n", "| time_elapsed | 5338 |\n", "| total_timesteps | 725281 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.329 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 784 |\n", "| ep_rew_mean | 52.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1296 |\n", "| fps | 135 |\n", "| time_elapsed | 5360 |\n", "| total_timesteps | 728081 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.303 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 787 |\n", "| ep_rew_mean | 53 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1300 |\n", "| fps | 135 |\n", "| time_elapsed | 5384 |\n", "| total_timesteps | 731179 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.271 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 783 |\n", "| ep_rew_mean | 53.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1304 |\n", "| fps | 135 |\n", "| time_elapsed | 5409 |\n", "| total_timesteps | 734353 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.603 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 783 |\n", "| ep_rew_mean | 53.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1308 |\n", "| fps | 135 |\n", "| time_elapsed | 5434 |\n", "| total_timesteps | 737444 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.421 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 777 |\n", "| ep_rew_mean | 53.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1312 |\n", "| fps | 135 |\n", "| time_elapsed | 5455 |\n", "| total_timesteps | 740136 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.79 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 777 |\n", "| ep_rew_mean | 53.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1316 |\n", "| fps | 135 |\n", "| time_elapsed | 5479 |\n", "| total_timesteps | 743180 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.446 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 788 |\n", "| ep_rew_mean | 54.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1320 |\n", "| fps | 135 |\n", "| time_elapsed | 5507 |\n", "| total_timesteps | 746700 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.766 |\n", "----------------------------------\n", "Eval num_timesteps=750000, episode_reward=47.40 +/- 3.29\n", "Episode length: 851.40 +/- 138.79\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 851 |\n", "| mean_reward | 47.4 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 750000 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.203 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 795 |\n", "| ep_rew_mean | 54.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1324 |\n", "| fps | 135 |\n", "| time_elapsed | 5554 |\n", "| total_timesteps | 750000 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 795 |\n", "| ep_rew_mean | 54.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1328 |\n", "| fps | 134 |\n", "| time_elapsed | 5583 |\n", "| total_timesteps | 753259 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.209 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 798 |\n", "| ep_rew_mean | 54.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1332 |\n", "| fps | 134 |\n", "| time_elapsed | 5609 |\n", "| total_timesteps | 756603 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.237 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 799 |\n", "| ep_rew_mean | 55.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1336 |\n", "| fps | 134 |\n", "| time_elapsed | 5635 |\n", "| total_timesteps | 759930 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.299 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 802 |\n", "| ep_rew_mean | 56 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1340 |\n", "| fps | 134 |\n", "| time_elapsed | 5662 |\n", "| total_timesteps | 763286 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.289 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 809 |\n", "| ep_rew_mean | 56.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1344 |\n", "| fps | 134 |\n", "| time_elapsed | 5691 |\n", "| total_timesteps | 766922 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.195 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 820 |\n", "| ep_rew_mean | 57.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1348 |\n", "| fps | 134 |\n", "| time_elapsed | 5723 |\n", "| total_timesteps | 771048 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.257 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 813 |\n", "| ep_rew_mean | 58.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1352 |\n", "| fps | 134 |\n", "| time_elapsed | 5746 |\n", "| total_timesteps | 773916 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.354 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 816 |\n", "| ep_rew_mean | 59.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1356 |\n", "| fps | 134 |\n", "| time_elapsed | 5776 |\n", "| total_timesteps | 777671 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.206 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 820 |\n", "| ep_rew_mean | 59.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1360 |\n", "| fps | 134 |\n", "| time_elapsed | 5804 |\n", "| total_timesteps | 781203 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.19 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 832 |\n", "| ep_rew_mean | 59.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1364 |\n", "| fps | 134 |\n", "| time_elapsed | 5837 |\n", "| total_timesteps | 785437 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.191 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 831 |\n", "| ep_rew_mean | 60.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1368 |\n", "| fps | 134 |\n", "| time_elapsed | 5865 |\n", "| total_timesteps | 788931 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.295 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 830 |\n", "| ep_rew_mean | 60.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1372 |\n", "| fps | 134 |\n", "| time_elapsed | 5891 |\n", "| total_timesteps | 792313 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.882 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 829 |\n", "| ep_rew_mean | 60.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1376 |\n", "| fps | 134 |\n", "| time_elapsed | 5918 |\n", "| total_timesteps | 795657 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.61 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 836 |\n", "| ep_rew_mean | 61.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1380 |\n", "| fps | 134 |\n", "| time_elapsed | 5948 |\n", "| total_timesteps | 799463 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.272 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 836 |\n", "| ep_rew_mean | 59.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1384 |\n", "| fps | 134 |\n", "| time_elapsed | 5973 |\n", "| total_timesteps | 802687 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.261 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 843 |\n", "| ep_rew_mean | 60.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1388 |\n", "| fps | 134 |\n", "| time_elapsed | 6002 |\n", "| total_timesteps | 806327 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.232 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 846 |\n", "| ep_rew_mean | 60.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1392 |\n", "| fps | 134 |\n", "| time_elapsed | 6030 |\n", "| total_timesteps | 809836 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.398 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 852 |\n", "| ep_rew_mean | 61 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1396 |\n", "| fps | 134 |\n", "| time_elapsed | 6057 |\n", "| total_timesteps | 813326 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.221 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 854 |\n", "| ep_rew_mean | 62 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1400 |\n", "| fps | 134 |\n", "| time_elapsed | 6083 |\n", "| total_timesteps | 816542 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.208 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 861 |\n", "| ep_rew_mean | 62.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1404 |\n", "| fps | 134 |\n", "| time_elapsed | 6114 |\n", "| total_timesteps | 820496 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.194 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 857 |\n", "| ep_rew_mean | 62.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1408 |\n", "| fps | 134 |\n", "| time_elapsed | 6135 |\n", "| total_timesteps | 823129 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.35 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 858 |\n", "| ep_rew_mean | 63.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1412 |\n", "| fps | 134 |\n", "| time_elapsed | 6157 |\n", "| total_timesteps | 825965 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.316 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 852 |\n", "| ep_rew_mean | 63.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1416 |\n", "| fps | 134 |\n", "| time_elapsed | 6176 |\n", "| total_timesteps | 828385 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.278 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 850 |\n", "| ep_rew_mean | 64.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1420 |\n", "| fps | 134 |\n", "| time_elapsed | 6202 |\n", "| total_timesteps | 831683 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.257 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 849 |\n", "| ep_rew_mean | 64 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1424 |\n", "| fps | 134 |\n", "| time_elapsed | 6228 |\n", "| total_timesteps | 834876 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.83 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 851 |\n", "| ep_rew_mean | 64.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1428 |\n", "| fps | 134 |\n", "| time_elapsed | 6255 |\n", "| total_timesteps | 838350 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.69 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 846 |\n", "| ep_rew_mean | 64.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1432 |\n", "| fps | 133 |\n", "| time_elapsed | 6278 |\n", "| total_timesteps | 841188 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.194 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 946 |\n", "| ep_rew_mean | 64 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1436 |\n", "| fps | 133 |\n", "| time_elapsed | 6384 |\n", "| total_timesteps | 854564 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.263 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 945 |\n", "| ep_rew_mean | 63.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1440 |\n", "| fps | 133 |\n", "| time_elapsed | 6411 |\n", "| total_timesteps | 857779 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.01 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 944 |\n", "| ep_rew_mean | 63.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1444 |\n", "| fps | 133 |\n", "| time_elapsed | 6440 |\n", "| total_timesteps | 861276 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.278 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 936 |\n", "| ep_rew_mean | 63.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1448 |\n", "| fps | 133 |\n", "| time_elapsed | 6467 |\n", "| total_timesteps | 864600 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.262 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 943 |\n", "| ep_rew_mean | 63.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1452 |\n", "| fps | 133 |\n", "| time_elapsed | 6497 |\n", "| total_timesteps | 868204 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.226 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 945 |\n", "| ep_rew_mean | 62.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1456 |\n", "| fps | 133 |\n", "| time_elapsed | 6530 |\n", "| total_timesteps | 872186 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.294 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 944 |\n", "| ep_rew_mean | 63.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1460 |\n", "| fps | 133 |\n", "| time_elapsed | 6558 |\n", "| total_timesteps | 875574 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.254 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 933 |\n", "| ep_rew_mean | 62.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1464 |\n", "| fps | 133 |\n", "| time_elapsed | 6585 |\n", "| total_timesteps | 878769 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.154 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 923 |\n", "| ep_rew_mean | 62.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1468 |\n", "| fps | 133 |\n", "| time_elapsed | 6605 |\n", "| total_timesteps | 881267 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.186 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 927 |\n", "| ep_rew_mean | 62.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1472 |\n", "| fps | 133 |\n", "| time_elapsed | 6636 |\n", "| total_timesteps | 884981 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.405 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 925 |\n", "| ep_rew_mean | 62.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1476 |\n", "| fps | 133 |\n", "| time_elapsed | 6662 |\n", "| total_timesteps | 888156 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.19 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 917 |\n", "| ep_rew_mean | 62.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1480 |\n", "| fps | 133 |\n", "| time_elapsed | 6687 |\n", "| total_timesteps | 891191 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.451 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 913 |\n", "| ep_rew_mean | 63.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1484 |\n", "| fps | 133 |\n", "| time_elapsed | 6710 |\n", "| total_timesteps | 893942 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.589 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 909 |\n", "| ep_rew_mean | 63.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1488 |\n", "| fps | 133 |\n", "| time_elapsed | 6737 |\n", "| total_timesteps | 897189 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.06 |\n", "----------------------------------\n", "Eval num_timesteps=900000, episode_reward=51.80 +/- 43.53\n", "Episode length: 3280.00 +/- 7906.82\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 3.28e+03 |\n", "| mean_reward | 51.8 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 900000 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.293 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 904 |\n", "| ep_rew_mean | 62.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1492 |\n", "| fps | 131 |\n", "| time_elapsed | 6849 |\n", "| total_timesteps | 900265 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.152 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 903 |\n", "| ep_rew_mean | 62.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1496 |\n", "| fps | 131 |\n", "| time_elapsed | 6877 |\n", "| total_timesteps | 903582 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.394 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 896 |\n", "| ep_rew_mean | 61.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1500 |\n", "| fps | 131 |\n", "| time_elapsed | 6899 |\n", "| total_timesteps | 906141 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.232 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 886 |\n", "| ep_rew_mean | 64 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1504 |\n", "| fps | 131 |\n", "| time_elapsed | 6923 |\n", "| total_timesteps | 909078 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.347 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 892 |\n", "| ep_rew_mean | 64.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1508 |\n", "| fps | 131 |\n", "| time_elapsed | 6951 |\n", "| total_timesteps | 912300 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.598 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 900 |\n", "| ep_rew_mean | 65 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1512 |\n", "| fps | 131 |\n", "| time_elapsed | 6982 |\n", "| total_timesteps | 915962 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.189 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 909 |\n", "| ep_rew_mean | 65 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1516 |\n", "| fps | 131 |\n", "| time_elapsed | 7010 |\n", "| total_timesteps | 919322 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.334 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 905 |\n", "| ep_rew_mean | 63.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1520 |\n", "| fps | 131 |\n", "| time_elapsed | 7035 |\n", "| total_timesteps | 922168 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.435 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 907 |\n", "| ep_rew_mean | 64.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1524 |\n", "| fps | 131 |\n", "| time_elapsed | 7063 |\n", "| total_timesteps | 925560 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.245 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 900 |\n", "| ep_rew_mean | 64 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1528 |\n", "| fps | 130 |\n", "| time_elapsed | 7087 |\n", "| total_timesteps | 928300 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.277 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 905 |\n", "| ep_rew_mean | 64.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1532 |\n", "| fps | 130 |\n", "| time_elapsed | 7115 |\n", "| total_timesteps | 931706 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.12 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 805 |\n", "| ep_rew_mean | 63.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1536 |\n", "| fps | 130 |\n", "| time_elapsed | 7143 |\n", "| total_timesteps | 935024 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.346 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 810 |\n", "| ep_rew_mean | 64.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1540 |\n", "| fps | 130 |\n", "| time_elapsed | 7175 |\n", "| total_timesteps | 938738 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.175 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 809 |\n", "| ep_rew_mean | 64.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1544 |\n", "| fps | 130 |\n", "| time_elapsed | 7204 |\n", "| total_timesteps | 942188 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.263 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 806 |\n", "| ep_rew_mean | 63.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1548 |\n", "| fps | 130 |\n", "| time_elapsed | 7230 |\n", "| total_timesteps | 945207 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 2.75 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 808 |\n", "| ep_rew_mean | 63.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1552 |\n", "| fps | 130 |\n", "| time_elapsed | 7261 |\n", "| total_timesteps | 948991 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.289 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 798 |\n", "| ep_rew_mean | 63.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1556 |\n", "| fps | 130 |\n", "| time_elapsed | 7287 |\n", "| total_timesteps | 951999 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.304 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 795 |\n", "| ep_rew_mean | 62.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1560 |\n", "| fps | 130 |\n", "| time_elapsed | 7313 |\n", "| total_timesteps | 955070 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.692 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 795 |\n", "| ep_rew_mean | 62.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1564 |\n", "| fps | 130 |\n", "| time_elapsed | 7340 |\n", "| total_timesteps | 958310 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.267 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 806 |\n", "| ep_rew_mean | 62.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1568 |\n", "| fps | 130 |\n", "| time_elapsed | 7370 |\n", "| total_timesteps | 961906 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.01 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 802 |\n", "| ep_rew_mean | 61.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1572 |\n", "| fps | 130 |\n", "| time_elapsed | 7399 |\n", "| total_timesteps | 965174 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.161 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 808 |\n", "| ep_rew_mean | 62.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1576 |\n", "| fps | 130 |\n", "| time_elapsed | 7430 |\n", "| total_timesteps | 968926 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.303 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 810 |\n", "| ep_rew_mean | 63.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1580 |\n", "| fps | 130 |\n", "| time_elapsed | 7458 |\n", "| total_timesteps | 972187 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.175 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 818 |\n", "| ep_rew_mean | 62.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1584 |\n", "| fps | 130 |\n", "| time_elapsed | 7488 |\n", "| total_timesteps | 975735 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.507 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 820 |\n", "| ep_rew_mean | 63.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1588 |\n", "| fps | 130 |\n", "| time_elapsed | 7517 |\n", "| total_timesteps | 979234 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.284 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 830 |\n", "| ep_rew_mean | 64 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1592 |\n", "| fps | 130 |\n", "| time_elapsed | 7551 |\n", "| total_timesteps | 983218 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.1 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 832 |\n", "| ep_rew_mean | 63.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1596 |\n", "| fps | 130 |\n", "| time_elapsed | 7582 |\n", "| total_timesteps | 986790 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.04 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 842 |\n", "| ep_rew_mean | 64.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1600 |\n", "| fps | 130 |\n", "| time_elapsed | 7612 |\n", "| total_timesteps | 990354 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.386 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 844 |\n", "| ep_rew_mean | 62.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1604 |\n", "| fps | 130 |\n", "| time_elapsed | 7638 |\n", "| total_timesteps | 993451 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.347 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 848 |\n", "| ep_rew_mean | 62.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1608 |\n", "| fps | 129 |\n", "| time_elapsed | 7670 |\n", "| total_timesteps | 997078 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.264 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 846 |\n", "| ep_rew_mean | 60.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1612 |\n", "| fps | 129 |\n", "| time_elapsed | 7699 |\n", "| total_timesteps | 1000587 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.942 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 850 |\n", "| ep_rew_mean | 61.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1616 |\n", "| fps | 129 |\n", "| time_elapsed | 7731 |\n", "| total_timesteps | 1004361 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.205 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 850 |\n", "| ep_rew_mean | 61.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1620 |\n", "| fps | 129 |\n", "| time_elapsed | 7755 |\n", "| total_timesteps | 1007125 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.827 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 851 |\n", "| ep_rew_mean | 61 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1624 |\n", "| fps | 129 |\n", "| time_elapsed | 7785 |\n", "| total_timesteps | 1010657 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.194 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 864 |\n", "| ep_rew_mean | 62 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1628 |\n", "| fps | 129 |\n", "| time_elapsed | 7820 |\n", "| total_timesteps | 1014673 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.131 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 866 |\n", "| ep_rew_mean | 62.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1632 |\n", "| fps | 129 |\n", "| time_elapsed | 7851 |\n", "| total_timesteps | 1018259 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.188 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 872 |\n", "| ep_rew_mean | 62.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1636 |\n", "| fps | 129 |\n", "| time_elapsed | 7886 |\n", "| total_timesteps | 1022247 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.924 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 868 |\n", "| ep_rew_mean | 62.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1640 |\n", "| fps | 129 |\n", "| time_elapsed | 7916 |\n", "| total_timesteps | 1025579 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.237 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 870 |\n", "| ep_rew_mean | 62.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1644 |\n", "| fps | 129 |\n", "| time_elapsed | 7948 |\n", "| total_timesteps | 1029171 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.219 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 879 |\n", "| ep_rew_mean | 62.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1648 |\n", "| fps | 129 |\n", "| time_elapsed | 7982 |\n", "| total_timesteps | 1033097 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.416 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 867 |\n", "| ep_rew_mean | 61.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1652 |\n", "| fps | 129 |\n", "| time_elapsed | 8005 |\n", "| total_timesteps | 1035691 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.287 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 865 |\n", "| ep_rew_mean | 61.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1656 |\n", "| fps | 129 |\n", "| time_elapsed | 8029 |\n", "| total_timesteps | 1038523 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.181 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 861 |\n", "| ep_rew_mean | 60.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1660 |\n", "| fps | 129 |\n", "| time_elapsed | 8052 |\n", "| total_timesteps | 1041151 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.231 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 868 |\n", "| ep_rew_mean | 60.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1664 |\n", "| fps | 129 |\n", "| time_elapsed | 8086 |\n", "| total_timesteps | 1045085 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.209 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 864 |\n", "| ep_rew_mean | 61.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1668 |\n", "| fps | 129 |\n", "| time_elapsed | 8114 |\n", "| total_timesteps | 1048343 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.241 |\n", "----------------------------------\n", "Eval num_timesteps=1050000, episode_reward=55.40 +/- 2.62\n", "Episode length: 905.60 +/- 124.99\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 906 |\n", "| mean_reward | 55.4 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 1050000 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.228 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 870 |\n", "| ep_rew_mean | 61.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1672 |\n", "| fps | 128 |\n", "| time_elapsed | 8173 |\n", "| total_timesteps | 1052133 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.277 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 870 |\n", "| ep_rew_mean | 61.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1676 |\n", "| fps | 128 |\n", "| time_elapsed | 8205 |\n", "| total_timesteps | 1055896 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.68 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 871 |\n", "| ep_rew_mean | 60.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1680 |\n", "| fps | 128 |\n", "| time_elapsed | 8234 |\n", "| total_timesteps | 1059269 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.291 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 873 |\n", "| ep_rew_mean | 60.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1684 |\n", "| fps | 128 |\n", "| time_elapsed | 8267 |\n", "| total_timesteps | 1063011 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0922 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 870 |\n", "| ep_rew_mean | 60.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1688 |\n", "| fps | 128 |\n", "| time_elapsed | 8295 |\n", "| total_timesteps | 1066217 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.288 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 874 |\n", "| ep_rew_mean | 60 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1692 |\n", "| fps | 128 |\n", "| time_elapsed | 8335 |\n", "| total_timesteps | 1070663 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.199 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 867 |\n", "| ep_rew_mean | 59.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1696 |\n", "| fps | 128 |\n", "| time_elapsed | 8360 |\n", "| total_timesteps | 1073472 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.355 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 865 |\n", "| ep_rew_mean | 58.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1700 |\n", "| fps | 128 |\n", "| time_elapsed | 8390 |\n", "| total_timesteps | 1076848 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.885 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 865 |\n", "| ep_rew_mean | 58.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1704 |\n", "| fps | 128 |\n", "| time_elapsed | 8417 |\n", "| total_timesteps | 1079980 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.223 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 855 |\n", "| ep_rew_mean | 57.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1708 |\n", "| fps | 128 |\n", "| time_elapsed | 8441 |\n", "| total_timesteps | 1082611 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.224 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 855 |\n", "| ep_rew_mean | 57.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1712 |\n", "| fps | 128 |\n", "| time_elapsed | 8471 |\n", "| total_timesteps | 1086075 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.704 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 851 |\n", "| ep_rew_mean | 58.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1716 |\n", "| fps | 128 |\n", "| time_elapsed | 8500 |\n", "| total_timesteps | 1089425 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.55 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 866 |\n", "| ep_rew_mean | 59 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1720 |\n", "| fps | 128 |\n", "| time_elapsed | 8538 |\n", "| total_timesteps | 1093759 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.179 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 860 |\n", "| ep_rew_mean | 58.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1724 |\n", "| fps | 128 |\n", "| time_elapsed | 8562 |\n", "| total_timesteps | 1096619 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.855 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 856 |\n", "| ep_rew_mean | 58.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1728 |\n", "| fps | 128 |\n", "| time_elapsed | 8594 |\n", "| total_timesteps | 1100254 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.504 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 856 |\n", "| ep_rew_mean | 58.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1732 |\n", "| fps | 127 |\n", "| time_elapsed | 8625 |\n", "| total_timesteps | 1103858 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.305 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 851 |\n", "| ep_rew_mean | 57.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1736 |\n", "| fps | 127 |\n", "| time_elapsed | 8655 |\n", "| total_timesteps | 1107320 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.194 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 849 |\n", "| ep_rew_mean | 57.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1740 |\n", "| fps | 127 |\n", "| time_elapsed | 8682 |\n", "| total_timesteps | 1110450 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.751 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 851 |\n", "| ep_rew_mean | 57.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1744 |\n", "| fps | 127 |\n", "| time_elapsed | 8716 |\n", "| total_timesteps | 1114318 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.13 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 57.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1748 |\n", "| fps | 127 |\n", "| time_elapsed | 8975 |\n", "| total_timesteps | 1144164 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.124 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 58.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1752 |\n", "| fps | 127 |\n", "| time_elapsed | 9012 |\n", "| total_timesteps | 1147790 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0974 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 59.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1756 |\n", "| fps | 127 |\n", "| time_elapsed | 9039 |\n", "| total_timesteps | 1150974 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.26 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 60.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1760 |\n", "| fps | 127 |\n", "| time_elapsed | 9073 |\n", "| total_timesteps | 1154894 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.105 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 59.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1764 |\n", "| fps | 127 |\n", "| time_elapsed | 9105 |\n", "| total_timesteps | 1158635 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.144 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 59.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1768 |\n", "| fps | 127 |\n", "| time_elapsed | 9135 |\n", "| total_timesteps | 1162185 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.142 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 59.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1772 |\n", "| fps | 127 |\n", "| time_elapsed | 9162 |\n", "| total_timesteps | 1165255 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.161 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 58.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1776 |\n", "| fps | 127 |\n", "| time_elapsed | 9192 |\n", "| total_timesteps | 1168823 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.216 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 58.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1780 |\n", "| fps | 127 |\n", "| time_elapsed | 9226 |\n", "| total_timesteps | 1172769 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.113 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 58.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1784 |\n", "| fps | 127 |\n", "| time_elapsed | 9254 |\n", "| total_timesteps | 1176087 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.186 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 58.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1788 |\n", "| fps | 127 |\n", "| time_elapsed | 9281 |\n", "| total_timesteps | 1179203 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.249 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 58.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1792 |\n", "| fps | 127 |\n", "| time_elapsed | 9306 |\n", "| total_timesteps | 1182188 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0821 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 59.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1796 |\n", "| fps | 126 |\n", "| time_elapsed | 9337 |\n", "| total_timesteps | 1185858 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.23 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 58.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1800 |\n", "| fps | 126 |\n", "| time_elapsed | 9360 |\n", "| total_timesteps | 1188540 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.231 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 59.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1804 |\n", "| fps | 126 |\n", "| time_elapsed | 9389 |\n", "| total_timesteps | 1192067 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.265 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 59.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1808 |\n", "| fps | 126 |\n", "| time_elapsed | 9417 |\n", "| total_timesteps | 1195269 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.207 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 59.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1812 |\n", "| fps | 126 |\n", "| time_elapsed | 9446 |\n", "| total_timesteps | 1198775 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.181 |\n", "----------------------------------\n", "Eval num_timesteps=1200000, episode_reward=23.00 +/- 0.00\n", "Episode length: 500.60 +/- 4.20\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 501 |\n", "| mean_reward | 23 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 1200000 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.188 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 58.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1816 |\n", "| fps | 126 |\n", "| time_elapsed | 9495 |\n", "| total_timesteps | 1202886 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.41 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 60.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1820 |\n", "| fps | 126 |\n", "| time_elapsed | 9523 |\n", "| total_timesteps | 1206280 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.192 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 62.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1824 |\n", "| fps | 126 |\n", "| time_elapsed | 9551 |\n", "| total_timesteps | 1209546 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.02 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 62.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1828 |\n", "| fps | 126 |\n", "| time_elapsed | 9581 |\n", "| total_timesteps | 1213088 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.155 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 61.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1832 |\n", "| fps | 126 |\n", "| time_elapsed | 9615 |\n", "| total_timesteps | 1217068 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.173 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 63.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1836 |\n", "| fps | 126 |\n", "| time_elapsed | 9651 |\n", "| total_timesteps | 1221294 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.05 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 63.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1840 |\n", "| fps | 126 |\n", "| time_elapsed | 9682 |\n", "| total_timesteps | 1224871 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.147 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 64.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1844 |\n", "| fps | 126 |\n", "| time_elapsed | 9715 |\n", "| total_timesteps | 1228689 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.888 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 884 |\n", "| ep_rew_mean | 65 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1848 |\n", "| fps | 126 |\n", "| time_elapsed | 9747 |\n", "| total_timesteps | 1232559 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.492 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 879 |\n", "| ep_rew_mean | 64.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1852 |\n", "| fps | 126 |\n", "| time_elapsed | 9773 |\n", "| total_timesteps | 1235647 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.291 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 880 |\n", "| ep_rew_mean | 64 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1856 |\n", "| fps | 126 |\n", "| time_elapsed | 9801 |\n", "| total_timesteps | 1238986 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.252 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 871 |\n", "| ep_rew_mean | 63.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1860 |\n", "| fps | 126 |\n", "| time_elapsed | 9827 |\n", "| total_timesteps | 1242001 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.124 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 967 |\n", "| ep_rew_mean | 65.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1864 |\n", "| fps | 126 |\n", "| time_elapsed | 9939 |\n", "| total_timesteps | 1255351 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.807 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 967 |\n", "| ep_rew_mean | 65.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1868 |\n", "| fps | 126 |\n", "| time_elapsed | 9968 |\n", "| total_timesteps | 1258875 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.18 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 970 |\n", "| ep_rew_mean | 65.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1872 |\n", "| fps | 126 |\n", "| time_elapsed | 9997 |\n", "| total_timesteps | 1262291 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.131 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 971 |\n", "| ep_rew_mean | 65.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1876 |\n", "| fps | 126 |\n", "| time_elapsed | 10028 |\n", "| total_timesteps | 1265935 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0933 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 965 |\n", "| ep_rew_mean | 65.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1880 |\n", "| fps | 126 |\n", "| time_elapsed | 10056 |\n", "| total_timesteps | 1269268 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.325 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 973 |\n", "| ep_rew_mean | 65.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1884 |\n", "| fps | 126 |\n", "| time_elapsed | 10091 |\n", "| total_timesteps | 1273419 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0623 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 977 |\n", "| ep_rew_mean | 64.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1888 |\n", "| fps | 126 |\n", "| time_elapsed | 10120 |\n", "| total_timesteps | 1276886 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.245 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 977 |\n", "| ep_rew_mean | 65.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1892 |\n", "| fps | 126 |\n", "| time_elapsed | 10145 |\n", "| total_timesteps | 1279842 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.46 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 964 |\n", "| ep_rew_mean | 65.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1896 |\n", "| fps | 126 |\n", "| time_elapsed | 10165 |\n", "| total_timesteps | 1282288 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.208 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 972 |\n", "| ep_rew_mean | 65.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1900 |\n", "| fps | 126 |\n", "| time_elapsed | 10195 |\n", "| total_timesteps | 1285788 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.063 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 971 |\n", "| ep_rew_mean | 65.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1904 |\n", "| fps | 126 |\n", "| time_elapsed | 10223 |\n", "| total_timesteps | 1289144 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.138 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 968 |\n", "| ep_rew_mean | 66 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1908 |\n", "| fps | 126 |\n", "| time_elapsed | 10247 |\n", "| total_timesteps | 1292061 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.319 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 975 |\n", "| ep_rew_mean | 66.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1912 |\n", "| fps | 126 |\n", "| time_elapsed | 10282 |\n", "| total_timesteps | 1296257 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.42 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 959 |\n", "| ep_rew_mean | 66 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1916 |\n", "| fps | 126 |\n", "| time_elapsed | 10304 |\n", "| total_timesteps | 1298787 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0998 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 961 |\n", "| ep_rew_mean | 64.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1920 |\n", "| fps | 126 |\n", "| time_elapsed | 10334 |\n", "| total_timesteps | 1302418 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0956 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 970 |\n", "| ep_rew_mean | 64.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1924 |\n", "| fps | 125 |\n", "| time_elapsed | 10369 |\n", "| total_timesteps | 1306542 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.194 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 970 |\n", "| ep_rew_mean | 65.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1928 |\n", "| fps | 125 |\n", "| time_elapsed | 10399 |\n", "| total_timesteps | 1310102 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.162 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 969 |\n", "| ep_rew_mean | 66 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1932 |\n", "| fps | 125 |\n", "| time_elapsed | 10432 |\n", "| total_timesteps | 1313997 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.233 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 965 |\n", "| ep_rew_mean | 64.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1936 |\n", "| fps | 125 |\n", "| time_elapsed | 10464 |\n", "| total_timesteps | 1317773 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.257 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 965 |\n", "| ep_rew_mean | 64.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1940 |\n", "| fps | 125 |\n", "| time_elapsed | 10496 |\n", "| total_timesteps | 1321405 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.131 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 959 |\n", "| ep_rew_mean | 63.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1944 |\n", "| fps | 125 |\n", "| time_elapsed | 10523 |\n", "| total_timesteps | 1324616 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.178 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 948 |\n", "| ep_rew_mean | 61.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1948 |\n", "| fps | 125 |\n", "| time_elapsed | 10547 |\n", "| total_timesteps | 1327368 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.244 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 946 |\n", "| ep_rew_mean | 64.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1952 |\n", "| fps | 125 |\n", "| time_elapsed | 10572 |\n", "| total_timesteps | 1330294 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.316 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 946 |\n", "| ep_rew_mean | 64.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1956 |\n", "| fps | 125 |\n", "| time_elapsed | 10601 |\n", "| total_timesteps | 1333612 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.178 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 955 |\n", "| ep_rew_mean | 63.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1960 |\n", "| fps | 125 |\n", "| time_elapsed | 10634 |\n", "| total_timesteps | 1337548 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.193 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 862 |\n", "| ep_rew_mean | 63.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1964 |\n", "| fps | 125 |\n", "| time_elapsed | 10668 |\n", "| total_timesteps | 1341504 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.436 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 860 |\n", "| ep_rew_mean | 62.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1968 |\n", "| fps | 125 |\n", "| time_elapsed | 10697 |\n", "| total_timesteps | 1344896 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.286 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 861 |\n", "| ep_rew_mean | 62.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1972 |\n", "| fps | 125 |\n", "| time_elapsed | 10726 |\n", "| total_timesteps | 1348389 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0834 |\n", "----------------------------------\n", "Eval num_timesteps=1350000, episode_reward=21.20 +/- 2.40\n", "Episode length: 642.00 +/- 13.54\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 642 |\n", "| mean_reward | 21.2 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 1350000 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.248 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 858 |\n", "| ep_rew_mean | 63.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1976 |\n", "| fps | 125 |\n", "| time_elapsed | 10772 |\n", "| total_timesteps | 1351753 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.373 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 859 |\n", "| ep_rew_mean | 63.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1980 |\n", "| fps | 125 |\n", "| time_elapsed | 10801 |\n", "| total_timesteps | 1355205 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0778 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 856 |\n", "| ep_rew_mean | 63 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1984 |\n", "| fps | 125 |\n", "| time_elapsed | 10832 |\n", "| total_timesteps | 1358976 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.785 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 860 |\n", "| ep_rew_mean | 63.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1988 |\n", "| fps | 125 |\n", "| time_elapsed | 10864 |\n", "| total_timesteps | 1362840 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.301 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 866 |\n", "| ep_rew_mean | 62.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1992 |\n", "| fps | 125 |\n", "| time_elapsed | 10894 |\n", "| total_timesteps | 1366433 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.302 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 911 |\n", "| ep_rew_mean | 62.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 1996 |\n", "| fps | 125 |\n", "| time_elapsed | 10952 |\n", "| total_timesteps | 1373387 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.111 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 912 |\n", "| ep_rew_mean | 62.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2000 |\n", "| fps | 125 |\n", "| time_elapsed | 10982 |\n", "| total_timesteps | 1376964 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.06 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 911 |\n", "| ep_rew_mean | 62.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2004 |\n", "| fps | 125 |\n", "| time_elapsed | 11010 |\n", "| total_timesteps | 1380270 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.677 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 917 |\n", "| ep_rew_mean | 64 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2008 |\n", "| fps | 125 |\n", "| time_elapsed | 11040 |\n", "| total_timesteps | 1383800 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.305 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 913 |\n", "| ep_rew_mean | 64.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2012 |\n", "| fps | 125 |\n", "| time_elapsed | 11071 |\n", "| total_timesteps | 1387576 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.121 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 925 |\n", "| ep_rew_mean | 64.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2016 |\n", "| fps | 125 |\n", "| time_elapsed | 11103 |\n", "| total_timesteps | 1391316 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.177 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 927 |\n", "| ep_rew_mean | 64.1 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2020 |\n", "| fps | 125 |\n", "| time_elapsed | 11135 |\n", "| total_timesteps | 1395166 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.267 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 923 |\n", "| ep_rew_mean | 64.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2024 |\n", "| fps | 125 |\n", "| time_elapsed | 11166 |\n", "| total_timesteps | 1398848 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 1.57 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 926 |\n", "| ep_rew_mean | 64.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2028 |\n", "| fps | 125 |\n", "| time_elapsed | 11198 |\n", "| total_timesteps | 1402673 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.171 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 926 |\n", "| ep_rew_mean | 65.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2032 |\n", "| fps | 125 |\n", "| time_elapsed | 11231 |\n", "| total_timesteps | 1406575 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.23 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 927 |\n", "| ep_rew_mean | 66 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2036 |\n", "| fps | 125 |\n", "| time_elapsed | 11263 |\n", "| total_timesteps | 1410443 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.238 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 934 |\n", "| ep_rew_mean | 66.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2040 |\n", "| fps | 125 |\n", "| time_elapsed | 11300 |\n", "| total_timesteps | 1414805 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.121 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 936 |\n", "| ep_rew_mean | 67 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2044 |\n", "| fps | 125 |\n", "| time_elapsed | 11329 |\n", "| total_timesteps | 1418242 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.197 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 952 |\n", "| ep_rew_mean | 68 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2048 |\n", "| fps | 125 |\n", "| time_elapsed | 11366 |\n", "| total_timesteps | 1422524 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.114 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 961 |\n", "| ep_rew_mean | 66.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2052 |\n", "| fps | 125 |\n", "| time_elapsed | 11398 |\n", "| total_timesteps | 1426392 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.19 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 68.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2056 |\n", "| fps | 125 |\n", "| time_elapsed | 11634 |\n", "| total_timesteps | 1454436 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0978 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 67.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2060 |\n", "| fps | 125 |\n", "| time_elapsed | 11655 |\n", "| total_timesteps | 1456910 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.28 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 67.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2064 |\n", "| fps | 124 |\n", "| time_elapsed | 11679 |\n", "| total_timesteps | 1459784 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.145 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 69.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2068 |\n", "| fps | 124 |\n", "| time_elapsed | 11705 |\n", "| total_timesteps | 1462975 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0984 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 69 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2072 |\n", "| fps | 124 |\n", "| time_elapsed | 11731 |\n", "| total_timesteps | 1466034 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.219 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 68.7 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2076 |\n", "| fps | 124 |\n", "| time_elapsed | 11759 |\n", "| total_timesteps | 1469366 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.101 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 69.3 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2080 |\n", "| fps | 124 |\n", "| time_elapsed | 11789 |\n", "| total_timesteps | 1472886 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.353 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 70.6 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2084 |\n", "| fps | 124 |\n", "| time_elapsed | 11822 |\n", "| total_timesteps | 1476886 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.625 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 70.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2088 |\n", "| fps | 124 |\n", "| time_elapsed | 11849 |\n", "| total_timesteps | 1480042 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.0754 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 70.8 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2092 |\n", "| fps | 124 |\n", "| time_elapsed | 11874 |\n", "| total_timesteps | 1483138 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.328 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 69.5 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2096 |\n", "| fps | 124 |\n", "| time_elapsed | 11895 |\n", "| total_timesteps | 1485570 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.19 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 69.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2100 |\n", "| fps | 124 |\n", "| time_elapsed | 11926 |\n", "| total_timesteps | 1489218 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.114 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 70.4 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2104 |\n", "| fps | 124 |\n", "| time_elapsed | 11959 |\n", "| total_timesteps | 1493062 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.16 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 68.2 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2108 |\n", "| fps | 124 |\n", "| time_elapsed | 11984 |\n", "| total_timesteps | 1495936 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.428 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 71.9 |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| episodes | 2112 |\n", "| fps | 124 |\n", "| time_elapsed | 12018 |\n", "| total_timesteps | 1499994 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.158 |\n", "----------------------------------\n", "Eval num_timesteps=1500000, episode_reward=26.10 +/- 1.51\n", "Episode length: 497.40 +/- 10.39\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 497 |\n", "| mean_reward | 26.1 |\n", "| rollout/ | |\n", "| exploration_rate | 0.1 |\n", "| time/ | |\n", "| total_timesteps | 1500000 |\n", "| train/ | |\n", "| learning_rate | 0.0002 |\n", "| loss | 0.2 |\n", "----------------------------------\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Train the model\n", "model.learn(total_timesteps=NUM_TIMESTEPS, callback=callback_list, tb_log_name=\"./tb/\")" ] }, { "cell_type": "code", "execution_count": 9, "id": "5d962340", "metadata": { "execution": { "iopub.execute_input": "2024-05-09T21:21:22.100160Z", "iopub.status.busy": "2024-05-09T21:21:22.099100Z", "iopub.status.idle": "2024-05-09T21:22:12.351528Z", "shell.execute_reply": "2024-05-09T21:22:12.350390Z" }, "papermill": { "duration": 50.314619, "end_time": "2024-05-09T21:22:12.354207", "exception": false, "start_time": "2024-05-09T21:21:22.039588", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Save the model, policy, and replay buffer for future loading and training\n", "model.save(MODEL_FILE_NAME)\n", "model.save_replay_buffer(BUFFER_FILE_NAME)\n", "model.policy.save(POLICY_FILE_NAME)" ] } ], "metadata": { "kaggle": { "accelerator": "nvidiaTeslaT4", "dataSources": [], "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" }, "papermill": { "default_parameters": {}, "duration": 12168.569614, "end_time": "2024-05-09T21:22:15.829390", "environment_variables": {}, "exception": null, "input_path": "__notebook__.ipynb", "output_path": "__notebook__.ipynb", "parameters": {}, "start_time": "2024-05-09T17:59:27.259776", "version": "2.5.0" } }, "nbformat": 4, "nbformat_minor": 5 }