{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "de5cdf66", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T19:24:21.082948Z", "iopub.status.busy": "2024-05-14T19:24:21.082292Z", "iopub.status.idle": "2024-05-14T19:25:20.580255Z", "shell.execute_reply": "2024-05-14T19:25:20.579024Z" }, "papermill": { "duration": 59.505512, "end_time": "2024-05-14T19:25:20.582615", "exception": false, "start_time": "2024-05-14T19:24:21.077103", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "%%capture\n", "!pip install stable-baselines3[extra]\n", "!pip install moviepy" ] }, { "cell_type": "code", "execution_count": 2, "id": "e57e078b", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T19:25:20.591757Z", "iopub.status.busy": "2024-05-14T19:25:20.591462Z", "iopub.status.idle": "2024-05-14T19:25:35.880855Z", "shell.execute_reply": "2024-05-14T19:25:35.880095Z" }, "papermill": { "duration": 15.296447, "end_time": "2024-05-14T19:25:35.883128", "exception": false, "start_time": "2024-05-14T19:25:20.586681", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-05-14 19:25:26.607645: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-05-14 19:25:26.607751: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-05-14 19:25:26.726454: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n" ] } ], "source": [ "from stable_baselines3 import DQN\n", "from stable_baselines3.common.monitor import Monitor\n", "from stable_baselines3.common.callbacks import BaseCallback, EvalCallback, CallbackList\n", "from stable_baselines3.common.logger import Video, HParam, TensorBoardOutputFormat\n", "from stable_baselines3.common.evaluation import evaluate_policy\n", "\n", "from typing import Any, Dict\n", "\n", "import gymnasium as gym\n", "import torch as th\n", "import numpy as np\n", "\n", "# =====File names=====\n", "MODEL_FILE_NAME = \"ALE-Pacman-v5\"\n", "BUFFER_FILE_NAME = \"dqn_replay_buffer_pacman_v2-4\"\n", "POLICY_FILE_NAME = \"dqn_policy_pacman_v2-4\"\n", "\n", "# =====Model Config=====\n", "# Evaluate in 20ths\n", "EVAL_CALLBACK_FREQ = 50_000\n", "# Record in quarters (the last one won't record, will have to do manually)\n", "# If I record in quarters, but drop the frequency down to 240k instead of 250k, this might trigger a recording near the end.\n", "VIDEO_CALLBACK_FREQ = 240_000\n", "FRAMESKIP = 4\n", "NUM_TIMESTEPS = 1_000_000\n", "\n", "# =====Hyperparams=====\n", "EXPLORATION_FRACTION = 0.3\n", "# Buffer size needs to be less than about 60k in order to save it in a Kaggle instance\n", "# Increasing buffer size to 70K, should be able to store it.\n", "BUFFER_SIZE = 70_000\n", "BATCH_SIZE = 64\n", "LEARNING_STARTS = 50_000\n", "LEARNING_RATE = 0.00005\n", "GAMMA = 0.999\n", "FINAL_EPSILON = 0.05\n", "# Target Update Interval is set to 10k by default and looks like it is set to \n", "# 4 in the Nature paper. 
This is a large discrepancy, which makes me wonder if it\n", "# is something different or measured differently.\n", "TARGET_UPDATE_INTERVAL = 5_000\n", "\n", "# =====Custom objects for hyperparam modification=====\n", "CUSTOM_OBJECTS = {\n", " \"exploration_fraction\": EXPLORATION_FRACTION, \n", " \"buffer_size\": BUFFER_SIZE,\n", " \"batch_size\": BATCH_SIZE,\n", " \"learning_starts\": LEARNING_STARTS,\n", " \"learning_rate\": LEARNING_RATE,\n", " \"gamma\": GAMMA,\n", " \"target_update_interval\": TARGET_UPDATE_INTERVAL,\n", " \"exploration_final_eps\": FINAL_EPSILON,\n", " \"tensorboard_log\": \"./\",\n", " \"verbose\": 1}" ] }, { "cell_type": "code", "execution_count": 3, "id": "4c9e6843", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T19:25:35.892062Z", "iopub.status.busy": "2024-05-14T19:25:35.891564Z", "iopub.status.idle": "2024-05-14T19:25:35.901926Z", "shell.execute_reply": "2024-05-14T19:25:35.901073Z" }, "jupyter": { "source_hidden": true }, "papermill": { "duration": 0.016837, "end_time": "2024-05-14T19:25:35.903893", "exception": false, "start_time": "2024-05-14T19:25:35.887056", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# VideoRecorderCallback\n", "# The VideoRecorderCallback should record a video of the agent in the evaluation environment\n", "# every render_freq timesteps. It will record one episode. It will also record one episode when\n", "# the training has been completed\n", "\n", "class VideoRecorderCallback(BaseCallback):\n", " def __init__(self, eval_env: gym.Env, render_freq: int, n_eval_episodes: int = 1, deterministic: bool = True):\n", " \"\"\"\n", " Records a video of an agent's trajectory traversing ``eval_env`` and logs it to TensorBoard.\n", " :param eval_env: A gym environment from which the trajectory is recorded\n", " :param render_freq: Render the agent's trajectory every render_freq calls of the callback.\n", " :param n_eval_episodes: Number of episodes to render\n", " :param deterministic: Whether to use deterministic or stochastic policy\n", " \"\"\"\n", " super().__init__()\n", " self._eval_env = eval_env\n", " self._render_freq = render_freq\n", " self._n_eval_episodes = n_eval_episodes\n", " self._deterministic = deterministic\n", "\n", " def _on_step(self) -> bool:\n", " if self.n_calls % self._render_freq == 0:\n", " screens = []\n", "\n", " def grab_screens(_locals: Dict[str, Any], _globals: Dict[str, Any]) -> None:\n", " \"\"\"\n", " Renders the environment in its current state, recording the screen in the captured `screens` list\n", " :param _locals: A dictionary containing all local variables of the callback's scope\n", " :param _globals: A dictionary containing all global variables of the callback's scope\n", " \"\"\"\n", " screen = self._eval_env.render()\n", " # PyTorch uses CxHxW vs HxWxC gym (and tensorflow) image convention\n", " screens.append(screen.transpose(2, 0, 1))\n", "\n", " evaluate_policy(\n", " self.model,\n", " self._eval_env,\n", " callback=grab_screens,\n", " n_eval_episodes=self._n_eval_episodes,\n", " deterministic=self._deterministic,\n", " )\n", " self.logger.record(\n", " \"trajectory/video\",\n", " Video(th.from_numpy(np.array([screens])), fps=60),\n", " exclude=(\"stdout\", \"log\", \"json\", \"csv\"),\n", " )\n", " return True" ] }, { "cell_type": "code", "execution_count": 4, "id": "2b73d7a9", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T19:25:35.912014Z", "iopub.status.busy": "2024-05-14T19:25:35.911762Z", "iopub.status.idle": "2024-05-14T19:25:35.923376Z",
"shell.execute_reply": "2024-05-14T19:25:35.922528Z" }, "jupyter": { "source_hidden": true }, "papermill": { "duration": 0.017854, "end_time": "2024-05-14T19:25:35.925216", "exception": false, "start_time": "2024-05-14T19:25:35.907362", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# HParamCallback\n", "# This callback logs the specified hyperparameters and maps the logged metrics to\n", "# the appropriate run.\n", "class HParamCallback(BaseCallback):\n", " \"\"\"\n", " Saves the hyperparameters and metrics at the start of the training, and logs them to TensorBoard.\n", " \"\"\" \n", " def __init__(self):\n", " super().__init__()\n", " \n", "\n", " def _on_training_start(self) -> None:\n", " \n", " hparam_dict = {\n", " \"algorithm\": self.model.__class__.__name__,\n", " \"policy\": self.model.policy.__class__.__name__,\n", " \"environment\": self.model.env.__class__.__name__,\n", " \"buffer_size\": self.model.buffer_size,\n", " \"batch_size\": self.model.batch_size,\n", " \"tau\": self.model.tau,\n", " \"gradient_steps\": self.model.gradient_steps,\n", " \"target_update_interval\": self.model.target_update_interval,\n", " \"exploration_fraction\": self.model.exploration_fraction,\n", " \"exploration_initial_eps\": self.model.exploration_initial_eps,\n", " \"exploration_final_eps\": self.model.exploration_final_eps,\n", " \"max_grad_norm\": self.model.max_grad_norm,\n", " \"tensorboard_log\": self.model.tensorboard_log,\n", " \"seed\": self.model.seed, \n", " \"learning_rate\": self.model.learning_rate,\n", " \"gamma\": self.model.gamma, \n", " }\n", " # define the metrics that will appear in the `HPARAMS` TensorBoard tab by referencing their tag\n", " # TensorBoard will find & display metrics from the `SCALARS` tab\n", " metric_dict = {\n", " \"eval/mean_ep_length\": 0,\n", " \"eval/mean_reward\": 0,\n", " \"rollout/ep_len_mean\": 0,\n", " \"rollout/ep_rew_mean\": 0,\n", " \"rollout/exploration_rate\": 0,\n", " \"time/_episode_num\": 0,\n", " \"time/fps\": 0,\n", " \"time/total_timesteps\": 0,\n", " \"train/learning_rate\": 0.0,\n", " \"train/loss\": 0.0,\n", " \"train/n_updates\": 0.0,\n", " \"locals/rewards\": 0.0,\n", " \"locals/infos_0_lives\": 0.0,\n", " \"locals/num_collected_steps\": 0.0,\n", " \"locals/num_collected_episodes\": 0.0\n", " }\n", " \n", " self.logger.record(\n", " \"hparams\",\n", " HParam(hparam_dict, metric_dict),\n", " exclude=(\"stdout\", \"log\", \"json\", \"csv\"),\n", " )\n", " \n", " def _on_step(self) -> bool:\n", " return True" ] }, { "cell_type": "code", "execution_count": 5, "id": "0edc2745", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T19:25:35.933352Z", "iopub.status.busy": "2024-05-14T19:25:35.933112Z", "iopub.status.idle": "2024-05-14T19:25:35.947511Z", "shell.execute_reply": "2024-05-14T19:25:35.946688Z" }, "jupyter": { "source_hidden": true }, "papermill": { "duration": 0.020648, "end_time": "2024-05-14T19:25:35.949311", "exception": false, "start_time": "2024-05-14T19:25:35.928663", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# PlotTensorboardValuesCallback\n", "# This callback logs values to TensorBoard on every step.
\n", "# The self.logger object plots a new scalar value each time it records.\n", "\n", "class PlotTensorboardValuesCallback(BaseCallback):\n", " \"\"\"\n", " Custom callback for plotting additional values in tensorboard.\n", " \"\"\"\n", " def __init__(self, eval_env: gym.Env, train_env: gym.Env, model: DQN, verbose=0):\n", " super().__init__(verbose)\n", " self._eval_env = eval_env\n", " self._train_env = train_env\n", " self._model = model\n", "\n", " def _on_training_start(self) -> None:\n", " output_formats = self.logger.output_formats\n", " # Save a reference to the TensorBoard formatter object\n", " # note: if no TensorBoard formatter is found, the StopIteration is caught and logged here,\n", " # but the writer calls below will still fail because self.tb_formatter is never set.\n", " try:\n", " self.tb_formatter = next(formatter for formatter in output_formats if isinstance(formatter, TensorBoardOutputFormat))\n", " except StopIteration:\n", " print(\"Exception thrown in tb_formatter initialization.\") \n", " \n", " self.tb_formatter.writer.add_text(\"metadata/eval_env\", str(self._eval_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"metadata/train_env\", str(self._train_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net\", str(self._model.q_net), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net_target\", str(self._model.q_net_target), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", "\n", " def _on_step(self) -> bool:\n", " self.logger.record(\"time/_episode_num\", self.model._episode_num, exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"train/n_updates\", self.model._n_updates, exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/rewards\", self.locals[\"rewards\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/infos_0_lives\", self.locals[\"infos\"][0][\"lives\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/num_collected_steps\", self.locals[\"num_collected_steps\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/num_collected_episodes\", self.locals[\"num_collected_episodes\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " \n", " return True\n", " \n", " def _on_training_end(self) -> None:\n", " self.tb_formatter.writer.add_text(\"metadata/eval_env\", str(self._eval_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"metadata/train_env\", str(self._train_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net\", str(self._model.q_net), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net_target\", str(self._model.q_net_target), self.num_timesteps)\n", " self.tb_formatter.writer.flush()" ] }, { "cell_type": "code", "execution_count": 6, "id": "97c77eb6", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T19:25:35.957376Z", "iopub.status.busy": "2024-05-14T19:25:35.957123Z", "iopub.status.idle": "2024-05-14T19:26:55.537233Z", "shell.execute_reply": "2024-05-14T19:26:55.536182Z" }, "papermill": { "duration": 79.586963, "end_time": "2024-05-14T19:26:55.539754", "exception": false, "start_time": "2024-05-14T19:25:35.952791", "status": "completed" }, "tags": [] }, "outputs": [ {
"name": "stderr", "output_type": "stream", "text": [ "A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)\n", "[Powered by Stella]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Wrapping the env with a `Monitor` wrapper\n", "Wrapping the env in a DummyVecEnv.\n", "Wrapping the env in a VecTransposeImage.\n" ] } ], "source": [ "# make the training and evaluation environments\n", "eval_env = Monitor(gym.make(\"ALE/Pacman-v5\", render_mode=\"rgb_array\", frameskip=FRAMESKIP))\n", "train_env = gym.make(\"ALE/Pacman-v5\", render_mode=\"rgb_array\", frameskip=FRAMESKIP)\n", "\n", "# Make the model with specified hyperparams\n", "# load the model\n", "# load the buffer\n", "# The loaded model still needs the hyperparameters to be passed to it, and the replay buffer\n", "model = DQN.load(\"/kaggle/input/dqn-pacmanv5-run2v4/ALE-Pacman-v5.zip\", \n", " env=train_env, \n", " custom_objects=CUSTOM_OBJECTS)\n", "model.load_replay_buffer(\"/kaggle/input/dqn-pacmanv5-run2v4/dqn_replay_buffer_pacman_v2-3\")" ] }, { "cell_type": "code", "execution_count": 7, "id": "0db7ff3a", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T19:26:55.549297Z", "iopub.status.busy": "2024-05-14T19:26:55.548774Z", "iopub.status.idle": "2024-05-14T19:26:55.554851Z", "shell.execute_reply": "2024-05-14T19:26:55.553819Z" }, "papermill": { "duration": 0.013057, "end_time": "2024-05-14T19:26:55.557002", "exception": false, "start_time": "2024-05-14T19:26:55.543945", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Define the callbacks and put them in a list\n", "eval_callback = EvalCallback(\n", " eval_env,\n", " best_model_save_path=\"./best_model/\",\n", " log_path=\"./evals/\",\n", " eval_freq=EVAL_CALLBACK_FREQ,\n", " n_eval_episodes=10,\n", " deterministic=True,\n", " render=False)\n", "\n", "tbplot_callback = PlotTensorboardValuesCallback(eval_env=eval_env, train_env=train_env, model=model)\n", "video_callback = VideoRecorderCallback(eval_env, render_freq=VIDEO_CALLBACK_FREQ)\n", "hparam_callback = HParamCallback()\n", "\n", "callback_list = CallbackList([hparam_callback, eval_callback, video_callback, tbplot_callback])" ] }, { "cell_type": "code", "execution_count": 8, "id": "684dc98c", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T19:26:55.566152Z", "iopub.status.busy": "2024-05-14T19:26:55.565695Z", "iopub.status.idle": "2024-05-14T21:58:42.957979Z", "shell.execute_reply": "2024-05-14T21:58:42.957073Z" }, "papermill": { "duration": 9107.39885, "end_time": "2024-05-14T21:58:42.959938", "exception": false, "start_time": "2024-05-14T19:26:55.561088", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Logging to ././tb/_0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/stable_baselines3/common/callbacks.py:403: UserWarning: Training and eval env are not of the same type != \n", " warnings.warn(\"Training and eval env are not of the same type\" f\"{self.training_env} != {self.eval_env}\")\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.46e+03 |\n", "| ep_rew_mean | 286 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5344 |\n", "| fps | 108 |\n", "| time_elapsed | 42 |\n", "| total_timesteps | 5504620 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.248 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.46e+03 |\n", "| ep_rew_mean | 289 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5348 |\n", "| fps | 110 |\n", "| time_elapsed | 87 |\n", "| total_timesteps | 5509654 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0989 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 292 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5352 |\n", "| fps | 110 |\n", "| time_elapsed | 135 |\n", "| total_timesteps | 5514926 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0485 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 299 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5356 |\n", "| fps | 110 |\n", "| time_elapsed | 178 |\n", "| total_timesteps | 5519760 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.17 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 303 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5360 |\n", "| fps | 110 |\n", "| time_elapsed | 220 |\n", "| total_timesteps | 5524435 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0885 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 297 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5364 |\n", "| fps | 110 |\n", "| time_elapsed | 265 |\n", "| total_timesteps | 5529389 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.134 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 299 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5368 |\n", "| fps | 110 |\n", "| time_elapsed | 308 |\n", "| total_timesteps | 5534181 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.234 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 298 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5372 |\n", "| fps | 110 |\n", "| time_elapsed | 351 |\n", "| total_timesteps | 5538948 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.17 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 298 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5376 |\n", "| fps | 110 |\n", "| time_elapsed | 398 |\n", "| total_timesteps | 5544162 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0675 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 295 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5380 |\n", "| fps | 110 |\n", "| time_elapsed | 441 |\n", "| total_timesteps | 5548988 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.12 |\n", "----------------------------------\n", "Eval num_timesteps=5550000, episode_reward=74.80 +/- 109.14\n", 
"Episode length: 692.60 +/- 295.14\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 693 |\n", "| mean_reward | 74.8 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5550000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0658 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 298 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5384 |\n", "| fps | 106 |\n", "| time_elapsed | 496 |\n", "| total_timesteps | 5553102 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.144 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 299 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5388 |\n", "| fps | 107 |\n", "| time_elapsed | 541 |\n", "| total_timesteps | 5558044 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.21 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 294 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5392 |\n", "| fps | 107 |\n", "| time_elapsed | 581 |\n", "| total_timesteps | 5562458 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0578 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 295 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5396 |\n", "| fps | 107 |\n", "| time_elapsed | 623 |\n", "| total_timesteps | 5567172 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0735 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 296 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5400 |\n", "| fps | 107 |\n", "| time_elapsed | 667 |\n", "| total_timesteps | 5572034 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0621 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 298 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5404 |\n", "| fps | 108 |\n", "| time_elapsed | 713 |\n", "| total_timesteps | 5577090 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0439 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 298 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5408 |\n", "| fps | 108 |\n", "| time_elapsed | 757 |\n", "| total_timesteps | 5582078 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.148 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 300 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5412 |\n", "| fps | 108 |\n", "| time_elapsed | 799 |\n", "| total_timesteps | 5586678 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0509 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | 
|\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 301 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5416 |\n", "| fps | 108 |\n", "| time_elapsed | 847 |\n", "| total_timesteps | 5592012 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0566 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 302 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5420 |\n", "| fps | 108 |\n", "| time_elapsed | 892 |\n", "| total_timesteps | 5596898 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.135 |\n", "----------------------------------\n", "Eval num_timesteps=5600000, episode_reward=387.60 +/- 54.76\n", "Episode length: 1202.20 +/- 129.23\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.2e+03 |\n", "| mean_reward | 388 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5600000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0425 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 298 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5424 |\n", "| fps | 105 |\n", "| time_elapsed | 973 |\n", "| total_timesteps | 5602444 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0502 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 300 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5428 |\n", "| fps | 105 |\n", "| time_elapsed | 1023 |\n", "| total_timesteps | 5607868 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.439 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 297 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5432 |\n", "| fps | 105 |\n", "| time_elapsed | 1067 |\n", "| total_timesteps | 5612754 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0501 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 296 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5436 |\n", "| fps | 105 |\n", "| time_elapsed | 1109 |\n", "| total_timesteps | 5617368 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.054 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 295 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5440 |\n", "| fps | 105 |\n", "| time_elapsed | 1149 |\n", "| total_timesteps | 5621743 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.098 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 290 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5444 |\n", "| fps | 106 |\n", "| time_elapsed | 1195 |\n", "| total_timesteps | 5626893 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.219 |\n", "----------------------------------\n", "----------------------------------\n", 
"| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 287 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5448 |\n", "| fps | 106 |\n", "| time_elapsed | 1238 |\n", "| total_timesteps | 5631665 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0314 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 286 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5452 |\n", "| fps | 106 |\n", "| time_elapsed | 1278 |\n", "| total_timesteps | 5636009 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.4 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 282 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5456 |\n", "| fps | 106 |\n", "| time_elapsed | 1320 |\n", "| total_timesteps | 5640663 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0621 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 276 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5460 |\n", "| fps | 106 |\n", "| time_elapsed | 1360 |\n", "| total_timesteps | 5645037 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0809 |\n", "----------------------------------\n", "Eval num_timesteps=5650000, episode_reward=274.30 +/- 82.52\n", "Episode length: 1173.00 +/- 128.68\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.17e+03 |\n", "| mean_reward | 274 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5650000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0527 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 280 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5464 |\n", "| fps | 104 |\n", "| time_elapsed | 1436 |\n", "| total_timesteps | 5650273 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0736 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 282 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5468 |\n", "| fps | 104 |\n", "| time_elapsed | 1484 |\n", "| total_timesteps | 5655575 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0406 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 284 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5472 |\n", "| fps | 104 |\n", "| time_elapsed | 1530 |\n", "| total_timesteps | 5660729 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.114 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 288 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5476 |\n", "| fps | 105 |\n", "| time_elapsed | 1576 |\n", "| total_timesteps | 5665803 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0315 |\n", "----------------------------------\n", "----------------------------------\n", "| 
rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 290 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5480 |\n", "| fps | 105 |\n", "| time_elapsed | 1623 |\n", "| total_timesteps | 5670979 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0827 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 290 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5484 |\n", "| fps | 105 |\n", "| time_elapsed | 1671 |\n", "| total_timesteps | 5676101 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0524 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 286 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5488 |\n", "| fps | 105 |\n", "| time_elapsed | 1720 |\n", "| total_timesteps | 5681601 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0629 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5492 |\n", "| fps | 105 |\n", "| time_elapsed | 1765 |\n", "| total_timesteps | 5686553 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.065 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5496 |\n", "| fps | 105 |\n", "| time_elapsed | 1807 |\n", "| total_timesteps | 5691244 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0868 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5500 |\n", "| fps | 105 |\n", "| time_elapsed | 1854 |\n", "| total_timesteps | 5696504 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0771 |\n", "----------------------------------\n", "Eval num_timesteps=5700000, episode_reward=254.40 +/- 94.82\n", "Episode length: 3844.20 +/- 7720.58\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 3.84e+03 |\n", "| mean_reward | 254 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5700000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0507 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 288 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5504 |\n", "| fps | 101 |\n", "| time_elapsed | 1990 |\n", "| total_timesteps | 5701164 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.191 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 284 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5508 |\n", "| fps | 101 |\n", "| time_elapsed | 2028 |\n", "| total_timesteps | 5705294 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0437 |\n", "----------------------------------\n", "----------------------------------\n", "| 
rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 287 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5512 |\n", "| fps | 101 |\n", "| time_elapsed | 2076 |\n", "| total_timesteps | 5710468 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0498 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 287 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5516 |\n", "| fps | 101 |\n", "| time_elapsed | 2120 |\n", "| total_timesteps | 5715398 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.141 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 282 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5520 |\n", "| fps | 101 |\n", "| time_elapsed | 2160 |\n", "| total_timesteps | 5719918 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0379 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 285 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5524 |\n", "| fps | 101 |\n", "| time_elapsed | 2205 |\n", "| total_timesteps | 5724892 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0565 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 287 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5528 |\n", "| fps | 102 |\n", "| time_elapsed | 2249 |\n", "| total_timesteps | 5729774 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0826 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 288 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5532 |\n", "| fps | 102 |\n", "| time_elapsed | 2297 |\n", "| total_timesteps | 5735170 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0902 |\n", "----------------------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/gymnasium/utils/passive_env_checker.py:335: UserWarning: \u001b[33mWARN: No render fps was declared in the environment (env.metadata['render_fps'] is None or not defined), rendering may occur at inconsistent fps.\u001b[0m\n", " logger.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 289 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5536 |\n", "| fps | 102 |\n", "| time_elapsed | 2348 |\n", "| total_timesteps | 5740624 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0831 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5540 |\n", "| fps | 102 |\n", "| time_elapsed | 2404 |\n", "| total_timesteps | 5746520 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0258 |\n", "----------------------------------\n", "Eval num_timesteps=5750000, episode_reward=236.90 +/- 92.75\n", "Episode length: 
1318.70 +/- 147.39\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.32e+03 |\n", "| mean_reward | 237 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5750000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.409 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 292 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5544 |\n", "| fps | 101 |\n", "| time_elapsed | 2477 |\n", "| total_timesteps | 5751550 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.423 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 290 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5548 |\n", "| fps | 101 |\n", "| time_elapsed | 2517 |\n", "| total_timesteps | 5756506 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.074 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5552 |\n", "| fps | 102 |\n", "| time_elapsed | 2555 |\n", "| total_timesteps | 5761195 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.104 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 294 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5556 |\n", "| fps | 102 |\n", "| time_elapsed | 2598 |\n", "| total_timesteps | 5766455 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.629 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 300 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5560 |\n", "| fps | 102 |\n", "| time_elapsed | 2640 |\n", "| total_timesteps | 5771601 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0519 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 298 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5564 |\n", "| fps | 103 |\n", "| time_elapsed | 2682 |\n", "| total_timesteps | 5776797 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0579 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 295 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5568 |\n", "| fps | 103 |\n", "| time_elapsed | 2724 |\n", "| total_timesteps | 5781949 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0314 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 289 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5572 |\n", "| fps | 103 |\n", "| time_elapsed | 2762 |\n", "| total_timesteps | 5786627 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.189 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 
|\n", "| ep_rew_mean | 288 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5576 |\n", "| fps | 104 |\n", "| time_elapsed | 2801 |\n", "| total_timesteps | 5791411 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0708 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5580 |\n", "| fps | 104 |\n", "| time_elapsed | 2844 |\n", "| total_timesteps | 5796703 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0621 |\n", "----------------------------------\n", "Eval num_timesteps=5800000, episode_reward=248.10 +/- 121.03\n", "Episode length: 3742.80 +/- 7755.17\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 3.74e+03 |\n", "| mean_reward | 248 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5800000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.099 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 289 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5584 |\n", "| fps | 101 |\n", "| time_elapsed | 2967 |\n", "| total_timesteps | 5800849 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0505 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 295 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5588 |\n", "| fps | 101 |\n", "| time_elapsed | 3011 |\n", "| total_timesteps | 5806177 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0967 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5592 |\n", "| fps | 101 |\n", "| time_elapsed | 3043 |\n", "| total_timesteps | 5810143 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0893 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 288 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5596 |\n", "| fps | 102 |\n", "| time_elapsed | 3075 |\n", "| total_timesteps | 5814073 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.32 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 282 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5600 |\n", "| fps | 102 |\n", "| time_elapsed | 3111 |\n", "| total_timesteps | 5818513 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.129 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 283 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5604 |\n", "| fps | 102 |\n", "| time_elapsed | 3152 |\n", "| total_timesteps | 5823503 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0394 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 
|\n", "| ep_rew_mean | 280 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5608 |\n", "| fps | 102 |\n", "| time_elapsed | 3187 |\n", "| total_timesteps | 5827851 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.54 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 277 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5612 |\n", "| fps | 103 |\n", "| time_elapsed | 3223 |\n", "| total_timesteps | 5832258 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0748 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 277 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5616 |\n", "| fps | 103 |\n", "| time_elapsed | 3260 |\n", "| total_timesteps | 5836790 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.123 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 275 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5620 |\n", "| fps | 103 |\n", "| time_elapsed | 3292 |\n", "| total_timesteps | 5840835 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0575 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 277 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5624 |\n", "| fps | 103 |\n", "| time_elapsed | 3329 |\n", "| total_timesteps | 5845391 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.267 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 273 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5628 |\n", "| fps | 103 |\n", "| time_elapsed | 3365 |\n", "| total_timesteps | 5849831 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.101 |\n", "----------------------------------\n", "Eval num_timesteps=5850000, episode_reward=344.50 +/- 114.88\n", "Episode length: 3769.80 +/- 7743.52\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 3.77e+03 |\n", "| mean_reward | 344 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5850000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0926 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 275 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5632 |\n", "| fps | 101 |\n", "| time_elapsed | 3499 |\n", "| total_timesteps | 5855315 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.089 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 272 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5636 |\n", "| fps | 101 |\n", "| time_elapsed | 3539 |\n", "| total_timesteps | 5860232 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0859 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| 
ep_rew_mean | 267 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5640 |\n", "| fps | 101 |\n", "| time_elapsed | 3575 |\n", "| total_timesteps | 5864548 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0947 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 272 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5644 |\n", "| fps | 102 |\n", "| time_elapsed | 3619 |\n", "| total_timesteps | 5869960 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.103 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5648 |\n", "| fps | 102 |\n", "| time_elapsed | 3659 |\n", "| total_timesteps | 5874950 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0378 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 275 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5652 |\n", "| fps | 102 |\n", "| time_elapsed | 3702 |\n", "| total_timesteps | 5880210 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0225 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5656 |\n", "| fps | 102 |\n", "| time_elapsed | 3739 |\n", "| total_timesteps | 5884818 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.152 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 276 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5660 |\n", "| fps | 103 |\n", "| time_elapsed | 3781 |\n", "| total_timesteps | 5889988 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.033 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 272 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5664 |\n", "| fps | 103 |\n", "| time_elapsed | 3819 |\n", "| total_timesteps | 5894632 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.3 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 273 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5668 |\n", "| fps | 103 |\n", "| time_elapsed | 3863 |\n", "| total_timesteps | 5899956 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.345 |\n", "----------------------------------\n", "Eval num_timesteps=5900000, episode_reward=231.70 +/- 69.43\n", "Episode length: 1110.70 +/- 149.62\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.11e+03 |\n", "| mean_reward | 232 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5900000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0519 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| 
ep_rew_mean | 277 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5672 |\n", "| fps | 102 |\n", "| time_elapsed | 3927 |\n", "| total_timesteps | 5904532 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0256 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 276 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5676 |\n", "| fps | 103 |\n", "| time_elapsed | 3970 |\n", "| total_timesteps | 5909736 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0788 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 273 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5680 |\n", "| fps | 103 |\n", "| time_elapsed | 4010 |\n", "| total_timesteps | 5914584 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.067 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 272 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5684 |\n", "| fps | 103 |\n", "| time_elapsed | 4053 |\n", "| total_timesteps | 5919908 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0862 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 260 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5688 |\n", "| fps | 103 |\n", "| time_elapsed | 4085 |\n", "| total_timesteps | 5923742 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.3 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 259 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5692 |\n", "| fps | 103 |\n", "| time_elapsed | 4127 |\n", "| total_timesteps | 5928988 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0704 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 263 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5696 |\n", "| fps | 104 |\n", "| time_elapsed | 4164 |\n", "| total_timesteps | 5933528 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.123 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 265 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5700 |\n", "| fps | 104 |\n", "| time_elapsed | 4201 |\n", "| total_timesteps | 5938132 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.102 |\n", "----------------------------------\n", "Eval num_timesteps=5950000, episode_reward=201.10 +/- 114.04\n", "Episode length: 1171.80 +/- 254.23\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.17e+03 |\n", "| mean_reward | 201 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5950000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0641 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.45e+03 |\n", "| 
ep_rew_mean | 269 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5704 |\n", "| fps | 104 |\n", "| time_elapsed | 4476 |\n", "| total_timesteps | 5968608 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.037 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.46e+03 |\n", "| ep_rew_mean | 278 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5708 |\n", "| fps | 104 |\n", "| time_elapsed | 4517 |\n", "| total_timesteps | 5973616 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0322 |\n", "----------------------------------\n", "Eval num_timesteps=6000000, episode_reward=51.70 +/- 49.90\n", "Episode length: 761.10 +/- 104.24\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 761 |\n", "| mean_reward | 51.7 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6000000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0253 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.72e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5712 |\n", "| fps | 105 |\n", "| time_elapsed | 4784 |\n", "| total_timesteps | 6003804 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0314 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.71e+03 |\n", "| ep_rew_mean | 268 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5716 |\n", "| fps | 105 |\n", "| time_elapsed | 4814 |\n", "| total_timesteps | 6007388 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0517 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.71e+03 |\n", "| ep_rew_mean | 273 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5720 |\n", "| fps | 105 |\n", "| time_elapsed | 4846 |\n", "| total_timesteps | 6011354 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0498 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.7e+03 |\n", "| ep_rew_mean | 269 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5724 |\n", "| fps | 105 |\n", "| time_elapsed | 4876 |\n", "| total_timesteps | 6014981 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0949 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.69e+03 |\n", "| ep_rew_mean | 269 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5728 |\n", "| fps | 105 |\n", "| time_elapsed | 4908 |\n", "| total_timesteps | 6018880 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.103 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.67e+03 |\n", "| ep_rew_mean | 263 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5732 |\n", "| fps | 105 |\n", "| time_elapsed | 4935 |\n", "| total_timesteps | 6022144 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0182 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.66e+03 |\n", "| 
ep_rew_mean | 261 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5736 |\n", "| fps | 105 |\n", "| time_elapsed | 4968 |\n", "| total_timesteps | 6026220 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.025 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.66e+03 |\n", "| ep_rew_mean | 265 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5740 |\n", "| fps | 106 |\n", "| time_elapsed | 5005 |\n", "| total_timesteps | 6030686 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0318 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.65e+03 |\n", "| ep_rew_mean | 265 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5744 |\n", "| fps | 106 |\n", "| time_elapsed | 5041 |\n", "| total_timesteps | 6035162 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.324 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.65e+03 |\n", "| ep_rew_mean | 266 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5748 |\n", "| fps | 106 |\n", "| time_elapsed | 5076 |\n", "| total_timesteps | 6039465 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0314 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.64e+03 |\n", "| ep_rew_mean | 267 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5752 |\n", "| fps | 106 |\n", "| time_elapsed | 5115 |\n", "| total_timesteps | 6044289 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.28 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.63e+03 |\n", "| ep_rew_mean | 264 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5756 |\n", "| fps | 106 |\n", "| time_elapsed | 5146 |\n", "| total_timesteps | 6048015 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0357 |\n", "----------------------------------\n", "Eval num_timesteps=6050000, episode_reward=231.10 +/- 107.71\n", "Episode length: 1159.80 +/- 211.76\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.16e+03 |\n", "| mean_reward | 231 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6050000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0927 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.63e+03 |\n", "| ep_rew_mean | 262 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5760 |\n", "| fps | 106 |\n", "| time_elapsed | 5214 |\n", "| total_timesteps | 6052951 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0629 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.62e+03 |\n", "| ep_rew_mean | 262 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5764 |\n", "| fps | 106 |\n", "| time_elapsed | 5244 |\n", "| total_timesteps | 6056643 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.104 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.61e+03 |\n", "| 
ep_rew_mean | 261 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5768 |\n", "| fps | 106 |\n", "| time_elapsed | 5280 |\n", "| total_timesteps | 6061057 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0921 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.62e+03 |\n", "| ep_rew_mean | 266 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5772 |\n", "| fps | 106 |\n", "| time_elapsed | 5323 |\n", "| total_timesteps | 6066275 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.32 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.61e+03 |\n", "| ep_rew_mean | 267 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5776 |\n", "| fps | 106 |\n", "| time_elapsed | 5361 |\n", "| total_timesteps | 6071000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0947 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.6e+03 |\n", "| ep_rew_mean | 264 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5780 |\n", "| fps | 106 |\n", "| time_elapsed | 5393 |\n", "| total_timesteps | 6074972 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0569 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.59e+03 |\n", "| ep_rew_mean | 264 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5784 |\n", "| fps | 106 |\n", "| time_elapsed | 5425 |\n", "| total_timesteps | 6078858 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.113 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.6e+03 |\n", "| ep_rew_mean | 273 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5788 |\n", "| fps | 106 |\n", "| time_elapsed | 5466 |\n", "| total_timesteps | 6083878 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0566 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.59e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5792 |\n", "| fps | 106 |\n", "| time_elapsed | 5502 |\n", "| total_timesteps | 6088274 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0943 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.6e+03 |\n", "| ep_rew_mean | 275 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5796 |\n", "| fps | 107 |\n", "| time_elapsed | 5542 |\n", "| total_timesteps | 6093248 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0429 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.6e+03 |\n", "| ep_rew_mean | 277 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5800 |\n", "| fps | 107 |\n", "| time_elapsed | 5580 |\n", "| total_timesteps | 6097914 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.189 |\n", "----------------------------------\n", "Eval num_timesteps=6100000, episode_reward=278.10 +/- 77.07\n", "Episode length: 1296.60 +/- 155.58\n", "----------------------------------\n", "| eval/ | 
|\n", "| mean_ep_length | 1.3e+03 |\n", "| mean_reward | 278 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6100000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.191 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.34e+03 |\n", "| ep_rew_mean | 278 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5804 |\n", "| fps | 106 |\n", "| time_elapsed | 5654 |\n", "| total_timesteps | 6103076 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0292 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.34e+03 |\n", "| ep_rew_mean | 272 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5808 |\n", "| fps | 106 |\n", "| time_elapsed | 5691 |\n", "| total_timesteps | 6107617 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0711 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.08e+03 |\n", "| ep_rew_mean | 277 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5812 |\n", "| fps | 106 |\n", "| time_elapsed | 5728 |\n", "| total_timesteps | 6112183 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0964 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 279 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5816 |\n", "| fps | 106 |\n", "| time_elapsed | 5766 |\n", "| total_timesteps | 6116901 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0721 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 281 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5820 |\n", "| fps | 107 |\n", "| time_elapsed | 5807 |\n", "| total_timesteps | 6121877 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.121 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 281 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5824 |\n", "| fps | 107 |\n", "| time_elapsed | 5840 |\n", "| total_timesteps | 6126015 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.068 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5828 |\n", "| fps | 107 |\n", "| time_elapsed | 5871 |\n", "| total_timesteps | 6129811 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.126 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5832 |\n", "| fps | 107 |\n", "| time_elapsed | 5906 |\n", "| total_timesteps | 6134087 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0423 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 278 |\n", "| exploration_rate | 0.05 |\n", "| time/ | 
|\n", "| episodes | 5836 |\n", "| fps | 107 |\n", "| time_elapsed | 5942 |\n", "| total_timesteps | 6138493 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0491 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5840 |\n", "| fps | 107 |\n", "| time_elapsed | 5981 |\n", "| total_timesteps | 6143277 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.301 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 270 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5844 |\n", "| fps | 107 |\n", "| time_elapsed | 6021 |\n", "| total_timesteps | 6148249 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0489 |\n", "----------------------------------\n", "Eval num_timesteps=6150000, episode_reward=415.90 +/- 25.34\n", "Episode length: 1254.40 +/- 98.52\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.25e+03 |\n", "| mean_reward | 416 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6150000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0784 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 271 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5848 |\n", "| fps | 107 |\n", "| time_elapsed | 6092 |\n", "| total_timesteps | 6153181 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0519 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5852 |\n", "| fps | 107 |\n", "| time_elapsed | 6136 |\n", "| total_timesteps | 6158525 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0525 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 276 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5856 |\n", "| fps | 107 |\n", "| time_elapsed | 6173 |\n", "| total_timesteps | 6163103 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.062 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 273 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5860 |\n", "| fps | 107 |\n", "| time_elapsed | 6211 |\n", "| total_timesteps | 6167713 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.55 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 276 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5864 |\n", "| fps | 107 |\n", "| time_elapsed | 6244 |\n", "| total_timesteps | 6171829 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.167 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 
0.05 |\n", "| time/ | |\n", "| episodes | 5868 |\n", "| fps | 107 |\n", "| time_elapsed | 6290 |\n", "| total_timesteps | 6177495 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.152 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 265 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5872 |\n", "| fps | 107 |\n", "| time_elapsed | 6319 |\n", "| total_timesteps | 6181053 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0416 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 263 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5876 |\n", "| fps | 107 |\n", "| time_elapsed | 6354 |\n", "| total_timesteps | 6185291 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0596 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 267 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5880 |\n", "| fps | 107 |\n", "| time_elapsed | 6388 |\n", "| total_timesteps | 6189537 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.11 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 276 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5884 |\n", "| fps | 108 |\n", "| time_elapsed | 6431 |\n", "| total_timesteps | 6194853 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0479 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5888 |\n", "| fps | 108 |\n", "| time_elapsed | 6467 |\n", "| total_timesteps | 6199231 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.398 |\n", "----------------------------------\n", "Eval num_timesteps=6200000, episode_reward=203.50 +/- 34.25\n", "Episode length: 1222.10 +/- 205.00\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.22e+03 |\n", "| mean_reward | 204 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6200000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0617 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 280 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5892 |\n", "| fps | 107 |\n", "| time_elapsed | 6528 |\n", "| total_timesteps | 6203199 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0764 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 277 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5896 |\n", "| fps | 107 |\n", "| time_elapsed | 6568 |\n", "| total_timesteps | 6208017 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.235 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 273 |\n", "| exploration_rate | 0.05 
|\n", "| time/ | |\n", "| episodes | 5900 |\n", "| fps | 107 |\n", "| time_elapsed | 6597 |\n", "| total_timesteps | 6211585 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0675 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 273 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5904 |\n", "| fps | 107 |\n", "| time_elapsed | 6635 |\n", "| total_timesteps | 6216353 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0558 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 272 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5908 |\n", "| fps | 107 |\n", "| time_elapsed | 6678 |\n", "| total_timesteps | 6221241 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.385 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5912 |\n", "| fps | 107 |\n", "| time_elapsed | 6716 |\n", "| total_timesteps | 6225304 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0986 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 277 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5916 |\n", "| fps | 108 |\n", "| time_elapsed | 6747 |\n", "| total_timesteps | 6229142 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.22 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 271 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5920 |\n", "| fps | 108 |\n", "| time_elapsed | 6778 |\n", "| total_timesteps | 6232840 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0476 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 267 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5924 |\n", "| fps | 108 |\n", "| time_elapsed | 6811 |\n", "| total_timesteps | 6236920 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.237 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 272 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5928 |\n", "| fps | 108 |\n", "| time_elapsed | 6846 |\n", "| total_timesteps | 6241222 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0966 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5932 |\n", "| fps | 108 |\n", "| time_elapsed | 6884 |\n", "| total_timesteps | 6245882 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0347 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 270 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5936 |\n", "| fps | 
108 |\n", "| time_elapsed | 6917 |\n", "| total_timesteps | 6249844 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0504 |\n", "----------------------------------\n", "Eval num_timesteps=6250000, episode_reward=199.30 +/- 131.44\n", "Episode length: 988.10 +/- 296.28\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 988 |\n", "| mean_reward | 199 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6250000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0491 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 275 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5940 |\n", "| fps | 108 |\n", "| time_elapsed | 6980 |\n", "| total_timesteps | 6254574 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.52 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 279 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5944 |\n", "| fps | 108 |\n", "| time_elapsed | 7020 |\n", "| total_timesteps | 6259507 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0574 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 272 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5948 |\n", "| fps | 108 |\n", "| time_elapsed | 7050 |\n", "| total_timesteps | 6263189 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.07 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 264 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5952 |\n", "| fps | 108 |\n", "| time_elapsed | 7170 |\n", "| total_timesteps | 6278063 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0503 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 265 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5956 |\n", "| fps | 108 |\n", "| time_elapsed | 7205 |\n", "| total_timesteps | 6282423 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.61 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 264 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5960 |\n", "| fps | 108 |\n", "| time_elapsed | 7242 |\n", "| total_timesteps | 6286897 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 3.17 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 263 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5964 |\n", "| fps | 108 |\n", "| time_elapsed | 7279 |\n", "| total_timesteps | 6291541 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.135 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 271 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5968 |\n", "| fps | 108 |\n", "| 
time_elapsed | 7319 |\n", "| total_timesteps | 6296447 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.193 |\n", "----------------------------------\n", "Eval num_timesteps=6300000, episode_reward=148.40 +/- 26.83\n", "Episode length: 788.60 +/- 162.89\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 789 |\n", "| mean_reward | 148 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6300000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0637 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 271 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5972 |\n", "| fps | 108 |\n", "| time_elapsed | 7465 |\n", "| total_timesteps | 6311985 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0185 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 269 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5976 |\n", "| fps | 108 |\n", "| time_elapsed | 7502 |\n", "| total_timesteps | 6316579 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0329 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 260 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5980 |\n", "| fps | 108 |\n", "| time_elapsed | 7530 |\n", "| total_timesteps | 6320043 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0674 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 257 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5984 |\n", "| fps | 108 |\n", "| time_elapsed | 7574 |\n", "| total_timesteps | 6325449 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.118 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 258 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5988 |\n", "| fps | 109 |\n", "| time_elapsed | 7607 |\n", "| total_timesteps | 6329471 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0374 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 250 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5992 |\n", "| fps | 109 |\n", "| time_elapsed | 7643 |\n", "| total_timesteps | 6333911 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.107 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 251 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5996 |\n", "| fps | 109 |\n", "| time_elapsed | 7676 |\n", "| total_timesteps | 6337972 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0671 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 257 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6000 |\n", "| fps | 109 |\n", "| time_elapsed 
| 7717 |\n", "| total_timesteps | 6343044 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.193 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 256 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6004 |\n", "| fps | 109 |\n", "| time_elapsed | 7754 |\n", "| total_timesteps | 6347646 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0533 |\n", "----------------------------------\n", "Eval num_timesteps=6350000, episode_reward=178.40 +/- 168.96\n", "Episode length: 1019.20 +/- 302.46\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.02e+03 |\n", "| mean_reward | 178 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6350000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0648 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 261 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6008 |\n", "| fps | 109 |\n", "| time_elapsed | 7818 |\n", "| total_timesteps | 6352488 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0889 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 261 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6012 |\n", "| fps | 109 |\n", "| time_elapsed | 7856 |\n", "| total_timesteps | 6357160 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0222 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 261 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6016 |\n", "| fps | 109 |\n", "| time_elapsed | 7887 |\n", "| total_timesteps | 6360909 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.113 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 264 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6020 |\n", "| fps | 109 |\n", "| time_elapsed | 7913 |\n", "| total_timesteps | 6364201 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0602 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 269 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6024 |\n", "| fps | 109 |\n", "| time_elapsed | 7950 |\n", "| total_timesteps | 6368652 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.31 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 273 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6028 |\n", "| fps | 109 |\n", "| time_elapsed | 7988 |\n", "| total_timesteps | 6373408 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.24 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 278 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6032 |\n", "| fps | 109 |\n", "| time_elapsed | 8026 
|\n", "| total_timesteps | 6378042 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.151 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.33e+03 |\n", "| ep_rew_mean | 284 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6036 |\n", "| fps | 109 |\n", "| time_elapsed | 8062 |\n", "| total_timesteps | 6382528 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.66 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 283 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6040 |\n", "| fps | 109 |\n", "| time_elapsed | 8097 |\n", "| total_timesteps | 6386874 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0644 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 283 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6044 |\n", "| fps | 109 |\n", "| time_elapsed | 8136 |\n", "| total_timesteps | 6391652 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0666 |\n", "----------------------------------\n", "Eval num_timesteps=6400000, episode_reward=130.30 +/- 58.84\n", "Episode length: 1102.00 +/- 159.98\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.1e+03 |\n", "| mean_reward | 130 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6400000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0473 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.59e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6048 |\n", "| fps | 109 |\n", "| time_elapsed | 8409 |\n", "| total_timesteps | 6422050 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.031 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.48e+03 |\n", "| ep_rew_mean | 293 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6052 |\n", "| fps | 109 |\n", "| time_elapsed | 8445 |\n", "| total_timesteps | 6426449 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0313 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.48e+03 |\n", "| ep_rew_mean | 290 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6056 |\n", "| fps | 109 |\n", "| time_elapsed | 8481 |\n", "| total_timesteps | 6430865 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.023 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.49e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6060 |\n", "| fps | 109 |\n", "| time_elapsed | 8518 |\n", "| total_timesteps | 6435413 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0203 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.49e+03 |\n", "| ep_rew_mean | 289 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6064 |\n", "| fps | 109 |\n", "| time_elapsed | 8555 |\n", 
"| total_timesteps | 6440043 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0308 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.49e+03 |\n", "| ep_rew_mean | 286 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6068 |\n", "| fps | 109 |\n", "| time_elapsed | 8596 |\n", "| total_timesteps | 6445059 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0558 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 287 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6072 |\n", "| fps | 109 |\n", "| time_elapsed | 8632 |\n", "| total_timesteps | 6449579 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0432 |\n", "----------------------------------\n", "Eval num_timesteps=6450000, episode_reward=229.90 +/- 106.70\n", "Episode length: 1061.20 +/- 92.23\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.06e+03 |\n", "| mean_reward | 230 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6450000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0402 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 290 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6076 |\n", "| fps | 109 |\n", "| time_elapsed | 8699 |\n", "| total_timesteps | 6454424 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0364 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 293 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6080 |\n", "| fps | 109 |\n", "| time_elapsed | 8732 |\n", "| total_timesteps | 6458449 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0486 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 287 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6084 |\n", "| fps | 109 |\n", "| time_elapsed | 8769 |\n", "| total_timesteps | 6462665 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0477 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 285 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6088 |\n", "| fps | 109 |\n", "| time_elapsed | 8808 |\n", "| total_timesteps | 6466651 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.181 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 285 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6092 |\n", "| fps | 109 |\n", "| time_elapsed | 8847 |\n", "| total_timesteps | 6471483 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0967 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 281 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6096 |\n", "| fps | 109 |\n", "| time_elapsed | 8876 |\n", "| 
total_timesteps | 6475056 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0714 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 277 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6100 |\n", "| fps | 109 |\n", "| time_elapsed | 8913 |\n", "| total_timesteps | 6479552 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0444 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 276 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6104 |\n", "| fps | 109 |\n", "| time_elapsed | 8952 |\n", "| total_timesteps | 6484340 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0449 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 273 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6108 |\n", "| fps | 110 |\n", "| time_elapsed | 8990 |\n", "| total_timesteps | 6489030 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0953 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 266 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6112 |\n", "| fps | 110 |\n", "| time_elapsed | 9020 |\n", "| total_timesteps | 6492749 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.126 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 266 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6116 |\n", "| fps | 110 |\n", "| time_elapsed | 9054 |\n", "| total_timesteps | 6496921 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0722 |\n", "----------------------------------\n", "Eval num_timesteps=6500000, episode_reward=319.40 +/- 119.88\n", "Episode length: 1168.60 +/- 192.86\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.17e+03 |\n", "| mean_reward | 319 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6500000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0539 |\n", "----------------------------------\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Train the model\n", "model.learn(total_timesteps=NUM_TIMESTEPS, \n", " callback=callback_list, \n", " tb_log_name=\"./tb/\", \n", " reset_num_timesteps=False)" ] }, { "cell_type": "code", "execution_count": 9, "id": "8fd61e61", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T21:58:43.008156Z", "iopub.status.busy": "2024-05-14T21:58:43.007506Z", "iopub.status.idle": "2024-05-14T21:59:37.198365Z", "shell.execute_reply": "2024-05-14T21:59:37.197546Z" }, "papermill": { "duration": 54.217944, "end_time": "2024-05-14T21:59:37.201125", "exception": false, "start_time": "2024-05-14T21:58:42.983181", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Save the model, policy, and replay buffer for future loading and training\n", "model.save(MODEL_FILE_NAME)\n", "model.save_replay_buffer(BUFFER_FILE_NAME)\n", "model.policy.save(POLICY_FILE_NAME)" ] } ], 
"metadata": { "kaggle": { "accelerator": "nvidiaTeslaT4", "dataSources": [ { "sourceId": 177618485, "sourceType": "kernelVersion" } ], "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" }, "papermill": { "default_parameters": {}, "duration": 9322.050913, "end_time": "2024-05-14T21:59:40.529020", "environment_variables": {}, "exception": null, "input_path": "__notebook__.ipynb", "output_path": "__notebook__.ipynb", "parameters": {}, "start_time": "2024-05-14T19:24:18.478107", "version": "2.5.0" } }, "nbformat": 4, "nbformat_minor": 5 }