{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "e4113a3e", "metadata": { "execution": { "iopub.execute_input": "2024-05-15T15:55:37.082757Z", "iopub.status.busy": "2024-05-15T15:55:37.082492Z", "iopub.status.idle": "2024-05-15T15:56:34.804509Z", "shell.execute_reply": "2024-05-15T15:56:34.803415Z" }, "papermill": { "duration": 57.72961, "end_time": "2024-05-15T15:56:34.807045", "exception": false, "start_time": "2024-05-15T15:55:37.077435", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "%%capture\n", "!pip install stable-baselines3[extra]\n", "!pip install moviepy" ] }, { "cell_type": "code", "execution_count": 2, "id": "34b84a2a", "metadata": { "execution": { "iopub.execute_input": "2024-05-15T15:56:34.816805Z", "iopub.status.busy": "2024-05-15T15:56:34.816497Z", "iopub.status.idle": "2024-05-15T15:56:50.789785Z", "shell.execute_reply": "2024-05-15T15:56:50.788803Z" }, "papermill": { "duration": 15.980967, "end_time": "2024-05-15T15:56:50.792231", "exception": false, "start_time": "2024-05-15T15:56:34.811264", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-05-15 15:56:41.171922: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-05-15 15:56:41.172028: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-05-15 15:56:41.341461: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n" ] } ], "source": [ "from stable_baselines3 import DQN\n", "from stable_baselines3.common.monitor import Monitor\n", "from stable_baselines3.common.callbacks import BaseCallback, EvalCallback, CallbackList\n", "from stable_baselines3.common.logger import Video, HParam, TensorBoardOutputFormat\n", "from stable_baselines3.common.evaluation import evaluate_policy\n", "\n", "from typing import Any, Dict\n", "\n", "import gymnasium as gym\n", "import torch as th\n", "import numpy as np\n", "\n", "# =====File names=====\n", "MODEL_FILE_NAME = \"ALE-Pacman-v5\"\n", "BUFFER_FILE_NAME = \"dqn_replay_buffer_pacman_v2-6\"\n", "POLICY_FILE_NAME = \"dqn_policy_pacman_v2-6\"\n", "\n", "# =====Model Config=====\n", "# Evaluate in 20ths\n", "EVAL_CALLBACK_FREQ = 50_000\n", "# Record in quarters (the last one won't record, will have to do manually)\n", "# If I record in quarters, but drop the frequency down to 240k instead of 250k, this might trigger a recording near the end.\n", "VIDEO_CALLBACK_FREQ = 240_000\n", "FRAMESKIP = 4\n", "NUM_TIMESTEPS = 1_000_000\n", "\n", "# =====Hyperparams=====\n", "EXPLORATION_FRACTION = 0.3\n", "# Buffer size needs to be less than about 60k in order to save it in a Kaggle instance\n", "# Increasing buffer size to 70K, should be able to store it.\n", "BUFFER_SIZE = 70_000\n", "BATCH_SIZE = 64\n", "LEARNING_STARTS = 50_000\n", "LEARNING_RATE = 0.00005\n", "GAMMA = 0.999\n", "FINAL_EPSILON = 0.05\n", "# Target Update Interval is set to 10k by default and looks like it is set to \n", "# 4 in the Nature paper. This is a large discrepency and makes me wonder if it \n", "# is something different or measured differently...\n", "TARGET_UPDATE_INTERVAL = 1_000\n", "\n", "# =====Custom objects for hyperparam modification=====\n", "CUSTOM_OBJECTS = {\n", " \"exploration_fraction\": EXPLORATION_FRACTION, \n", " \"buffer_size\": BUFFER_SIZE,\n", " \"batch_size\": BATCH_SIZE,\n", " \"learning_starts\": LEARNING_STARTS,\n", " \"learning_rate\": LEARNING_RATE,\n", " \"gamma\": GAMMA,\n", " \"target_update_interval\": TARGET_UPDATE_INTERVAL,\n", " \"exploration_final_eps\": FINAL_EPSILON,\n", " \"tensorboard_log\": \"./\",\n", " \"verbose\": 1}" ] }, { "cell_type": "code", "execution_count": 3, "id": "fa740c46", "metadata": { "execution": { "iopub.execute_input": "2024-05-15T15:56:50.801543Z", "iopub.status.busy": "2024-05-15T15:56:50.800776Z", "iopub.status.idle": "2024-05-15T15:56:50.811413Z", "shell.execute_reply": "2024-05-15T15:56:50.810407Z" }, "jupyter": { "source_hidden": true }, "papermill": { "duration": 0.017098, "end_time": "2024-05-15T15:56:50.813322", "exception": false, "start_time": "2024-05-15T15:56:50.796224", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# VideoRecorderCallback\n", "# The VideoRecorderCallback should record a video of the agent in the evaluation environment\n", "# every render_freq timesteps. It will record one episode. It will also record one episode when\n", "# the training has been completed\n", "\n", "class VideoRecorderCallback(BaseCallback):\n", " def __init__(self, eval_env: gym.Env, render_freq: int, n_eval_episodes: int = 1, deterministic: bool = True):\n", " \"\"\"\n", " Records a video of an agent's trajectory traversing ``eval_env`` and logs it to TensorBoard.\n", " :param eval_env: A gym environment from which the trajectory is recorded\n", " :param render_freq: Render the agent's trajectory every eval_freq call of the callback.\n", " :param n_eval_episodes: Number of episodes to render\n", " :param deterministic: Whether to use deterministic or stochastic policy\n", " \"\"\"\n", " super().__init__()\n", " self._eval_env = eval_env\n", " self._render_freq = render_freq\n", " self._n_eval_episodes = n_eval_episodes\n", " self._deterministic = deterministic\n", "\n", " def _on_step(self) -> bool:\n", " if self.n_calls % self._render_freq == 0:\n", " screens = []\n", "\n", " def grab_screens(_locals: Dict[str, Any], _globals: Dict[str, Any]) -> None:\n", " \"\"\"\n", " Renders the environment in its current state, recording the screen in the captured `screens` list\n", " :param _locals: A dictionary containing all local variables of the callback's scope\n", " :param _globals: A dictionary containing all global variables of the callback's scope\n", " \"\"\"\n", " screen = self._eval_env.render()\n", " # PyTorch uses CxHxW vs HxWxC gym (and tensorflow) image convention\n", " screens.append(screen.transpose(2, 0, 1))\n", "\n", " evaluate_policy(\n", " self.model,\n", " self._eval_env,\n", " callback=grab_screens,\n", " n_eval_episodes=self._n_eval_episodes,\n", " deterministic=self._deterministic,\n", " )\n", " self.logger.record(\n", " \"trajectory/video\",\n", " Video(th.from_numpy(np.array([screens])), fps=60),\n", " exclude=(\"stdout\", \"log\", \"json\", \"csv\"),\n", " )\n", " return True" ] }, { "cell_type": "code", "execution_count": 4, "id": "375260e1", "metadata": { "execution": { "iopub.execute_input": "2024-05-15T15:56:50.821506Z", "iopub.status.busy": "2024-05-15T15:56:50.821059Z", "iopub.status.idle": "2024-05-15T15:56:50.832165Z", "shell.execute_reply": "2024-05-15T15:56:50.831339Z" }, "jupyter": { "source_hidden": true }, "papermill": { "duration": 0.017234, "end_time": "2024-05-15T15:56:50.834009", "exception": false, "start_time": "2024-05-15T15:56:50.816775", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# HParamCallback\n", "# This should log the hyperparameters specified and map the metrics that are logged to \n", "# the appropriate run.\n", "class HParamCallback(BaseCallback):\n", " \"\"\"\n", " Saves the hyperparameters and metrics at the start of the training, and logs them to TensorBoard.\n", " \"\"\" \n", " def __init__(self):\n", " super().__init__()\n", " \n", "\n", " def _on_training_start(self) -> None:\n", " \n", " hparam_dict = {\n", " \"algorithm\": self.model.__class__.__name__,\n", " \"policy\": self.model.policy.__class__.__name__,\n", " \"environment\": self.model.env.__class__.__name__,\n", " \"buffer_size\": self.model.buffer_size,\n", " \"batch_size\": self.model.batch_size,\n", " \"tau\": self.model.tau,\n", " \"gradient_steps\": self.model.gradient_steps,\n", " \"target_update_interval\": self.model.target_update_interval,\n", " \"exploration_fraction\": self.model.exploration_fraction,\n", " \"exploration_initial_eps\": self.model.exploration_initial_eps,\n", " \"exploration_final_eps\": self.model.exploration_final_eps,\n", " \"max_grad_norm\": self.model.max_grad_norm,\n", " \"tensorboard_log\": self.model.tensorboard_log,\n", " \"seed\": self.model.seed, \n", " \"learning rate\": self.model.learning_rate,\n", " \"gamma\": self.model.gamma, \n", " }\n", " # define the metrics that will appear in the `HPARAMS` Tensorboard tab by referencing their tag\n", " # Tensorbaord will find & display metrics from the `SCALARS` tab\n", " metric_dict = {\n", " \"eval/mean_ep_length\": 0,\n", " \"eval/mean_reward\": 0,\n", " \"rollout/ep_len_mean\": 0,\n", " \"rollout/ep_rew_mean\": 0,\n", " \"rollout/exploration_rate\": 0,\n", " \"time/_episode_num\": 0,\n", " \"time/fps\": 0,\n", " \"time/total_timesteps\": 0,\n", " \"train/learning_rate\": 0.0,\n", " \"train/loss\": 0.0,\n", " \"train/n_updates\": 0.0,\n", " \"locals/rewards\": 0.0,\n", " \"locals/infos_0_lives\": 0.0,\n", " \"locals/num_collected_steps\": 0.0,\n", " \"locals/num_collected_episodes\": 0.0\n", " }\n", " \n", " self.logger.record(\n", " \"hparams\",\n", " HParam(hparam_dict, metric_dict),\n", " exclude=(\"stdout\", \"log\", \"json\", \"csv\"),\n", " )\n", " \n", " def _on_step(self) -> bool:\n", " return True" ] }, { "cell_type": "code", "execution_count": 5, "id": "92dd03f5", "metadata": { "execution": { "iopub.execute_input": "2024-05-15T15:56:50.842376Z", "iopub.status.busy": "2024-05-15T15:56:50.842094Z", "iopub.status.idle": "2024-05-15T15:56:50.856918Z", "shell.execute_reply": "2024-05-15T15:56:50.856107Z" }, "jupyter": { "source_hidden": true }, "papermill": { "duration": 0.021155, "end_time": "2024-05-15T15:56:50.858696", "exception": false, "start_time": "2024-05-15T15:56:50.837541", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# PlotTensorboardValuesCallback\n", "# This callback should log values to tensorboard on every step. \n", "# The self.logger class should plot a new scalar value when recording.\n", "\n", "class PlotTensorboardValuesCallback(BaseCallback):\n", " \"\"\"\n", " Custom callback for plotting additional values in tensorboard.\n", " \"\"\"\n", " def __init__(self, eval_env: gym.Env, train_env: gym.Env, model: DQN, verbose=0):\n", " super().__init__(verbose)\n", " self._eval_env = eval_env\n", " self._train_env = train_env\n", " self._model = model\n", "\n", " def _on_training_start(self) -> None:\n", " output_formats = self.logger.output_formats\n", " # Save reference to tensorboard formatter object\n", " # note: the failure case (not formatter found) is not handled here, should be done with try/except.\n", " try:\n", " self.tb_formatter = next(formatter for formatter in output_formats if isinstance(formatter, TensorBoardOutputFormat))\n", " except:\n", " print(\"Exception thrown in tb_formatter initialization.\") \n", " \n", " self.tb_formatter.writer.add_text(\"metadata/eval_env\", str(self._eval_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"metadata/train_env\", str(self._train_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net\", str(self._model.q_net), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net_target\", str(self._model.q_net_target), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", "\n", " def _on_step(self) -> bool:\n", " self.logger.record(\"time/_episode_num\", self.model._episode_num, exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"train/n_updates\", self.model._n_updates, exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/rewards\", self.locals[\"rewards\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/infos_0_lives\", self.locals[\"infos\"][0][\"lives\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/num_collected_steps\", self.locals[\"num_collected_steps\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/num_collected_episodes\", self.locals[\"num_collected_episodes\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " \n", " return True\n", " \n", " def _on_training_end(self) -> None:\n", " self.tb_formatter.writer.add_text(\"metadata/eval_env\", str(self._eval_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"metadata/train_env\", str(self._train_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net\", str(self._model.q_net), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net_target\", str(self._model.q_net_target), self.num_timesteps)\n", " self.tb_formatter.writer.flush()" ] }, { "cell_type": "code", "execution_count": 6, "id": "4c32b706", "metadata": { "execution": { "iopub.execute_input": "2024-05-15T15:56:50.867092Z", "iopub.status.busy": "2024-05-15T15:56:50.866581Z", "iopub.status.idle": "2024-05-15T15:58:10.633713Z", "shell.execute_reply": "2024-05-15T15:58:10.632618Z" }, "papermill": { "duration": 79.774051, "end_time": "2024-05-15T15:58:10.636222", "exception": false, "start_time": "2024-05-15T15:56:50.862171", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)\n", "[Powered by Stella]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Wrapping the env with a `Monitor` wrapper\n", "Wrapping the env in a DummyVecEnv.\n", "Wrapping the env in a VecTransposeImage.\n" ] } ], "source": [ "# make the training and evaluation environments\n", "eval_env = Monitor(gym.make(\"ALE/Pacman-v5\", render_mode=\"rgb_array\", frameskip=FRAMESKIP))\n", "train_env = gym.make(\"ALE/Pacman-v5\", render_mode=\"rgb_array\", frameskip=FRAMESKIP)\n", "\n", "# Make the model with specified hyperparams\n", "# load the model\n", "# load the buffer\n", "# The loaded model still needs the hyperparameters to be passed to it, and the replay buffer\n", "model = DQN.load(\"/kaggle/input/dqn-pacmanv5-run2v5/ALE-Pacman-v5.zip\", \n", " env=train_env, \n", " custom_objects=CUSTOM_OBJECTS)\n", "model.load_replay_buffer(\"/kaggle/input/dqn-pacmanv5-run2v5/dqn_replay_buffer_pacman_v2-4\")" ] }, { "cell_type": "code", "execution_count": 7, "id": "26e5c094", "metadata": { "execution": { "iopub.execute_input": "2024-05-15T15:58:10.650462Z", "iopub.status.busy": "2024-05-15T15:58:10.649668Z", "iopub.status.idle": "2024-05-15T15:58:10.655793Z", "shell.execute_reply": "2024-05-15T15:58:10.654916Z" }, "papermill": { "duration": 0.012968, "end_time": "2024-05-15T15:58:10.657727", "exception": false, "start_time": "2024-05-15T15:58:10.644759", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Define the callbacks and put them in a list\n", "eval_callback = EvalCallback(\n", " eval_env,\n", " best_model_save_path=\"./best_model/\",\n", " log_path=\"./evals/\",\n", " eval_freq=EVAL_CALLBACK_FREQ,\n", " n_eval_episodes=10,\n", " deterministic=True,\n", " render=False)\n", "\n", "tbplot_callback = PlotTensorboardValuesCallback(eval_env=eval_env, train_env=train_env, model=model)\n", "video_callback = VideoRecorderCallback(eval_env, render_freq=VIDEO_CALLBACK_FREQ)\n", "hparam_callback = HParamCallback()\n", "\n", "callback_list = CallbackList([hparam_callback, eval_callback, video_callback, tbplot_callback])" ] }, { "cell_type": "code", "execution_count": 8, "id": "205466ab", "metadata": { "execution": { "iopub.execute_input": "2024-05-15T15:58:10.666096Z", "iopub.status.busy": "2024-05-15T15:58:10.665851Z", "iopub.status.idle": "2024-05-15T18:27:14.482109Z", "shell.execute_reply": "2024-05-15T18:27:14.480986Z" }, "papermill": { "duration": 8943.82281, "end_time": "2024-05-15T18:27:14.484230", "exception": false, "start_time": "2024-05-15T15:58:10.661420", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Logging to ././tb/_0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/stable_baselines3/common/callbacks.py:403: UserWarning: Training and eval env are not of the same type != \n", " warnings.warn(\"Training and eval env are not of the same type\" f\"{self.training_env} != {self.eval_env}\")\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.36e+03 |\n", "| ep_rew_mean | 263 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6120 |\n", "| fps | 104 |\n", "| time_elapsed | 14 |\n", "| total_timesteps | 6501480 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.154 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.63e+03 |\n", "| ep_rew_mean | 264 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6124 |\n", "| fps | 114 |\n", "| time_elapsed | 283 |\n", "| total_timesteps | 6532348 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0428 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.63e+03 |\n", "| ep_rew_mean | 263 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6128 |\n", "| fps | 113 |\n", "| time_elapsed | 325 |\n", "| total_timesteps | 6536956 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.26 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.62e+03 |\n", "| ep_rew_mean | 254 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6132 |\n", "| fps | 113 |\n", "| time_elapsed | 360 |\n", "| total_timesteps | 6540962 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0311 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.62e+03 |\n", "| ep_rew_mean | 252 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6136 |\n", "| fps | 113 |\n", "| time_elapsed | 403 |\n", "| total_timesteps | 6545872 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.117 |\n", "----------------------------------\n", "Eval num_timesteps=6550000, episode_reward=368.40 +/- 66.99\n", "Episode length: 1245.60 +/- 140.31\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.25e+03 |\n", "| mean_reward | 368 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6550000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0262 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.63e+03 |\n", "| ep_rew_mean | 249 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6140 |\n", "| fps | 106 |\n", "| time_elapsed | 478 |\n", "| total_timesteps | 6551058 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0229 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.63e+03 |\n", "| ep_rew_mean | 246 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6144 |\n", "| fps | 107 |\n", "| time_elapsed | 517 |\n", "| total_timesteps | 6555538 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.101 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 247 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6148 |\n", "| fps | 107 |\n", "| time_elapsed | 557 |\n", "| total_timesteps | 6560127 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0263 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 245 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6152 |\n", "| fps | 108 |\n", "| time_elapsed | 603 |\n", "| total_timesteps | 6565371 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0638 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 247 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6156 |\n", "| fps | 108 |\n", "| time_elapsed | 640 |\n", "| total_timesteps | 6569619 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.035 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 249 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6160 |\n", "| fps | 109 |\n", "| time_elapsed | 680 |\n", "| total_timesteps | 6574251 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.198 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 253 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6164 |\n", "| fps | 109 |\n", "| time_elapsed | 723 |\n", "| total_timesteps | 6579179 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0637 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 246 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6168 |\n", "| fps | 109 |\n", "| time_elapsed | 760 |\n", "| total_timesteps | 6583360 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.158 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.37e+03 |\n", "| ep_rew_mean | 244 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6172 |\n", "| fps | 109 |\n", "| time_elapsed | 795 |\n", "| total_timesteps | 6587470 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.287 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.38e+03 |\n", "| ep_rew_mean | 246 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6176 |\n", "| fps | 110 |\n", "| time_elapsed | 843 |\n", "| total_timesteps | 6592922 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.12 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 250 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6180 |\n", "| fps | 110 |\n", "| time_elapsed | 887 |\n", "| total_timesteps | 6598079 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.237 |\n", "----------------------------------\n", "Eval num_timesteps=6600000, episode_reward=122.90 +/- 96.03\n", "Episode length: 993.20 +/- 238.74\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 993 |\n", "| mean_reward | 123 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6600000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.135 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 255 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6184 |\n", "| fps | 107 |\n", "| time_elapsed | 959 |\n", "| total_timesteps | 6603439 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0584 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 251 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6188 |\n", "| fps | 107 |\n", "| time_elapsed | 995 |\n", "| total_timesteps | 6607481 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.148 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 249 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6192 |\n", "| fps | 108 |\n", "| time_elapsed | 1040 |\n", "| total_timesteps | 6612527 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0784 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 251 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6196 |\n", "| fps | 108 |\n", "| time_elapsed | 1084 |\n", "| total_timesteps | 6617424 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.59 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 255 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6200 |\n", "| fps | 108 |\n", "| time_elapsed | 1131 |\n", "| total_timesteps | 6622672 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.127 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 253 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6204 |\n", "| fps | 108 |\n", "| time_elapsed | 1184 |\n", "| total_timesteps | 6628524 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.361 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 252 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6208 |\n", "| fps | 108 |\n", "| time_elapsed | 1257 |\n", "| total_timesteps | 6636816 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.377 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.48e+03 |\n", "| ep_rew_mean | 249 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6212 |\n", "| fps | 108 |\n", "| time_elapsed | 1300 |\n", "| total_timesteps | 6641648 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0825 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.48e+03 |\n", "| ep_rew_mean | 251 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6216 |\n", "| fps | 109 |\n", "| time_elapsed | 1340 |\n", "| total_timesteps | 6646288 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0941 |\n", "----------------------------------\n", "Eval num_timesteps=6650000, episode_reward=168.90 +/- 40.86\n", "Episode length: 1370.80 +/- 98.92\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.37e+03 |\n", "| mean_reward | 169 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6650000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0994 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.5e+03 |\n", "| ep_rew_mean | 252 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6220 |\n", "| fps | 106 |\n", "| time_elapsed | 1416 |\n", "| total_timesteps | 6651412 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.22 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 248 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6224 |\n", "| fps | 107 |\n", "| time_elapsed | 1457 |\n", "| total_timesteps | 6656226 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.192 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 246 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6228 |\n", "| fps | 107 |\n", "| time_elapsed | 1497 |\n", "| total_timesteps | 6660930 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.559 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 245 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6232 |\n", "| fps | 107 |\n", "| time_elapsed | 1540 |\n", "| total_timesteps | 6665922 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.125 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 242 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6236 |\n", "| fps | 107 |\n", "| time_elapsed | 1584 |\n", "| total_timesteps | 6671144 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.372 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 238 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6240 |\n", "| fps | 108 |\n", "| time_elapsed | 1626 |\n", "| total_timesteps | 6676056 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.102 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 233 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6244 |\n", "| fps | 108 |\n", "| time_elapsed | 1669 |\n", "| total_timesteps | 6681148 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0521 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 226 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6248 |\n", "| fps | 108 |\n", "| time_elapsed | 1712 |\n", "| total_timesteps | 6686156 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.106 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 227 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6252 |\n", "| fps | 108 |\n", "| time_elapsed | 1755 |\n", "| total_timesteps | 6691208 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.102 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 232 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6256 |\n", "| fps | 109 |\n", "| time_elapsed | 1799 |\n", "| total_timesteps | 6696146 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0932 |\n", "----------------------------------\n", "Eval num_timesteps=6700000, episode_reward=220.70 +/- 83.62\n", "Episode length: 1162.40 +/- 85.03\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.16e+03 |\n", "| mean_reward | 221 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6700000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0444 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 226 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6260 |\n", "| fps | 107 |\n", "| time_elapsed | 1865 |\n", "| total_timesteps | 6700748 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.697 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 222 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6264 |\n", "| fps | 107 |\n", "| time_elapsed | 1904 |\n", "| total_timesteps | 6705203 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.307 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 229 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6268 |\n", "| fps | 107 |\n", "| time_elapsed | 1947 |\n", "| total_timesteps | 6710285 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.123 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 234 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6272 |\n", "| fps | 108 |\n", "| time_elapsed | 1991 |\n", "| total_timesteps | 6715429 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.187 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 235 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6276 |\n", "| fps | 108 |\n", "| time_elapsed | 2037 |\n", "| total_timesteps | 6720713 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0821 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 233 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6280 |\n", "| fps | 108 |\n", "| time_elapsed | 2080 |\n", "| total_timesteps | 6725761 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.188 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 233 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6284 |\n", "| fps | 108 |\n", "| time_elapsed | 2126 |\n", "| total_timesteps | 6731165 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0898 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.29e+03 |\n", "| ep_rew_mean | 238 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6288 |\n", "| fps | 108 |\n", "| time_elapsed | 2170 |\n", "| total_timesteps | 6736279 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.242 |\n", "----------------------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/gymnasium/utils/passive_env_checker.py:335: UserWarning: \u001b[33mWARN: No render fps was declared in the environment (env.metadata['render_fps'] is None or not defined), rendering may occur at inconsistent fps.\u001b[0m\n", " logger.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.29e+03 |\n", "| ep_rew_mean | 245 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6292 |\n", "| fps | 108 |\n", "| time_elapsed | 2220 |\n", "| total_timesteps | 6741769 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.34 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 247 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6296 |\n", "| fps | 108 |\n", "| time_elapsed | 2274 |\n", "| total_timesteps | 6747295 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.26 |\n", "----------------------------------\n", "Eval num_timesteps=6750000, episode_reward=349.10 +/- 81.14\n", "Episode length: 1371.40 +/- 168.48\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.37e+03 |\n", "| mean_reward | 349 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6750000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0648 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 245 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6300 |\n", "| fps | 107 |\n", "| time_elapsed | 2353 |\n", "| total_timesteps | 6753053 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.213 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 250 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6304 |\n", "| fps | 107 |\n", "| time_elapsed | 2399 |\n", "| total_timesteps | 6758619 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0853 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 249 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6308 |\n", "| fps | 108 |\n", "| time_elapsed | 2445 |\n", "| total_timesteps | 6764205 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0953 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 253 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6312 |\n", "| fps | 108 |\n", "| time_elapsed | 2484 |\n", "| total_timesteps | 6768874 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.3 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 255 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6316 |\n", "| fps | 108 |\n", "| time_elapsed | 2527 |\n", "| total_timesteps | 6774066 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0893 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 262 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6320 |\n", "| fps | 108 |\n", "| time_elapsed | 2570 |\n", "| total_timesteps | 6779298 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0676 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 261 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6324 |\n", "| fps | 108 |\n", "| time_elapsed | 2607 |\n", "| total_timesteps | 6783770 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0743 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 260 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6328 |\n", "| fps | 109 |\n", "| time_elapsed | 2645 |\n", "| total_timesteps | 6788470 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0425 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 263 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6332 |\n", "| fps | 109 |\n", "| time_elapsed | 2688 |\n", "| total_timesteps | 6793714 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.104 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.28e+03 |\n", "| ep_rew_mean | 268 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6336 |\n", "| fps | 109 |\n", "| time_elapsed | 2730 |\n", "| total_timesteps | 6798852 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.106 |\n", "----------------------------------\n", "Eval num_timesteps=6800000, episode_reward=307.60 +/- 84.72\n", "Episode length: 1256.50 +/- 229.74\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.26e+03 |\n", "| mean_reward | 308 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6800000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0898 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.27e+03 |\n", "| ep_rew_mean | 279 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6340 |\n", "| fps | 108 |\n", "| time_elapsed | 2793 |\n", "| total_timesteps | 6802891 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.125 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 286 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6344 |\n", "| fps | 108 |\n", "| time_elapsed | 2829 |\n", "| total_timesteps | 6807353 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0458 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.26e+03 |\n", "| ep_rew_mean | 293 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6348 |\n", "| fps | 108 |\n", "| time_elapsed | 2865 |\n", "| total_timesteps | 6811849 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.176 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6352 |\n", "| fps | 109 |\n", "| time_elapsed | 2900 |\n", "| total_timesteps | 6816127 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.384 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.25e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6356 |\n", "| fps | 109 |\n", "| time_elapsed | 2937 |\n", "| total_timesteps | 6820763 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0644 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 292 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6360 |\n", "| fps | 109 |\n", "| time_elapsed | 2971 |\n", "| total_timesteps | 6824985 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.155 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 296 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6364 |\n", "| fps | 109 |\n", "| time_elapsed | 3007 |\n", "| total_timesteps | 6829529 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0852 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 295 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6368 |\n", "| fps | 109 |\n", "| time_elapsed | 3048 |\n", "| total_timesteps | 6834555 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0807 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 292 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6372 |\n", "| fps | 109 |\n", "| time_elapsed | 3088 |\n", "| total_timesteps | 6839511 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.158 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 289 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6376 |\n", "| fps | 110 |\n", "| time_elapsed | 3130 |\n", "| total_timesteps | 6844817 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.175 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 294 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6380 |\n", "| fps | 110 |\n", "| time_elapsed | 3168 |\n", "| total_timesteps | 6849521 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0805 |\n", "----------------------------------\n", "Eval num_timesteps=6850000, episode_reward=369.20 +/- 49.56\n", "Episode length: 1443.20 +/- 126.83\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.44e+03 |\n", "| mean_reward | 369 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6850000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.222 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.23e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6384 |\n", "| fps | 109 |\n", "| time_elapsed | 3243 |\n", "| total_timesteps | 6854516 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0564 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.24e+03 |\n", "| ep_rew_mean | 294 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6388 |\n", "| fps | 109 |\n", "| time_elapsed | 3288 |\n", "| total_timesteps | 6860012 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0442 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.22e+03 |\n", "| ep_rew_mean | 289 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6392 |\n", "| fps | 109 |\n", "| time_elapsed | 3323 |\n", "| total_timesteps | 6864225 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0874 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 285 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6396 |\n", "| fps | 109 |\n", "| time_elapsed | 3355 |\n", "| total_timesteps | 6868201 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.71 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 285 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6400 |\n", "| fps | 109 |\n", "| time_elapsed | 3396 |\n", "| total_timesteps | 6873163 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0936 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 280 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6404 |\n", "| fps | 109 |\n", "| time_elapsed | 3430 |\n", "| total_timesteps | 6877251 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.31 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 283 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6408 |\n", "| fps | 110 |\n", "| time_elapsed | 3468 |\n", "| total_timesteps | 6881947 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.295 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 287 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6412 |\n", "| fps | 110 |\n", "| time_elapsed | 3518 |\n", "| total_timesteps | 6888073 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.183 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 285 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6416 |\n", "| fps | 110 |\n", "| time_elapsed | 3561 |\n", "| total_timesteps | 6893225 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.121 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 284 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6420 |\n", "| fps | 110 |\n", "| time_elapsed | 3608 |\n", "| total_timesteps | 6898979 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0523 |\n", "----------------------------------\n", "Eval num_timesteps=6900000, episode_reward=294.40 +/- 141.68\n", "Episode length: 1173.20 +/- 344.32\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.17e+03 |\n", "| mean_reward | 294 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6900000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.256 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 290 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6424 |\n", "| fps | 109 |\n", "| time_elapsed | 3676 |\n", "| total_timesteps | 6903953 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.173 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 292 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6428 |\n", "| fps | 109 |\n", "| time_elapsed | 3710 |\n", "| total_timesteps | 6908185 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0559 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 293 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6432 |\n", "| fps | 110 |\n", "| time_elapsed | 3751 |\n", "| total_timesteps | 6913035 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.072 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6436 |\n", "| fps | 110 |\n", "| time_elapsed | 3787 |\n", "| total_timesteps | 6917505 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.127 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 283 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6440 |\n", "| fps | 110 |\n", "| time_elapsed | 3821 |\n", "| total_timesteps | 6921675 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.116 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 283 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6444 |\n", "| fps | 110 |\n", "| time_elapsed | 3861 |\n", "| total_timesteps | 6926517 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0861 |\n", "----------------------------------\n", "Eval num_timesteps=6950000, episode_reward=105.00 +/- 64.37\n", "Episode length: 775.00 +/- 205.44\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 775 |\n", "| mean_reward | 105 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 6950000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.433 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.45e+03 |\n", "| ep_rew_mean | 285 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6448 |\n", "| fps | 110 |\n", "| time_elapsed | 4124 |\n", "| total_timesteps | 6956855 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0405 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.45e+03 |\n", "| ep_rew_mean | 290 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6452 |\n", "| fps | 110 |\n", "| time_elapsed | 4160 |\n", "| total_timesteps | 6961234 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.23 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 279 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6456 |\n", "| fps | 110 |\n", "| time_elapsed | 4188 |\n", "| total_timesteps | 6964754 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0324 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 278 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6460 |\n", "| fps | 111 |\n", "| time_elapsed | 4218 |\n", "| total_timesteps | 6968356 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.273 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 280 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6464 |\n", "| fps | 111 |\n", "| time_elapsed | 4254 |\n", "| total_timesteps | 6972807 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0339 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 278 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6468 |\n", "| fps | 111 |\n", "| time_elapsed | 4293 |\n", "| total_timesteps | 6977601 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0537 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 280 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6472 |\n", "| fps | 111 |\n", "| time_elapsed | 4341 |\n", "| total_timesteps | 6983071 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0356 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 280 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6476 |\n", "| fps | 111 |\n", "| time_elapsed | 4386 |\n", "| total_timesteps | 6987801 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.22 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 276 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6480 |\n", "| fps | 111 |\n", "| time_elapsed | 4421 |\n", "| total_timesteps | 6992183 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0528 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 278 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6484 |\n", "| fps | 111 |\n", "| time_elapsed | 4462 |\n", "| total_timesteps | 6997237 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.475 |\n", "----------------------------------\n", "Eval num_timesteps=7000000, episode_reward=350.80 +/- 86.85\n", "Episode length: 1191.20 +/- 127.98\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.19e+03 |\n", "| mean_reward | 351 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 7000000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0751 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 275 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6488 |\n", "| fps | 110 |\n", "| time_elapsed | 4525 |\n", "| total_timesteps | 7001603 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0412 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 279 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6492 |\n", "| fps | 110 |\n", "| time_elapsed | 4566 |\n", "| total_timesteps | 7006747 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0624 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 286 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6496 |\n", "| fps | 111 |\n", "| time_elapsed | 4606 |\n", "| total_timesteps | 7011739 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0789 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 283 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6500 |\n", "| fps | 111 |\n", "| time_elapsed | 4647 |\n", "| total_timesteps | 7016805 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.053 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 285 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6504 |\n", "| fps | 111 |\n", "| time_elapsed | 4684 |\n", "| total_timesteps | 7021393 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.105 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 282 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6508 |\n", "| fps | 111 |\n", "| time_elapsed | 4722 |\n", "| total_timesteps | 7026073 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0606 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 281 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6512 |\n", "| fps | 111 |\n", "| time_elapsed | 4758 |\n", "| total_timesteps | 7030527 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0761 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 279 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6516 |\n", "| fps | 111 |\n", "| time_elapsed | 4799 |\n", "| total_timesteps | 7035579 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0814 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 276 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6520 |\n", "| fps | 111 |\n", "| time_elapsed | 4836 |\n", "| total_timesteps | 7040155 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0877 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 272 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6524 |\n", "| fps | 111 |\n", "| time_elapsed | 4876 |\n", "| total_timesteps | 7045049 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0496 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 270 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6528 |\n", "| fps | 111 |\n", "| time_elapsed | 4915 |\n", "| total_timesteps | 7049939 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0451 |\n", "----------------------------------\n", "Eval num_timesteps=7050000, episode_reward=367.40 +/- 69.00\n", "Episode length: 1262.00 +/- 112.29\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.26e+03 |\n", "| mean_reward | 367 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 7050000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0588 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 265 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6532 |\n", "| fps | 111 |\n", "| time_elapsed | 4979 |\n", "| total_timesteps | 7054113 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0556 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 264 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6536 |\n", "| fps | 111 |\n", "| time_elapsed | 5016 |\n", "| total_timesteps | 7058683 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.488 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 272 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6540 |\n", "| fps | 111 |\n", "| time_elapsed | 5053 |\n", "| total_timesteps | 7063335 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0629 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 270 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6544 |\n", "| fps | 111 |\n", "| time_elapsed | 5094 |\n", "| total_timesteps | 7068475 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.108 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 263 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6548 |\n", "| fps | 111 |\n", "| time_elapsed | 5136 |\n", "| total_timesteps | 7073601 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.172 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 264 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6552 |\n", "| fps | 111 |\n", "| time_elapsed | 5179 |\n", "| total_timesteps | 7078935 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0962 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 273 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6556 |\n", "| fps | 111 |\n", "| time_elapsed | 5216 |\n", "| total_timesteps | 7083551 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0298 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 277 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6560 |\n", "| fps | 111 |\n", "| time_elapsed | 5259 |\n", "| total_timesteps | 7088879 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.157 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 275 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6564 |\n", "| fps | 112 |\n", "| time_elapsed | 5300 |\n", "| total_timesteps | 7093923 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0908 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6568 |\n", "| fps | 112 |\n", "| time_elapsed | 5333 |\n", "| total_timesteps | 7098081 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.147 |\n", "----------------------------------\n", "Eval num_timesteps=7100000, episode_reward=182.50 +/- 97.27\n", "Episode length: 3862.80 +/- 7713.26\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 3.86e+03 |\n", "| mean_reward | 182 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 7100000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0981 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 273 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6572 |\n", "| fps | 110 |\n", "| time_elapsed | 5460 |\n", "| total_timesteps | 7102711 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0311 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 268 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6576 |\n", "| fps | 110 |\n", "| time_elapsed | 5495 |\n", "| total_timesteps | 7107070 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.126 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 268 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6580 |\n", "| fps | 110 |\n", "| time_elapsed | 5533 |\n", "| total_timesteps | 7111720 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0816 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 263 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6584 |\n", "| fps | 110 |\n", "| time_elapsed | 5569 |\n", "| total_timesteps | 7116220 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.123 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 259 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6588 |\n", "| fps | 110 |\n", "| time_elapsed | 5608 |\n", "| total_timesteps | 7121062 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.48 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 256 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6592 |\n", "| fps | 110 |\n", "| time_elapsed | 5648 |\n", "| total_timesteps | 7125960 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.041 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 253 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6596 |\n", "| fps | 110 |\n", "| time_elapsed | 5686 |\n", "| total_timesteps | 7130746 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0441 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 253 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6600 |\n", "| fps | 111 |\n", "| time_elapsed | 5726 |\n", "| total_timesteps | 7135646 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0939 |\n", "----------------------------------\n", "Eval num_timesteps=7150000, episode_reward=321.00 +/- 115.59\n", "Episode length: 1212.60 +/- 160.95\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.21e+03 |\n", "| mean_reward | 321 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 7150000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0577 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 252 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6604 |\n", "| fps | 110 |\n", "| time_elapsed | 5885 |\n", "| total_timesteps | 7151984 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.082 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 254 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6608 |\n", "| fps | 110 |\n", "| time_elapsed | 5923 |\n", "| total_timesteps | 7156578 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.28 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 249 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6612 |\n", "| fps | 110 |\n", "| time_elapsed | 5958 |\n", "| total_timesteps | 7160688 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.504 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 246 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6616 |\n", "| fps | 110 |\n", "| time_elapsed | 5998 |\n", "| total_timesteps | 7165368 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.102 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 247 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6620 |\n", "| fps | 110 |\n", "| time_elapsed | 6043 |\n", "| total_timesteps | 7170662 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0704 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 248 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6624 |\n", "| fps | 111 |\n", "| time_elapsed | 6080 |\n", "| total_timesteps | 7175136 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0416 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 253 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6628 |\n", "| fps | 111 |\n", "| time_elapsed | 6123 |\n", "| total_timesteps | 7180360 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0879 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.35e+03 |\n", "| ep_rew_mean | 255 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6632 |\n", "| fps | 111 |\n", "| time_elapsed | 6191 |\n", "| total_timesteps | 7188702 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0556 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.34e+03 |\n", "| ep_rew_mean | 255 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6636 |\n", "| fps | 111 |\n", "| time_elapsed | 6224 |\n", "| total_timesteps | 7192744 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0502 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.34e+03 |\n", "| ep_rew_mean | 247 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6640 |\n", "| fps | 111 |\n", "| time_elapsed | 6258 |\n", "| total_timesteps | 7196868 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.113 |\n", "----------------------------------\n", "Eval num_timesteps=7200000, episode_reward=281.80 +/- 58.15\n", "Episode length: 1240.60 +/- 176.29\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.24e+03 |\n", "| mean_reward | 282 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 7200000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.123 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.33e+03 |\n", "| ep_rew_mean | 246 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6644 |\n", "| fps | 110 |\n", "| time_elapsed | 6329 |\n", "| total_timesteps | 7201890 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0386 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.33e+03 |\n", "| ep_rew_mean | 249 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6648 |\n", "| fps | 110 |\n", "| time_elapsed | 6369 |\n", "| total_timesteps | 7206716 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.077 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 249 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6652 |\n", "| fps | 111 |\n", "| time_elapsed | 6405 |\n", "| total_timesteps | 7211218 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.116 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.32e+03 |\n", "| ep_rew_mean | 245 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6656 |\n", "| fps | 111 |\n", "| time_elapsed | 6443 |\n", "| total_timesteps | 7215870 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0531 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.31e+03 |\n", "| ep_rew_mean | 239 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6660 |\n", "| fps | 111 |\n", "| time_elapsed | 6482 |\n", "| total_timesteps | 7220182 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.387 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 236 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6664 |\n", "| fps | 111 |\n", "| time_elapsed | 6519 |\n", "| total_timesteps | 7223772 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0774 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 236 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6668 |\n", "| fps | 111 |\n", "| time_elapsed | 6556 |\n", "| total_timesteps | 7228294 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0827 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 232 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6672 |\n", "| fps | 111 |\n", "| time_elapsed | 6590 |\n", "| total_timesteps | 7232452 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0576 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 239 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6676 |\n", "| fps | 111 |\n", "| time_elapsed | 6630 |\n", "| total_timesteps | 7237378 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.166 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 241 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6680 |\n", "| fps | 111 |\n", "| time_elapsed | 6665 |\n", "| total_timesteps | 7241622 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0994 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 242 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6684 |\n", "| fps | 111 |\n", "| time_elapsed | 6704 |\n", "| total_timesteps | 7246382 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.18 |\n", "----------------------------------\n", "Eval num_timesteps=7250000, episode_reward=128.40 +/- 109.68\n", "Episode length: 993.80 +/- 265.13\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 994 |\n", "| mean_reward | 128 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 7250000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.185 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.3e+03 |\n", "| ep_rew_mean | 248 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6688 |\n", "| fps | 110 |\n", "| time_elapsed | 6763 |\n", "| total_timesteps | 7250670 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0485 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.55e+03 |\n", "| ep_rew_mean | 245 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6692 |\n", "| fps | 111 |\n", "| time_elapsed | 7010 |\n", "| total_timesteps | 7281041 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0363 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.56e+03 |\n", "| ep_rew_mean | 247 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6696 |\n", "| fps | 111 |\n", "| time_elapsed | 7053 |\n", "| total_timesteps | 7286347 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0393 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.55e+03 |\n", "| ep_rew_mean | 247 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6700 |\n", "| fps | 111 |\n", "| time_elapsed | 7088 |\n", "| total_timesteps | 7290753 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0348 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 246 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6704 |\n", "| fps | 111 |\n", "| time_elapsed | 7124 |\n", "| total_timesteps | 7295251 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0718 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 246 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6708 |\n", "| fps | 111 |\n", "| time_elapsed | 7158 |\n", "| total_timesteps | 7299445 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0268 |\n", "----------------------------------\n", "Eval num_timesteps=7300000, episode_reward=374.50 +/- 142.25\n", "Episode length: 1200.80 +/- 104.30\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.2e+03 |\n", "| mean_reward | 374 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 7300000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0421 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.44e+03 |\n", "| ep_rew_mean | 256 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6712 |\n", "| fps | 111 |\n", "| time_elapsed | 7226 |\n", "| total_timesteps | 7304221 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0624 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 259 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6716 |\n", "| fps | 111 |\n", "| time_elapsed | 7262 |\n", "| total_timesteps | 7308757 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0147 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 259 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6720 |\n", "| fps | 111 |\n", "| time_elapsed | 7299 |\n", "| total_timesteps | 7313303 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.032 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 260 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6724 |\n", "| fps | 111 |\n", "| time_elapsed | 7335 |\n", "| total_timesteps | 7317809 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0836 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 261 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6728 |\n", "| fps | 111 |\n", "| time_elapsed | 7378 |\n", "| total_timesteps | 7323163 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0891 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 263 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6732 |\n", "| fps | 111 |\n", "| time_elapsed | 7414 |\n", "| total_timesteps | 7327469 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0629 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 267 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6736 |\n", "| fps | 111 |\n", "| time_elapsed | 7451 |\n", "| total_timesteps | 7331851 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0572 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 269 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6740 |\n", "| fps | 111 |\n", "| time_elapsed | 7491 |\n", "| total_timesteps | 7336793 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0808 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 268 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6744 |\n", "| fps | 111 |\n", "| time_elapsed | 7525 |\n", "| total_timesteps | 7340935 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.144 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 264 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6748 |\n", "| fps | 111 |\n", "| time_elapsed | 7561 |\n", "| total_timesteps | 7345295 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.11 |\n", "----------------------------------\n", "Eval num_timesteps=7350000, episode_reward=137.40 +/- 58.51\n", "Episode length: 1129.80 +/- 99.01\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.13e+03 |\n", "| mean_reward | 137 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 7350000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.15 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.39e+03 |\n", "| ep_rew_mean | 258 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6752 |\n", "| fps | 111 |\n", "| time_elapsed | 7632 |\n", "| total_timesteps | 7350595 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.1 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 261 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6756 |\n", "| fps | 111 |\n", "| time_elapsed | 7674 |\n", "| total_timesteps | 7355707 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.103 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.4e+03 |\n", "| ep_rew_mean | 266 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6760 |\n", "| fps | 111 |\n", "| time_elapsed | 7711 |\n", "| total_timesteps | 7360293 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.39 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 264 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6764 |\n", "| fps | 111 |\n", "| time_elapsed | 7746 |\n", "| total_timesteps | 7364547 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0922 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 266 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6768 |\n", "| fps | 111 |\n", "| time_elapsed | 7785 |\n", "| total_timesteps | 7369277 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.251 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 272 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6772 |\n", "| fps | 111 |\n", "| time_elapsed | 7822 |\n", "| total_timesteps | 7373905 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.093 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.41e+03 |\n", "| ep_rew_mean | 269 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6776 |\n", "| fps | 111 |\n", "| time_elapsed | 7862 |\n", "| total_timesteps | 7378687 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0597 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 267 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6780 |\n", "| fps | 111 |\n", "| time_elapsed | 7902 |\n", "| total_timesteps | 7383675 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0561 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.42e+03 |\n", "| ep_rew_mean | 269 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6784 |\n", "| fps | 111 |\n", "| time_elapsed | 7942 |\n", "| total_timesteps | 7388555 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.124 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.43e+03 |\n", "| ep_rew_mean | 267 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6788 |\n", "| fps | 111 |\n", "| time_elapsed | 7982 |\n", "| total_timesteps | 7393439 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0483 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 273 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6792 |\n", "| fps | 111 |\n", "| time_elapsed | 8026 |\n", "| total_timesteps | 7398759 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0955 |\n", "----------------------------------\n", "Eval num_timesteps=7400000, episode_reward=224.30 +/- 4.27\n", "Episode length: 1109.40 +/- 47.19\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.11e+03 |\n", "| mean_reward | 224 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 7400000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.135 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 272 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6796 |\n", "| fps | 111 |\n", "| time_elapsed | 8094 |\n", "| total_timesteps | 7403847 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.144 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6800 |\n", "| fps | 111 |\n", "| time_elapsed | 8133 |\n", "| total_timesteps | 7408552 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.052 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 277 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6804 |\n", "| fps | 111 |\n", "| time_elapsed | 8171 |\n", "| total_timesteps | 7413308 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.142 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 282 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6808 |\n", "| fps | 111 |\n", "| time_elapsed | 8211 |\n", "| total_timesteps | 7418158 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0612 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 279 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6812 |\n", "| fps | 111 |\n", "| time_elapsed | 8249 |\n", "| total_timesteps | 7422828 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0302 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 284 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6816 |\n", "| fps | 111 |\n", "| time_elapsed | 8289 |\n", "| total_timesteps | 7427642 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.104 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 281 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6820 |\n", "| fps | 111 |\n", "| time_elapsed | 8329 |\n", "| total_timesteps | 7432515 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.252 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 285 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6824 |\n", "| fps | 112 |\n", "| time_elapsed | 8370 |\n", "| total_timesteps | 7437509 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0364 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 282 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6828 |\n", "| fps | 112 |\n", "| time_elapsed | 8408 |\n", "| total_timesteps | 7442311 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.118 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 282 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6832 |\n", "| fps | 112 |\n", "| time_elapsed | 8447 |\n", "| total_timesteps | 7447103 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0682 |\n", "----------------------------------\n", "Eval num_timesteps=7450000, episode_reward=386.70 +/- 14.66\n", "Episode length: 1251.20 +/- 150.46\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.25e+03 |\n", "| mean_reward | 387 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 7450000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.215 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 280 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6836 |\n", "| fps | 111 |\n", "| time_elapsed | 8517 |\n", "| total_timesteps | 7451981 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0463 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 280 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6840 |\n", "| fps | 111 |\n", "| time_elapsed | 8549 |\n", "| total_timesteps | 7455907 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.147 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 283 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6844 |\n", "| fps | 111 |\n", "| time_elapsed | 8586 |\n", "| total_timesteps | 7460163 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0454 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 288 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6848 |\n", "| fps | 111 |\n", "| time_elapsed | 8629 |\n", "| total_timesteps | 7464735 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.375 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 293 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6852 |\n", "| fps | 111 |\n", "| time_elapsed | 8665 |\n", "| total_timesteps | 7469237 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0748 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 294 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6856 |\n", "| fps | 111 |\n", "| time_elapsed | 8701 |\n", "| total_timesteps | 7473729 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0557 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 296 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6860 |\n", "| fps | 111 |\n", "| time_elapsed | 8741 |\n", "| total_timesteps | 7478713 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0381 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 300 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6864 |\n", "| fps | 112 |\n", "| time_elapsed | 8782 |\n", "| total_timesteps | 7483861 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.602 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 301 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6868 |\n", "| fps | 112 |\n", "| time_elapsed | 8832 |\n", "| total_timesteps | 7489991 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.664 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 299 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6872 |\n", "| fps | 112 |\n", "| time_elapsed | 8871 |\n", "| total_timesteps | 7494917 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0782 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 303 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 6876 |\n", "| fps | 112 |\n", "| time_elapsed | 8912 |\n", "| total_timesteps | 7499971 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.104 |\n", "----------------------------------\n", "Eval num_timesteps=7500000, episode_reward=330.50 +/- 98.32\n", "Episode length: 1339.40 +/- 58.11\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.34e+03 |\n", "| mean_reward | 330 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 7500000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.134 |\n", "----------------------------------\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Train the model\n", "model.learn(total_timesteps=NUM_TIMESTEPS, \n", " callback=callback_list, \n", " tb_log_name=\"./tb/\", \n", " reset_num_timesteps=False)" ] }, { "cell_type": "code", "execution_count": 9, "id": "a8249ccc", "metadata": { "execution": { "iopub.execute_input": "2024-05-15T18:27:14.587319Z", "iopub.status.busy": "2024-05-15T18:27:14.586603Z", "iopub.status.idle": "2024-05-15T18:28:06.403750Z", "shell.execute_reply": "2024-05-15T18:28:06.402639Z" }, "papermill": { "duration": 51.845529, "end_time": "2024-05-15T18:28:06.407497", "exception": false, "start_time": "2024-05-15T18:27:14.561968", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Save the model, policy, and replay buffer for future loading and training\n", "model.save(MODEL_FILE_NAME)\n", "model.save_replay_buffer(BUFFER_FILE_NAME)\n", "model.policy.save(POLICY_FILE_NAME)" ] } ], "metadata": { "kaggle": { "accelerator": "nvidiaTeslaT4", "dataSources": [ { "sourceId": 177674491, "sourceType": "kernelVersion" } ], "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" }, "papermill": { "default_parameters": {}, "duration": 9155.097473, "end_time": "2024-05-15T18:28:09.495993", "environment_variables": {}, "exception": null, "input_path": "__notebook__.ipynb", "output_path": "__notebook__.ipynb", "parameters": {}, "start_time": "2024-05-15T15:55:34.398520", "version": "2.5.0" } }, "nbformat": 4, "nbformat_minor": 5 }