{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "17a36f3e", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T21:42:47.360006Z", "iopub.status.busy": "2024-05-10T21:42:47.359704Z", "iopub.status.idle": "2024-05-10T21:43:52.179248Z", "shell.execute_reply": "2024-05-10T21:43:52.178168Z" }, "papermill": { "duration": 64.827128, "end_time": "2024-05-10T21:43:52.181761", "exception": false, "start_time": "2024-05-10T21:42:47.354633", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "%%capture\n", "!pip install stable-baselines3[extra]\n", "!pip install moviepy" ] }, { "cell_type": "code", "execution_count": 2, "id": "cbe98cd4", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T21:43:52.190413Z", "iopub.status.busy": "2024-05-10T21:43:52.190109Z", "iopub.status.idle": "2024-05-10T21:44:10.919840Z", "shell.execute_reply": "2024-05-10T21:44:10.918861Z" }, "papermill": { "duration": 18.73666, "end_time": "2024-05-10T21:44:10.922202", "exception": false, "start_time": "2024-05-10T21:43:52.185542", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-05-10 21:44:00.032024: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-05-10 21:44:00.032155: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-05-10 21:44:00.188827: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n" ] } ], "source": [ "from stable_baselines3 import PPO\n", "from stable_baselines3.common.monitor import Monitor\n", "from stable_baselines3.common.callbacks import BaseCallback, EvalCallback, CallbackList\n", "from stable_baselines3.common.logger import Video, HParam, TensorBoardOutputFormat\n", "from stable_baselines3.common.evaluation import evaluate_policy\n", "\n", "from typing import Any, Dict\n", "\n", "import gymnasium as gym\n", "import torch as th\n", "import numpy as np\n", "\n", "# =====File names=====\n", "MODEL_FILE_NAME = \"ALE-Pacman-v5\"\n", "POLICY_FILE_NAME = \"ppo_policy_pacman_v2\"\n", "\n", "# =====Model Config=====\n", "# Evaluate in 20ths\n", "EVAL_CALLBACK_FREQ = 75_000\n", "# Record in quarters (the last one won't record, will have to do manually)\n", "VIDEO_CALLBACK_FREQ = 375_000\n", "FRAMESKIP = 4\n", "NUM_TIMESTEPS = 1_500_000\n", "\n", "# =====Hyperparams=====\n", "# The hyperparams should be defined here, however I want to run this just defaults for this run" ] }, { "cell_type": "code", "execution_count": 3, "id": "3e7de29a", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T21:44:10.931147Z", "iopub.status.busy": "2024-05-10T21:44:10.930636Z", "iopub.status.idle": "2024-05-10T21:44:10.940628Z", "shell.execute_reply": "2024-05-10T21:44:10.939799Z" }, "papermill": { "duration": 0.016613, "end_time": "2024-05-10T21:44:10.942587", "exception": false, "start_time": "2024-05-10T21:44:10.925974", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# VideoRecorderCallback\n", "# The VideoRecorderCallback should record a video of the agent in the evaluation environment\n", "# every render_freq timesteps. It will record one episode. It will also record one episode when\n", "# the training has been completed\n", "\n", "class VideoRecorderCallback(BaseCallback):\n", " def __init__(self, eval_env: gym.Env, render_freq: int, n_eval_episodes: int = 1, deterministic: bool = True):\n", " \"\"\"\n", " Records a video of an agent's trajectory traversing ``eval_env`` and logs it to TensorBoard.\n", " :param eval_env: A gym environment from which the trajectory is recorded\n", " :param render_freq: Render the agent's trajectory every eval_freq call of the callback.\n", " :param n_eval_episodes: Number of episodes to render\n", " :param deterministic: Whether to use deterministic or stochastic policy\n", " \"\"\"\n", " super().__init__()\n", " self._eval_env = eval_env\n", " self._render_freq = render_freq\n", " self._n_eval_episodes = n_eval_episodes\n", " self._deterministic = deterministic\n", "\n", " def _on_step(self) -> bool:\n", " if self.n_calls % self._render_freq == 0:\n", " screens = []\n", "\n", " def grab_screens(_locals: Dict[str, Any], _globals: Dict[str, Any]) -> None:\n", " \"\"\"\n", " Renders the environment in its current state, recording the screen in the captured `screens` list\n", " :param _locals: A dictionary containing all local variables of the callback's scope\n", " :param _globals: A dictionary containing all global variables of the callback's scope\n", " \"\"\"\n", " screen = self._eval_env.render()\n", " # PyTorch uses CxHxW vs HxWxC gym (and tensorflow) image convention\n", " screens.append(screen.transpose(2, 0, 1))\n", "\n", " evaluate_policy(\n", " self.model,\n", " self._eval_env,\n", " callback=grab_screens,\n", " n_eval_episodes=self._n_eval_episodes,\n", " deterministic=self._deterministic,\n", " )\n", " self.logger.record(\n", " \"trajectory/video\",\n", " Video(th.from_numpy(np.array([screens])), fps=60),\n", " exclude=(\"stdout\", \"log\", \"json\", \"csv\"),\n", " )\n", " return True" ] }, { "cell_type": "code", "execution_count": 4, "id": "5f6df545", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T21:44:10.950758Z", "iopub.status.busy": "2024-05-10T21:44:10.950450Z", "iopub.status.idle": "2024-05-10T21:44:10.959978Z", "shell.execute_reply": "2024-05-10T21:44:10.959140Z" }, "papermill": { "duration": 0.015703, "end_time": "2024-05-10T21:44:10.961776", "exception": false, "start_time": "2024-05-10T21:44:10.946073", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# HParamCallback\n", "# This should log the hyperparameters specified and map the metrics that are logged to \n", "# the appropriate run.\n", "class HParamCallback(BaseCallback):\n", " \"\"\"\n", " Saves the hyperparameters and metrics at the start of the training, and logs them to TensorBoard.\n", " \"\"\" \n", " def __init__(self):\n", " super().__init__()\n", " \n", "\n", " def _on_training_start(self) -> None:\n", " \n", " hparam_dict = {\n", " \"algorithm\": self.model.__class__.__name__,\n", " \"policy\": self.model.policy.__class__.__name__,\n", " \"environment\": self.model.env.__class__.__name__,\n", " \"n_steps\": self.model.n_steps,\n", "# \"n_envs\": self.model.n_envs,\n", " \"batch_size\": self.model.batch_size,\n", " \"n_epochs\": self.model.n_epochs,\n", " \"gamma\": self.model.gamma,\n", " \"gae_lambda\": self.model.gae_lambda,\n", "# \"clip_range\": self.model.clip_range,\n", " \"ent_coef\": self.model.ent_coef,\n", " \"vf_coef\": self.model.vf_coef,\n", " \"max_grad_norm\": self.model.max_grad_norm,\n", " \"target_kl\": self.model.target_kl \n", " }\n", " # define the metrics that will appear in the `HPARAMS` Tensorboard tab by referencing their tag\n", " # Tensorbaord will find & display metrics from the `SCALARS` tab\n", " metric_dict = {\n", " \"eval/mean_ep_length\": 0,\n", " \"eval/mean_reward\": 0,\n", " \"rollout/ep_len_mean\": 0,\n", " \"rollout/ep_rew_mean\": 0,\n", " \"rollout/exploration_rate\": 0,\n", " \"time/_episode_num\": 0,\n", " \"time/fps\": 0,\n", " \"time/total_timesteps\": 0,\n", " \"train/learning_rate\": 0.0,\n", " \"train/loss\": 0.0,\n", " \"train/n_updates\": 0.0,\n", " \"locals/rewards\": 0.0,\n", " \"locals/infos_0_lives\": 0.0,\n", " \"locals/num_collected_steps\": 0.0,\n", " \"locals/num_collected_episodes\": 0.0\n", " }\n", " \n", " self.logger.record(\n", " \"hparams\",\n", " HParam(hparam_dict, metric_dict),\n", " exclude=(\"stdout\", \"log\", \"json\", \"csv\"),\n", " )\n", " \n", " def _on_step(self) -> bool:\n", " return True" ] }, { "cell_type": "code", "execution_count": 5, "id": "18a8965d", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T21:44:10.970563Z", "iopub.status.busy": "2024-05-10T21:44:10.970263Z", "iopub.status.idle": "2024-05-10T21:44:10.983671Z", "shell.execute_reply": "2024-05-10T21:44:10.982993Z" }, "papermill": { "duration": 0.020263, "end_time": "2024-05-10T21:44:10.985659", "exception": false, "start_time": "2024-05-10T21:44:10.965396", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# PlotTensorboardValuesCallback\n", "# This callback should log values to tensorboard on every step. \n", "# The self.logger class should plot a new scalar value when recording.\n", "\n", "class PlotTensorboardValuesCallback(BaseCallback):\n", " \"\"\"\n", " Custom callback for plotting additional values in tensorboard.\n", " \"\"\"\n", " def __init__(self, eval_env: gym.Env, train_env: gym.Env, model: PPO, verbose=0):\n", " super().__init__(verbose)\n", " self._eval_env = eval_env\n", " self._train_env = train_env\n", " self._model = model\n", "\n", " def _on_training_start(self) -> None:\n", " output_formats = self.logger.output_formats\n", " # Save reference to tensorboard formatter object\n", " # note: the failure case (not formatter found) is not handled here, should be done with try/except.\n", " try:\n", " self.tb_formatter = next(formatter for formatter in output_formats if isinstance(formatter, TensorBoardOutputFormat))\n", " except:\n", " print(\"Exception thrown in tb_formatter initialization.\") \n", " \n", " self.tb_formatter.writer.add_text(\"metadata/eval_env\", str(self._eval_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"metadata/train_env\", str(self._train_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " \n", "\n", " def _on_step(self) -> bool:\n", " self.logger.record(\"time/_episode_num\", self.model._episode_num, exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"train/n_updates\", self.model._n_updates, exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/rewards\", self.locals[\"rewards\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/infos_0_lives\", self.locals[\"infos\"][0][\"lives\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " \n", " return True\n", " \n", " def _on_training_end(self) -> None:\n", " self.tb_formatter.writer.add_text(\"metadata/eval_env\", str(self._eval_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"metadata/train_env\", str(self._train_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()" ] }, { "cell_type": "code", "execution_count": 6, "id": "ced959a6", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T21:44:10.994380Z", "iopub.status.busy": "2024-05-10T21:44:10.993777Z", "iopub.status.idle": "2024-05-10T21:44:13.955491Z", "shell.execute_reply": "2024-05-10T21:44:13.954678Z" }, "papermill": { "duration": 2.968592, "end_time": "2024-05-10T21:44:13.957779", "exception": false, "start_time": "2024-05-10T21:44:10.989187", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)\n", "[Powered by Stella]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Using cuda device\n", "Wrapping the env with a `Monitor` wrapper\n", "Wrapping the env in a DummyVecEnv.\n", "Wrapping the env in a VecTransposeImage.\n" ] } ], "source": [ "# make the training and evaluation environments\n", "eval_env = Monitor(gym.make(\"ALE/Pacman-v5\", render_mode=\"rgb_array\", frameskip=FRAMESKIP))\n", "train_env = gym.make(\"ALE/Pacman-v5\", render_mode=\"rgb_array\", frameskip=FRAMESKIP)\n", "\n", "# Make the model with specified hyperparams\n", "model = PPO(\n", " \"CnnPolicy\",\n", " train_env,\n", " verbose=1,\n", " tensorboard_log=\"./\",\n", " )" ] }, { "cell_type": "code", "execution_count": 7, "id": "593e332e", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T21:44:13.966632Z", "iopub.status.busy": "2024-05-10T21:44:13.966063Z", "iopub.status.idle": "2024-05-10T21:44:13.971782Z", "shell.execute_reply": "2024-05-10T21:44:13.970964Z" }, "papermill": { "duration": 0.011953, "end_time": "2024-05-10T21:44:13.973539", "exception": false, "start_time": "2024-05-10T21:44:13.961586", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Define the callbacks and put them in a list\n", "eval_callback = EvalCallback(\n", " eval_env,\n", " best_model_save_path=\"./best_model/\",\n", " log_path=\"./evals/\",\n", " eval_freq=EVAL_CALLBACK_FREQ,\n", " n_eval_episodes=10,\n", " deterministic=True,\n", " render=False)\n", "\n", "tbplot_callback = PlotTensorboardValuesCallback(eval_env=eval_env, train_env=train_env, model=model)\n", "video_callback = VideoRecorderCallback(eval_env, render_freq=VIDEO_CALLBACK_FREQ)\n", "hparam_callback = HParamCallback()\n", "\n", "callback_list = CallbackList([hparam_callback, eval_callback, video_callback, tbplot_callback])" ] }, { "cell_type": "code", "execution_count": 8, "id": "13b8e247", "metadata": { "execution": { "iopub.execute_input": "2024-05-10T21:44:13.982010Z", "iopub.status.busy": "2024-05-10T21:44:13.981693Z", "iopub.status.idle": "2024-05-11T00:41:04.599826Z", "shell.execute_reply": "2024-05-11T00:41:04.598817Z" }, "papermill": { "duration": 10610.624939, "end_time": "2024-05-11T00:41:04.601979", "exception": false, "start_time": "2024-05-10T21:44:13.977040", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Logging to ././tb/_1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/stable_baselines3/common/callbacks.py:403: UserWarning: Training and eval env are not of the same type != \n", " warnings.warn(\"Training and eval env are not of the same type\" f\"{self.training_env} != {self.eval_env}\")\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 394 |\n", "| ep_rew_mean | 14 |\n", "| time/ | |\n", "| fps | 239 |\n", "| iterations | 1 |\n", "| time_elapsed | 8 |\n", "| total_timesteps | 2048 |\n", "---------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 430 |\n", "| ep_rew_mean | 15.3 |\n", "| time/ | |\n", "| fps | 175 |\n", "| iterations | 2 |\n", "| time_elapsed | 23 |\n", "| total_timesteps | 4096 |\n", "| train/ | |\n", "| approx_kl | 0.0071178726 |\n", "| clip_fraction | 0.0365 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.6 |\n", "| explained_variance | 0.00987 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.056 |\n", "| policy_gradient_loss | -0.00219 |\n", "| value_loss | 0.931 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 416 |\n", "| ep_rew_mean | 15.6 |\n", "| time/ | |\n", "| fps | 164 |\n", "| iterations | 3 |\n", "| time_elapsed | 37 |\n", "| total_timesteps | 6144 |\n", "| train/ | |\n", "| approx_kl | 0.012499064 |\n", "| clip_fraction | 0.116 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.59 |\n", "| explained_variance | 0.434 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0393 |\n", "| policy_gradient_loss | -0.00726 |\n", "| value_loss | 0.348 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 425 |\n", "| ep_rew_mean | 16.5 |\n", "| time/ | |\n", "| fps | 160 |\n", "| iterations | 4 |\n", "| time_elapsed | 51 |\n", "| total_timesteps | 8192 |\n", "| train/ | |\n", "| approx_kl | 0.008583425 |\n", "| clip_fraction | 0.0473 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.59 |\n", "| explained_variance | 0.476 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0702 |\n", "| policy_gradient_loss | -0.00624 |\n", "| value_loss | 0.283 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 431 |\n", "| ep_rew_mean | 19.6 |\n", "| time/ | |\n", "| fps | 157 |\n", "| iterations | 5 |\n", "| time_elapsed | 65 |\n", "| total_timesteps | 10240 |\n", "| train/ | |\n", "| approx_kl | 0.009672716 |\n", "| clip_fraction | 0.0925 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.59 |\n", "| explained_variance | 0.587 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.041 |\n", "| policy_gradient_loss | -0.00867 |\n", "| value_loss | 0.246 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 439 |\n", "| ep_rew_mean | 19.4 |\n", "| time/ | |\n", "| fps | 155 |\n", "| iterations | 6 |\n", "| time_elapsed | 79 |\n", "| total_timesteps | 12288 |\n", "| train/ | |\n", "| approx_kl | 0.016034465 |\n", "| clip_fraction | 0.186 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.58 |\n", "| explained_variance | -0.0905 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.335 |\n", "| policy_gradient_loss | -0.000166 |\n", "| value_loss | 4.92 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 438 |\n", "| ep_rew_mean | 18.8 |\n", "| time/ | |\n", "| fps | 153 |\n", "| iterations | 7 |\n", "| time_elapsed | 93 |\n", "| total_timesteps | 14336 |\n", "| train/ | |\n", "| approx_kl | 0.008500081 |\n", "| clip_fraction | 0.126 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.58 |\n", "| explained_variance | 0.389 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0749 |\n", "| policy_gradient_loss | -0.0139 |\n", "| value_loss | 0.292 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 437 |\n", "| ep_rew_mean | 18.4 |\n", "| time/ | |\n", "| fps | 152 |\n", "| iterations | 8 |\n", "| time_elapsed | 107 |\n", "| total_timesteps | 16384 |\n", "| train/ | |\n", "| approx_kl | 0.014555598 |\n", "| clip_fraction | 0.149 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.56 |\n", "| explained_variance | 0.64 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.1 |\n", "| policy_gradient_loss | -0.0175 |\n", "| value_loss | 0.223 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 445 |\n", "| ep_rew_mean | 20.5 |\n", "| time/ | |\n", "| fps | 152 |\n", "| iterations | 9 |\n", "| time_elapsed | 120 |\n", "| total_timesteps | 18432 |\n", "| train/ | |\n", "| approx_kl | 0.014780698 |\n", "| clip_fraction | 0.178 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.53 |\n", "| explained_variance | 0.666 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0861 |\n", "| policy_gradient_loss | -0.0221 |\n", "| value_loss | 0.264 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 460 |\n", "| ep_rew_mean | 21.1 |\n", "| time/ | |\n", "| fps | 151 |\n", "| iterations | 10 |\n", "| time_elapsed | 134 |\n", "| total_timesteps | 20480 |\n", "| train/ | |\n", "| approx_kl | 0.017348997 |\n", "| clip_fraction | 0.169 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.48 |\n", "| explained_variance | 0.377 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.417 |\n", "| policy_gradient_loss | -0.00223 |\n", "| value_loss | 2.49 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 472 |\n", "| ep_rew_mean | 21.5 |\n", "| time/ | |\n", "| fps | 151 |\n", "| iterations | 11 |\n", "| time_elapsed | 149 |\n", "| total_timesteps | 22528 |\n", "| train/ | |\n", "| approx_kl | 0.016820736 |\n", "| clip_fraction | 0.214 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.46 |\n", "| explained_variance | 0.506 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.195 |\n", "| policy_gradient_loss | -0.012 |\n", "| value_loss | 0.845 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 482 |\n", "| ep_rew_mean | 25.7 |\n", "| time/ | |\n", "| fps | 150 |\n", "| iterations | 12 |\n", "| time_elapsed | 162 |\n", "| total_timesteps | 24576 |\n", "| train/ | |\n", "| approx_kl | 0.01654692 |\n", "| clip_fraction | 0.194 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.45 |\n", "| explained_variance | 0.572 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.118 |\n", "| policy_gradient_loss | -0.023 |\n", "| value_loss | 0.57 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 478 |\n", "| ep_rew_mean | 25.2 |\n", "| time/ | |\n", "| fps | 150 |\n", "| iterations | 13 |\n", "| time_elapsed | 176 |\n", "| total_timesteps | 26624 |\n", "| train/ | |\n", "| approx_kl | 0.025725368 |\n", "| clip_fraction | 0.344 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.36 |\n", "| explained_variance | 0.0948 |\n", "| learning_rate | 0.0003 |\n", "| loss | 6.52 |\n", "| policy_gradient_loss | 0.00319 |\n", "| value_loss | 16 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 483 |\n", "| ep_rew_mean | 25.5 |\n", "| time/ | |\n", "| fps | 150 |\n", "| iterations | 14 |\n", "| time_elapsed | 190 |\n", "| total_timesteps | 28672 |\n", "| train/ | |\n", "| approx_kl | 0.025509264 |\n", "| clip_fraction | 0.274 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.31 |\n", "| explained_variance | 0.626 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.326 |\n", "| policy_gradient_loss | -0.00371 |\n", "| value_loss | 1.23 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 485 |\n", "| ep_rew_mean | 25.3 |\n", "| time/ | |\n", "| fps | 149 |\n", "| iterations | 15 |\n", "| time_elapsed | 204 |\n", "| total_timesteps | 30720 |\n", "| train/ | |\n", "| approx_kl | 0.020923607 |\n", "| clip_fraction | 0.28 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.28 |\n", "| explained_variance | 0.507 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.16 |\n", "| policy_gradient_loss | -0.00123 |\n", "| value_loss | 3.99 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 495 |\n", "| ep_rew_mean | 25.4 |\n", "| time/ | |\n", "| fps | 149 |\n", "| iterations | 16 |\n", "| time_elapsed | 218 |\n", "| total_timesteps | 32768 |\n", "| train/ | |\n", "| approx_kl | 0.016227828 |\n", "| clip_fraction | 0.192 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.21 |\n", "| explained_variance | 0.707 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.178 |\n", "| policy_gradient_loss | -0.00649 |\n", "| value_loss | 0.77 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 498 |\n", "| ep_rew_mean | 25.3 |\n", "| time/ | |\n", "| fps | 149 |\n", "| iterations | 17 |\n", "| time_elapsed | 232 |\n", "| total_timesteps | 34816 |\n", "| train/ | |\n", "| approx_kl | 0.021795249 |\n", "| clip_fraction | 0.216 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.17 |\n", "| explained_variance | 0.657 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0889 |\n", "| policy_gradient_loss | -0.0164 |\n", "| value_loss | 0.647 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 499 |\n", "| ep_rew_mean | 24.9 |\n", "| time/ | |\n", "| fps | 149 |\n", "| iterations | 18 |\n", "| time_elapsed | 246 |\n", "| total_timesteps | 36864 |\n", "| train/ | |\n", "| approx_kl | 0.022020828 |\n", "| clip_fraction | 0.245 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.13 |\n", "| explained_variance | 0.724 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0554 |\n", "| policy_gradient_loss | -0.0204 |\n", "| value_loss | 0.397 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 499 |\n", "| ep_rew_mean | 24.9 |\n", "| time/ | |\n", "| fps | 149 |\n", "| iterations | 19 |\n", "| time_elapsed | 260 |\n", "| total_timesteps | 38912 |\n", "| train/ | |\n", "| approx_kl | 0.019522412 |\n", "| clip_fraction | 0.192 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.16 |\n", "| explained_variance | 0.521 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0236 |\n", "| policy_gradient_loss | -0.0272 |\n", "| value_loss | 0.412 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 499 |\n", "| ep_rew_mean | 24.9 |\n", "| time/ | |\n", "| fps | 149 |\n", "| iterations | 20 |\n", "| time_elapsed | 274 |\n", "| total_timesteps | 40960 |\n", "| train/ | |\n", "| approx_kl | 0.012278472 |\n", "| clip_fraction | 0.208 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.12 |\n", "| explained_variance | 0.368 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0247 |\n", "| policy_gradient_loss | -0.00213 |\n", "| value_loss | 0.0119 |\n", "-----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 499 |\n", "| ep_rew_mean | 24.9 |\n", "| time/ | |\n", "| fps | 149 |\n", "| iterations | 21 |\n", "| time_elapsed | 288 |\n", "| total_timesteps | 43008 |\n", "| train/ | |\n", "| approx_kl | 0.0194366 |\n", "| clip_fraction | 0.192 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.06 |\n", "| explained_variance | 0.76 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0163 |\n", "| policy_gradient_loss | -0.00544 |\n", "| value_loss | 0.00729 |\n", "---------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 499 |\n", "| ep_rew_mean | 24.9 |\n", "| time/ | |\n", "| fps | 149 |\n", "| iterations | 22 |\n", "| time_elapsed | 302 |\n", "| total_timesteps | 45056 |\n", "| train/ | |\n", "| approx_kl | 0.019239604 |\n", "| clip_fraction | 0.205 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.935 |\n", "| explained_variance | 0.791 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0204 |\n", "| policy_gradient_loss | -0.0251 |\n", "| value_loss | 0.00566 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 499 |\n", "| ep_rew_mean | 24.9 |\n", "| time/ | |\n", "| fps | 149 |\n", "| iterations | 23 |\n", "| time_elapsed | 316 |\n", "| total_timesteps | 47104 |\n", "| train/ | |\n", "| approx_kl | 0.015872424 |\n", "| clip_fraction | 0.13 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.04 |\n", "| explained_variance | 0.643 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0108 |\n", "| policy_gradient_loss | -0.00785 |\n", "| value_loss | 0.00915 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 499 |\n", "| ep_rew_mean | 24.9 |\n", "| time/ | |\n", "| fps | 148 |\n", "| iterations | 24 |\n", "| time_elapsed | 330 |\n", "| total_timesteps | 49152 |\n", "| train/ | |\n", "| approx_kl | 0.005610086 |\n", "| clip_fraction | 0.0869 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.743 |\n", "| explained_variance | -1.71 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.00584 |\n", "| policy_gradient_loss | -0.00887 |\n", "| value_loss | 0.00497 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 499 |\n", "| ep_rew_mean | 24.9 |\n", "| time/ | |\n", "| fps | 148 |\n", "| iterations | 25 |\n", "| time_elapsed | 343 |\n", "| total_timesteps | 51200 |\n", "| train/ | |\n", "| approx_kl | 0.009867394 |\n", "| clip_fraction | 0.0885 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.77 |\n", "| explained_variance | 0.465 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0337 |\n", "| policy_gradient_loss | -0.0212 |\n", "| value_loss | 0.0255 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 499 |\n", "| ep_rew_mean | 24.9 |\n", "| time/ | |\n", "| fps | 148 |\n", "| iterations | 26 |\n", "| time_elapsed | 357 |\n", "| total_timesteps | 53248 |\n", "| train/ | |\n", "| approx_kl | 0.009172158 |\n", "| clip_fraction | 0.0889 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.748 |\n", "| explained_variance | 0.283 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0292 |\n", "| policy_gradient_loss | -0.0168 |\n", "| value_loss | 0.00228 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 499 |\n", "| ep_rew_mean | 24.9 |\n", "| time/ | |\n", "| fps | 148 |\n", "| iterations | 27 |\n", "| time_elapsed | 371 |\n", "| total_timesteps | 55296 |\n", "| train/ | |\n", "| approx_kl | 0.009952265 |\n", "| clip_fraction | 0.0997 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.806 |\n", "| explained_variance | 0.467 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.035 |\n", "| policy_gradient_loss | -0.0205 |\n", "| value_loss | 0.00867 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 499 |\n", "| ep_rew_mean | 24.9 |\n", "| time/ | |\n", "| fps | 148 |\n", "| iterations | 28 |\n", "| time_elapsed | 385 |\n", "| total_timesteps | 57344 |\n", "| train/ | |\n", "| approx_kl | 0.014252097 |\n", "| clip_fraction | 0.111 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.95 |\n", "| explained_variance | 0.602 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0151 |\n", "| policy_gradient_loss | -0.0142 |\n", "| value_loss | 0.00209 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 499 |\n", "| ep_rew_mean | 24.9 |\n", "| time/ | |\n", "| fps | 148 |\n", "| iterations | 29 |\n", "| time_elapsed | 399 |\n", "| total_timesteps | 59392 |\n", "| train/ | |\n", "| approx_kl | 0.013166374 |\n", "| clip_fraction | 0.125 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.79 |\n", "| explained_variance | 0.758 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0264 |\n", "| policy_gradient_loss | -0.0143 |\n", "| value_loss | 0.005 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 499 |\n", "| ep_rew_mean | 24.9 |\n", "| time/ | |\n", "| fps | 148 |\n", "| iterations | 30 |\n", "| time_elapsed | 413 |\n", "| total_timesteps | 61440 |\n", "| train/ | |\n", "| approx_kl | 0.023709435 |\n", "| clip_fraction | 0.213 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.11 |\n", "| explained_variance | 0.208 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0484 |\n", "| policy_gradient_loss | -0.0291 |\n", "| value_loss | 0.00768 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 857 |\n", "| ep_rew_mean | 25.2 |\n", "| time/ | |\n", "| fps | 148 |\n", "| iterations | 31 |\n", "| time_elapsed | 426 |\n", "| total_timesteps | 63488 |\n", "| train/ | |\n", "| approx_kl | 0.028808502 |\n", "| clip_fraction | 0.249 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.21 |\n", "| explained_variance | 0.275 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.00893 |\n", "| policy_gradient_loss | -0.0307 |\n", "| value_loss | 0.0747 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 840 |\n", "| ep_rew_mean | 25.2 |\n", "| time/ | |\n", "| fps | 148 |\n", "| iterations | 32 |\n", "| time_elapsed | 440 |\n", "| total_timesteps | 65536 |\n", "| train/ | |\n", "| approx_kl | 0.022045096 |\n", "| clip_fraction | 0.194 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.2 |\n", "| explained_variance | 0.338 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0133 |\n", "| policy_gradient_loss | -0.0321 |\n", "| value_loss | 0.0563 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 831 |\n", "| ep_rew_mean | 27 |\n", "| time/ | |\n", "| fps | 148 |\n", "| iterations | 33 |\n", "| time_elapsed | 454 |\n", "| total_timesteps | 67584 |\n", "| train/ | |\n", "| approx_kl | 0.037613478 |\n", "| clip_fraction | 0.337 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.28 |\n", "| explained_variance | 0.25 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.00612 |\n", "| policy_gradient_loss | -0.0453 |\n", "| value_loss | 0.539 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 816 |\n", "| ep_rew_mean | 26.8 |\n", "| time/ | |\n", "| fps | 148 |\n", "| iterations | 34 |\n", "| time_elapsed | 468 |\n", "| total_timesteps | 69632 |\n", "| train/ | |\n", "| approx_kl | 0.059649788 |\n", "| clip_fraction | 0.473 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.08 |\n", "| explained_variance | 0.00353 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.204 |\n", "| policy_gradient_loss | 0.025 |\n", "| value_loss | 9.51 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 803 |\n", "| ep_rew_mean | 26.7 |\n", "| time/ | |\n", "| fps | 148 |\n", "| iterations | 35 |\n", "| time_elapsed | 482 |\n", "| total_timesteps | 71680 |\n", "| train/ | |\n", "| approx_kl | 0.04336705 |\n", "| clip_fraction | 0.348 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.09 |\n", "| explained_variance | -0.233 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0125 |\n", "| policy_gradient_loss | -0.0379 |\n", "| value_loss | 0.69 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 791 |\n", "| ep_rew_mean | 27.1 |\n", "| time/ | |\n", "| fps | 148 |\n", "| iterations | 36 |\n", "| time_elapsed | 496 |\n", "| total_timesteps | 73728 |\n", "| train/ | |\n", "| approx_kl | 0.06368456 |\n", "| clip_fraction | 0.358 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.07 |\n", "| explained_variance | 0.314 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0825 |\n", "| policy_gradient_loss | -0.0474 |\n", "| value_loss | 0.38 |\n", "----------------------------------------\n", "Eval num_timesteps=75000, episode_reward=13.20 +/- 4.40\n", "Episode length: 409.00 +/- 9.00\n", "----------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 409 |\n", "| mean_reward | 13.2 |\n", "| time/ | |\n", "| total_timesteps | 75000 |\n", "| train/ | |\n", "| approx_kl | 0.06360453 |\n", "| clip_fraction | 0.351 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.15 |\n", "| explained_variance | 0.392 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0728 |\n", "| policy_gradient_loss | -0.0451 |\n", "| value_loss | 0.858 |\n", "----------------------------------------\n", "New best mean reward!\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 776 |\n", "| ep_rew_mean | 26.9 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 37 |\n", "| time_elapsed | 521 |\n", "| total_timesteps | 75776 |\n", "---------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 775 |\n", "| ep_rew_mean | 27.5 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 38 |\n", "| time_elapsed | 535 |\n", "| total_timesteps | 77824 |\n", "| train/ | |\n", "| approx_kl | 0.06158152 |\n", "| clip_fraction | 0.392 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.1 |\n", "| explained_variance | 0.511 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.00312 |\n", "| policy_gradient_loss | -0.0584 |\n", "| value_loss | 0.454 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 781 |\n", "| ep_rew_mean | 28.2 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 39 |\n", "| time_elapsed | 549 |\n", "| total_timesteps | 79872 |\n", "| train/ | |\n", "| approx_kl | 0.070321366 |\n", "| clip_fraction | 0.398 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.14 |\n", "| explained_variance | 0.609 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0185 |\n", "| policy_gradient_loss | -0.0422 |\n", "| value_loss | 0.715 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 785 |\n", "| ep_rew_mean | 28.7 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 40 |\n", "| time_elapsed | 563 |\n", "| total_timesteps | 81920 |\n", "| train/ | |\n", "| approx_kl | 0.054008666 |\n", "| clip_fraction | 0.392 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.17 |\n", "| explained_variance | 0.706 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00974 |\n", "| policy_gradient_loss | -0.0441 |\n", "| value_loss | 0.584 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 793 |\n", "| ep_rew_mean | 29.4 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 41 |\n", "| time_elapsed | 577 |\n", "| total_timesteps | 83968 |\n", "| train/ | |\n", "| approx_kl | 0.05221559 |\n", "| clip_fraction | 0.368 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.14 |\n", "| explained_variance | 0.694 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.095 |\n", "| policy_gradient_loss | -0.0443 |\n", "| value_loss | 0.545 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 803 |\n", "| ep_rew_mean | 30.1 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 42 |\n", "| time_elapsed | 591 |\n", "| total_timesteps | 86016 |\n", "| train/ | |\n", "| approx_kl | 0.057356328 |\n", "| clip_fraction | 0.435 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.22 |\n", "| explained_variance | 0.768 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0693 |\n", "| policy_gradient_loss | -0.0585 |\n", "| value_loss | 0.396 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 809 |\n", "| ep_rew_mean | 30.8 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 43 |\n", "| time_elapsed | 605 |\n", "| total_timesteps | 88064 |\n", "| train/ | |\n", "| approx_kl | 0.05336117 |\n", "| clip_fraction | 0.373 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.16 |\n", "| explained_variance | 0.772 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0568 |\n", "| policy_gradient_loss | -0.0546 |\n", "| value_loss | 0.414 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 812 |\n", "| ep_rew_mean | 31.1 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 44 |\n", "| time_elapsed | 619 |\n", "| total_timesteps | 90112 |\n", "| train/ | |\n", "| approx_kl | 0.055956278 |\n", "| clip_fraction | 0.383 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.14 |\n", "| explained_variance | 0.737 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0385 |\n", "| policy_gradient_loss | -0.0502 |\n", "| value_loss | 0.419 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 819 |\n", "| ep_rew_mean | 30.9 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 45 |\n", "| time_elapsed | 633 |\n", "| total_timesteps | 92160 |\n", "| train/ | |\n", "| approx_kl | 0.055220544 |\n", "| clip_fraction | 0.383 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.11 |\n", "| explained_variance | 0.766 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.126 |\n", "| policy_gradient_loss | -0.0505 |\n", "| value_loss | 0.486 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 822 |\n", "| ep_rew_mean | 31.4 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 46 |\n", "| time_elapsed | 647 |\n", "| total_timesteps | 94208 |\n", "| train/ | |\n", "| approx_kl | 0.07828819 |\n", "| clip_fraction | 0.396 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.1 |\n", "| explained_variance | 0.558 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.143 |\n", "| policy_gradient_loss | -0.045 |\n", "| value_loss | 1.23 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 828 |\n", "| ep_rew_mean | 32 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 47 |\n", "| time_elapsed | 661 |\n", "| total_timesteps | 96256 |\n", "| train/ | |\n", "| approx_kl | 0.063016854 |\n", "| clip_fraction | 0.421 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.14 |\n", "| explained_variance | 0.875 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0056 |\n", "| policy_gradient_loss | -0.0511 |\n", "| value_loss | 0.298 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 832 |\n", "| ep_rew_mean | 32.8 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 48 |\n", "| time_elapsed | 675 |\n", "| total_timesteps | 98304 |\n", "| train/ | |\n", "| approx_kl | 0.06781979 |\n", "| clip_fraction | 0.419 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.1 |\n", "| explained_variance | 0.863 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0255 |\n", "| policy_gradient_loss | -0.0648 |\n", "| value_loss | 0.294 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 840 |\n", "| ep_rew_mean | 33.7 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 49 |\n", "| time_elapsed | 689 |\n", "| total_timesteps | 100352 |\n", "| train/ | |\n", "| approx_kl | 0.05764684 |\n", "| clip_fraction | 0.403 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.14 |\n", "| explained_variance | 0.748 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0134 |\n", "| policy_gradient_loss | -0.0449 |\n", "| value_loss | 0.558 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 841 |\n", "| ep_rew_mean | 33.2 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 50 |\n", "| time_elapsed | 703 |\n", "| total_timesteps | 102400 |\n", "| train/ | |\n", "| approx_kl | 0.065886185 |\n", "| clip_fraction | 0.386 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.09 |\n", "| explained_variance | 0.775 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0668 |\n", "| policy_gradient_loss | -0.0517 |\n", "| value_loss | 0.691 |\n", "-----------------------------------------\n", "--------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 845 |\n", "| ep_rew_mean | 33.8 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 51 |\n", "| time_elapsed | 717 |\n", "| total_timesteps | 104448 |\n", "| train/ | |\n", "| approx_kl | 0.07448 |\n", "| clip_fraction | 0.385 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.11 |\n", "| explained_variance | 0.777 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0403 |\n", "| policy_gradient_loss | -0.0451 |\n", "| value_loss | 0.547 |\n", "--------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 845 |\n", "| ep_rew_mean | 34.1 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 52 |\n", "| time_elapsed | 731 |\n", "| total_timesteps | 106496 |\n", "| train/ | |\n", "| approx_kl | 0.060482673 |\n", "| clip_fraction | 0.388 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.15 |\n", "| explained_variance | 0.862 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.102 |\n", "| policy_gradient_loss | -0.0455 |\n", "| value_loss | 0.331 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 847 |\n", "| ep_rew_mean | 33.3 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 53 |\n", "| time_elapsed | 745 |\n", "| total_timesteps | 108544 |\n", "| train/ | |\n", "| approx_kl | 0.06975438 |\n", "| clip_fraction | 0.417 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.14 |\n", "| explained_variance | 0.927 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0111 |\n", "| policy_gradient_loss | -0.0563 |\n", "| value_loss | 0.288 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 853 |\n", "| ep_rew_mean | 33.3 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 54 |\n", "| time_elapsed | 759 |\n", "| total_timesteps | 110592 |\n", "| train/ | |\n", "| approx_kl | 0.055687957 |\n", "| clip_fraction | 0.417 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.1 |\n", "| explained_variance | 0.928 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0213 |\n", "| policy_gradient_loss | -0.064 |\n", "| value_loss | 0.273 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 858 |\n", "| ep_rew_mean | 34 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 55 |\n", "| time_elapsed | 773 |\n", "| total_timesteps | 112640 |\n", "| train/ | |\n", "| approx_kl | 0.07719825 |\n", "| clip_fraction | 0.434 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.07 |\n", "| explained_variance | 0.935 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.00253 |\n", "| policy_gradient_loss | -0.0673 |\n", "| value_loss | 0.259 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 854 |\n", "| ep_rew_mean | 33.5 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 56 |\n", "| time_elapsed | 787 |\n", "| total_timesteps | 114688 |\n", "| train/ | |\n", "| approx_kl | 0.09108654 |\n", "| clip_fraction | 0.435 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.03 |\n", "| explained_variance | 0.924 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0011 |\n", "| policy_gradient_loss | -0.0724 |\n", "| value_loss | 0.259 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 856 |\n", "| ep_rew_mean | 34.2 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 57 |\n", "| time_elapsed | 801 |\n", "| total_timesteps | 116736 |\n", "| train/ | |\n", "| approx_kl | 0.105817586 |\n", "| clip_fraction | 0.454 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.05 |\n", "| explained_variance | 0.88 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0624 |\n", "| policy_gradient_loss | -0.0632 |\n", "| value_loss | 0.215 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 856 |\n", "| ep_rew_mean | 34.7 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 58 |\n", "| time_elapsed | 815 |\n", "| total_timesteps | 118784 |\n", "| train/ | |\n", "| approx_kl | 0.11113761 |\n", "| clip_fraction | 0.472 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.05 |\n", "| explained_variance | 0.868 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0827 |\n", "| policy_gradient_loss | -0.0717 |\n", "| value_loss | 0.184 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 856 |\n", "| ep_rew_mean | 35.7 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 59 |\n", "| time_elapsed | 829 |\n", "| total_timesteps | 120832 |\n", "| train/ | |\n", "| approx_kl | 0.113389514 |\n", "| clip_fraction | 0.468 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.08 |\n", "| explained_variance | 0.908 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0459 |\n", "| policy_gradient_loss | -0.0708 |\n", "| value_loss | 0.247 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 859 |\n", "| ep_rew_mean | 36.2 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 60 |\n", "| time_elapsed | 843 |\n", "| total_timesteps | 122880 |\n", "| train/ | |\n", "| approx_kl | 0.10444394 |\n", "| clip_fraction | 0.488 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.993 |\n", "| explained_variance | 0.508 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0187 |\n", "| policy_gradient_loss | -0.0419 |\n", "| value_loss | 1.11 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 600 |\n", "| ep_rew_mean | 36.9 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 61 |\n", "| time_elapsed | 857 |\n", "| total_timesteps | 124928 |\n", "| train/ | |\n", "| approx_kl | 0.081873715 |\n", "| clip_fraction | 0.455 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.02 |\n", "| explained_variance | 0.833 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.00215 |\n", "| policy_gradient_loss | -0.0603 |\n", "| value_loss | 0.319 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 604 |\n", "| ep_rew_mean | 37.8 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 62 |\n", "| time_elapsed | 871 |\n", "| total_timesteps | 126976 |\n", "| train/ | |\n", "| approx_kl | 0.11305898 |\n", "| clip_fraction | 0.478 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.01 |\n", "| explained_variance | 0.823 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0124 |\n", "| policy_gradient_loss | -0.0663 |\n", "| value_loss | 0.317 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 604 |\n", "| ep_rew_mean | 37.1 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 63 |\n", "| time_elapsed | 885 |\n", "| total_timesteps | 129024 |\n", "| train/ | |\n", "| approx_kl | 0.09423873 |\n", "| clip_fraction | 0.454 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.987 |\n", "| explained_variance | 0.676 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0664 |\n", "| policy_gradient_loss | -0.0545 |\n", "| value_loss | 0.852 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 606 |\n", "| ep_rew_mean | 38.5 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 64 |\n", "| time_elapsed | 899 |\n", "| total_timesteps | 131072 |\n", "| train/ | |\n", "| approx_kl | 0.065398686 |\n", "| clip_fraction | 0.379 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.936 |\n", "| explained_variance | 0.552 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.155 |\n", "| policy_gradient_loss | -0.0463 |\n", "| value_loss | 0.813 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 608 |\n", "| ep_rew_mean | 39.2 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 65 |\n", "| time_elapsed | 913 |\n", "| total_timesteps | 133120 |\n", "| train/ | |\n", "| approx_kl | 0.092356816 |\n", "| clip_fraction | 0.451 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.939 |\n", "| explained_variance | 0.335 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.33 |\n", "| policy_gradient_loss | -0.0275 |\n", "| value_loss | 2.53 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 615 |\n", "| ep_rew_mean | 40.3 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 66 |\n", "| time_elapsed | 926 |\n", "| total_timesteps | 135168 |\n", "| train/ | |\n", "| approx_kl | 0.09459061 |\n", "| clip_fraction | 0.409 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.869 |\n", "| explained_variance | 0.834 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0902 |\n", "| policy_gradient_loss | -0.043 |\n", "| value_loss | 0.61 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 630 |\n", "| ep_rew_mean | 45.5 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 67 |\n", "| time_elapsed | 940 |\n", "| total_timesteps | 137216 |\n", "| train/ | |\n", "| approx_kl | 0.11268992 |\n", "| clip_fraction | 0.502 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.96 |\n", "| explained_variance | 0.143 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.377 |\n", "| policy_gradient_loss | 0.0357 |\n", "| value_loss | 10.2 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 638 |\n", "| ep_rew_mean | 47.3 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 68 |\n", "| time_elapsed | 954 |\n", "| total_timesteps | 139264 |\n", "| train/ | |\n", "| approx_kl | 0.29693425 |\n", "| clip_fraction | 0.635 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.857 |\n", "| explained_variance | 0.375 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.48 |\n", "| policy_gradient_loss | 0.0465 |\n", "| value_loss | 37.9 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 622 |\n", "| ep_rew_mean | 45.9 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 69 |\n", "| time_elapsed | 968 |\n", "| total_timesteps | 141312 |\n", "| train/ | |\n", "| approx_kl | 0.34250605 |\n", "| clip_fraction | 0.526 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.669 |\n", "| explained_variance | 0.688 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.66 |\n", "| policy_gradient_loss | 0.0308 |\n", "| value_loss | 10.7 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 621 |\n", "| ep_rew_mean | 46 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 70 |\n", "| time_elapsed | 982 |\n", "| total_timesteps | 143360 |\n", "| train/ | |\n", "| approx_kl | 0.17991942 |\n", "| clip_fraction | 0.423 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.608 |\n", "| explained_variance | 0.549 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.457 |\n", "| policy_gradient_loss | 0.00742 |\n", "| value_loss | 5 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 615 |\n", "| ep_rew_mean | 46.1 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 71 |\n", "| time_elapsed | 996 |\n", "| total_timesteps | 145408 |\n", "| train/ | |\n", "| approx_kl | 0.1185381 |\n", "| clip_fraction | 0.405 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.667 |\n", "| explained_variance | 0.525 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.986 |\n", "| policy_gradient_loss | 0.0203 |\n", "| value_loss | 5.77 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 614 |\n", "| ep_rew_mean | 47.4 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 72 |\n", "| time_elapsed | 1010 |\n", "| total_timesteps | 147456 |\n", "| train/ | |\n", "| approx_kl | 0.1674037 |\n", "| clip_fraction | 0.366 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.635 |\n", "| explained_variance | 0.794 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.624 |\n", "| policy_gradient_loss | -0.0256 |\n", "| value_loss | 2.5 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 609 |\n", "| ep_rew_mean | 50.9 |\n", "| time/ | |\n", "| fps | 145 |\n", "| iterations | 73 |\n", "| time_elapsed | 1024 |\n", "| total_timesteps | 149504 |\n", "| train/ | |\n", "| approx_kl | 0.13733745 |\n", "| clip_fraction | 0.42 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.668 |\n", "| explained_variance | 0.702 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.2 |\n", "| policy_gradient_loss | 0.0253 |\n", "| value_loss | 10.5 |\n", "----------------------------------------\n", "Eval num_timesteps=150000, episode_reward=103.70 +/- 57.26\n", "Episode length: 553.30 +/- 37.89\n", "----------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 553 |\n", "| mean_reward | 104 |\n", "| time/ | |\n", "| total_timesteps | 150000 |\n", "| train/ | |\n", "| approx_kl | 0.23443963 |\n", "| clip_fraction | 0.38 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.404 |\n", "| explained_variance | 0.134 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.24 |\n", "| policy_gradient_loss | 0.0351 |\n", "| value_loss | 36.8 |\n", "----------------------------------------\n", "New best mean reward!\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 605 |\n", "| ep_rew_mean | 52 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 74 |\n", "| time_elapsed | 1053 |\n", "| total_timesteps | 151552 |\n", "---------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 600 |\n", "| ep_rew_mean | 53.5 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 75 |\n", "| time_elapsed | 1067 |\n", "| total_timesteps | 153600 |\n", "| train/ | |\n", "| approx_kl | 0.17245752 |\n", "| clip_fraction | 0.258 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.307 |\n", "| explained_variance | 0.655 |\n", "| learning_rate | 0.0003 |\n", "| loss | 4.46 |\n", "| policy_gradient_loss | 0.0263 |\n", "| value_loss | 38.3 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 599 |\n", "| ep_rew_mean | 56.7 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 76 |\n", "| time_elapsed | 1081 |\n", "| total_timesteps | 155648 |\n", "| train/ | |\n", "| approx_kl | 0.15141985 |\n", "| clip_fraction | 0.248 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.323 |\n", "| explained_variance | 0.618 |\n", "| learning_rate | 0.0003 |\n", "| loss | 10.8 |\n", "| policy_gradient_loss | 0.0282 |\n", "| value_loss | 25.3 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 603 |\n", "| ep_rew_mean | 57.6 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 77 |\n", "| time_elapsed | 1095 |\n", "| total_timesteps | 157696 |\n", "| train/ | |\n", "| approx_kl | 0.27037132 |\n", "| clip_fraction | 0.238 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.277 |\n", "| explained_variance | 0.641 |\n", "| learning_rate | 0.0003 |\n", "| loss | 12 |\n", "| policy_gradient_loss | 0.0015 |\n", "| value_loss | 25.7 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 596 |\n", "| ep_rew_mean | 57.3 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 78 |\n", "| time_elapsed | 1109 |\n", "| total_timesteps | 159744 |\n", "| train/ | |\n", "| approx_kl | 0.12758471 |\n", "| clip_fraction | 0.225 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.358 |\n", "| explained_variance | 0.763 |\n", "| learning_rate | 0.0003 |\n", "| loss | 4.82 |\n", "| policy_gradient_loss | 0.0117 |\n", "| value_loss | 20.3 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 583 |\n", "| ep_rew_mean | 56.2 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 79 |\n", "| time_elapsed | 1123 |\n", "| total_timesteps | 161792 |\n", "| train/ | |\n", "| approx_kl | 0.25191346 |\n", "| clip_fraction | 0.362 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.5 |\n", "| explained_variance | 0.621 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.13 |\n", "| policy_gradient_loss | -0.0115 |\n", "| value_loss | 5.36 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 568 |\n", "| ep_rew_mean | 55.3 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 80 |\n", "| time_elapsed | 1137 |\n", "| total_timesteps | 163840 |\n", "| train/ | |\n", "| approx_kl | 0.3467046 |\n", "| clip_fraction | 0.31 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.461 |\n", "| explained_variance | 0.652 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.1 |\n", "| policy_gradient_loss | -0.031 |\n", "| value_loss | 3.23 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 565 |\n", "| ep_rew_mean | 58.3 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 81 |\n", "| time_elapsed | 1151 |\n", "| total_timesteps | 165888 |\n", "| train/ | |\n", "| approx_kl | 0.12935388 |\n", "| clip_fraction | 0.317 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.572 |\n", "| explained_variance | 0.82 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.253 |\n", "| policy_gradient_loss | -0.0199 |\n", "| value_loss | 1.55 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 557 |\n", "| ep_rew_mean | 58.4 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 82 |\n", "| time_elapsed | 1165 |\n", "| total_timesteps | 167936 |\n", "| train/ | |\n", "| approx_kl | 0.13863291 |\n", "| clip_fraction | 0.473 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.709 |\n", "| explained_variance | 0.0826 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.34 |\n", "| policy_gradient_loss | 0.0468 |\n", "| value_loss | 26.8 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 554 |\n", "| ep_rew_mean | 58.3 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 83 |\n", "| time_elapsed | 1178 |\n", "| total_timesteps | 169984 |\n", "| train/ | |\n", "| approx_kl | 0.33827913 |\n", "| clip_fraction | 0.346 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.561 |\n", "| explained_variance | 0.569 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.997 |\n", "| policy_gradient_loss | -0.0142 |\n", "| value_loss | 6.91 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 551 |\n", "| ep_rew_mean | 58.7 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 84 |\n", "| time_elapsed | 1193 |\n", "| total_timesteps | 172032 |\n", "| train/ | |\n", "| approx_kl | 0.08615722 |\n", "| clip_fraction | 0.335 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.646 |\n", "| explained_variance | 0.628 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.351 |\n", "| policy_gradient_loss | -0.0227 |\n", "| value_loss | 1.39 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 549 |\n", "| ep_rew_mean | 58.6 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 85 |\n", "| time_elapsed | 1206 |\n", "| total_timesteps | 174080 |\n", "| train/ | |\n", "| approx_kl | 0.083379656 |\n", "| clip_fraction | 0.284 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.559 |\n", "| explained_variance | 0.45 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.434 |\n", "| policy_gradient_loss | -0.0019 |\n", "| value_loss | 5.06 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 547 |\n", "| ep_rew_mean | 58 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 86 |\n", "| time_elapsed | 1220 |\n", "| total_timesteps | 176128 |\n", "| train/ | |\n", "| approx_kl | 0.113125876 |\n", "| clip_fraction | 0.388 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.678 |\n", "| explained_variance | 0.653 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0239 |\n", "| policy_gradient_loss | -0.0261 |\n", "| value_loss | 0.859 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 535 |\n", "| ep_rew_mean | 56.8 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 87 |\n", "| time_elapsed | 1234 |\n", "| total_timesteps | 178176 |\n", "| train/ | |\n", "| approx_kl | 0.070039235 |\n", "| clip_fraction | 0.334 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.683 |\n", "| explained_variance | 0.877 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0686 |\n", "| policy_gradient_loss | -0.0338 |\n", "| value_loss | 0.498 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 540 |\n", "| ep_rew_mean | 57.4 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 88 |\n", "| time_elapsed | 1248 |\n", "| total_timesteps | 180224 |\n", "| train/ | |\n", "| approx_kl | 0.07448134 |\n", "| clip_fraction | 0.304 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.653 |\n", "| explained_variance | 0.747 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.05 |\n", "| policy_gradient_loss | -0.0383 |\n", "| value_loss | 0.683 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 543 |\n", "| ep_rew_mean | 57.2 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 89 |\n", "| time_elapsed | 1262 |\n", "| total_timesteps | 182272 |\n", "| train/ | |\n", "| approx_kl | 0.09344685 |\n", "| clip_fraction | 0.35 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.706 |\n", "| explained_variance | 0.799 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0297 |\n", "| policy_gradient_loss | -0.0445 |\n", "| value_loss | 0.511 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 545 |\n", "| ep_rew_mean | 58.6 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 90 |\n", "| time_elapsed | 1276 |\n", "| total_timesteps | 184320 |\n", "| train/ | |\n", "| approx_kl | 0.06980311 |\n", "| clip_fraction | 0.366 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.847 |\n", "| explained_variance | 0.683 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0505 |\n", "| policy_gradient_loss | -0.0387 |\n", "| value_loss | 1.06 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 538 |\n", "| ep_rew_mean | 57.2 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 91 |\n", "| time_elapsed | 1290 |\n", "| total_timesteps | 186368 |\n", "| train/ | |\n", "| approx_kl | 0.19598146 |\n", "| clip_fraction | 0.495 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.72 |\n", "| explained_variance | 0.0013 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0904 |\n", "| policy_gradient_loss | 0.0219 |\n", "| value_loss | 7.64 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 539 |\n", "| ep_rew_mean | 56.4 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 92 |\n", "| time_elapsed | 1304 |\n", "| total_timesteps | 188416 |\n", "| train/ | |\n", "| approx_kl | 0.11159618 |\n", "| clip_fraction | 0.375 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.709 |\n", "| explained_variance | 0.722 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0891 |\n", "| policy_gradient_loss | -0.043 |\n", "| value_loss | 0.581 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 541 |\n", "| ep_rew_mean | 55.5 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 93 |\n", "| time_elapsed | 1318 |\n", "| total_timesteps | 190464 |\n", "| train/ | |\n", "| approx_kl | 0.099714816 |\n", "| clip_fraction | 0.412 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.838 |\n", "| explained_variance | 0.589 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.147 |\n", "| policy_gradient_loss | -0.0389 |\n", "| value_loss | 0.855 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 538 |\n", "| ep_rew_mean | 51.9 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 94 |\n", "| time_elapsed | 1332 |\n", "| total_timesteps | 192512 |\n", "| train/ | |\n", "| approx_kl | 0.074619025 |\n", "| clip_fraction | 0.373 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.838 |\n", "| explained_variance | 0.761 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0858 |\n", "| policy_gradient_loss | -0.0403 |\n", "| value_loss | 0.73 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 541 |\n", "| ep_rew_mean | 51.8 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 95 |\n", "| time_elapsed | 1346 |\n", "| total_timesteps | 194560 |\n", "| train/ | |\n", "| approx_kl | 0.07863231 |\n", "| clip_fraction | 0.347 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.735 |\n", "| explained_variance | 0.672 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.126 |\n", "| policy_gradient_loss | -0.0441 |\n", "| value_loss | 0.771 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 552 |\n", "| ep_rew_mean | 52.7 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 96 |\n", "| time_elapsed | 1360 |\n", "| total_timesteps | 196608 |\n", "| train/ | |\n", "| approx_kl | 0.078673385 |\n", "| clip_fraction | 0.375 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.849 |\n", "| explained_variance | 0.759 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.103 |\n", "| policy_gradient_loss | -0.0473 |\n", "| value_loss | 0.523 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 561 |\n", "| ep_rew_mean | 53.9 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 97 |\n", "| time_elapsed | 1374 |\n", "| total_timesteps | 198656 |\n", "| train/ | |\n", "| approx_kl | 0.06958656 |\n", "| clip_fraction | 0.379 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.883 |\n", "| explained_variance | 0.701 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.23 |\n", "| policy_gradient_loss | -0.0375 |\n", "| value_loss | 0.652 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 565 |\n", "| ep_rew_mean | 54.9 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 98 |\n", "| time_elapsed | 1388 |\n", "| total_timesteps | 200704 |\n", "| train/ | |\n", "| approx_kl | 0.05332349 |\n", "| clip_fraction | 0.335 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.831 |\n", "| explained_variance | 0.62 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.51 |\n", "| policy_gradient_loss | -0.00702 |\n", "| value_loss | 1.94 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 577 |\n", "| ep_rew_mean | 56.4 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 99 |\n", "| time_elapsed | 1402 |\n", "| total_timesteps | 202752 |\n", "| train/ | |\n", "| approx_kl | 0.07593148 |\n", "| clip_fraction | 0.408 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.923 |\n", "| explained_variance | 0.789 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0549 |\n", "| policy_gradient_loss | -0.0203 |\n", "| value_loss | 1.58 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 579 |\n", "| ep_rew_mean | 56.5 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 100 |\n", "| time_elapsed | 1416 |\n", "| total_timesteps | 204800 |\n", "| train/ | |\n", "| approx_kl | 0.086013794 |\n", "| clip_fraction | 0.382 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.801 |\n", "| explained_variance | 0.798 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.152 |\n", "| policy_gradient_loss | -0.0305 |\n", "| value_loss | 0.96 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 579 |\n", "| ep_rew_mean | 56.5 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 101 |\n", "| time_elapsed | 1430 |\n", "| total_timesteps | 206848 |\n", "| train/ | |\n", "| approx_kl | 0.071443856 |\n", "| clip_fraction | 0.471 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.15 |\n", "| explained_variance | 0.897 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.018 |\n", "| policy_gradient_loss | -0.0433 |\n", "| value_loss | 0.263 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 579 |\n", "| ep_rew_mean | 56.5 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 102 |\n", "| time_elapsed | 1443 |\n", "| total_timesteps | 208896 |\n", "| train/ | |\n", "| approx_kl | 0.050583325 |\n", "| clip_fraction | 0.367 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.24 |\n", "| explained_variance | 0.801 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0599 |\n", "| policy_gradient_loss | -0.0382 |\n", "| value_loss | 0.0337 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 637 |\n", "| ep_rew_mean | 57.2 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 103 |\n", "| time_elapsed | 1458 |\n", "| total_timesteps | 210944 |\n", "| train/ | |\n", "| approx_kl | 0.04321297 |\n", "| clip_fraction | 0.37 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.23 |\n", "| explained_variance | -1.88 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0614 |\n", "| policy_gradient_loss | -0.0484 |\n", "| value_loss | 0.0163 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 643 |\n", "| ep_rew_mean | 57.7 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 104 |\n", "| time_elapsed | 1472 |\n", "| total_timesteps | 212992 |\n", "| train/ | |\n", "| approx_kl | 0.30551288 |\n", "| clip_fraction | 0.584 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.743 |\n", "| explained_variance | 0.243 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.604 |\n", "| policy_gradient_loss | 0.0368 |\n", "| value_loss | 11.5 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 654 |\n", "| ep_rew_mean | 55.9 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 105 |\n", "| time_elapsed | 1486 |\n", "| total_timesteps | 215040 |\n", "| train/ | |\n", "| approx_kl | 0.092521176 |\n", "| clip_fraction | 0.376 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.809 |\n", "| explained_variance | 0.452 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.288 |\n", "| policy_gradient_loss | -0.00225 |\n", "| value_loss | 1.45 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 660 |\n", "| ep_rew_mean | 54.9 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 106 |\n", "| time_elapsed | 1500 |\n", "| total_timesteps | 217088 |\n", "| train/ | |\n", "| approx_kl | 0.12184173 |\n", "| clip_fraction | 0.485 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.962 |\n", "| explained_variance | 0.616 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0229 |\n", "| policy_gradient_loss | -0.0548 |\n", "| value_loss | 0.521 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 663 |\n", "| ep_rew_mean | 53.9 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 107 |\n", "| time_elapsed | 1514 |\n", "| total_timesteps | 219136 |\n", "| train/ | |\n", "| approx_kl | 0.07295338 |\n", "| clip_fraction | 0.346 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.85 |\n", "| explained_variance | 0.746 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0421 |\n", "| policy_gradient_loss | -0.045 |\n", "| value_loss | 0.77 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 669 |\n", "| ep_rew_mean | 54.7 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 108 |\n", "| time_elapsed | 1527 |\n", "| total_timesteps | 221184 |\n", "| train/ | |\n", "| approx_kl | 0.08259947 |\n", "| clip_fraction | 0.389 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.882 |\n", "| explained_variance | 0.702 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.104 |\n", "| policy_gradient_loss | -0.0273 |\n", "| value_loss | 1.28 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 675 |\n", "| ep_rew_mean | 53.4 |\n", "| time/ | |\n", "| fps | 144 |\n", "| iterations | 109 |\n", "| time_elapsed | 1541 |\n", "| total_timesteps | 223232 |\n", "| train/ | |\n", "| approx_kl | 0.09154592 |\n", "| clip_fraction | 0.405 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.927 |\n", "| explained_variance | 0.727 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.00675 |\n", "| policy_gradient_loss | -0.0445 |\n", "| value_loss | 0.576 |\n", "----------------------------------------\n", "Eval num_timesteps=225000, episode_reward=86.80 +/- 23.20\n", "Episode length: 864.50 +/- 120.70\n", "----------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 864 |\n", "| mean_reward | 86.8 |\n", "| time/ | |\n", "| total_timesteps | 225000 |\n", "| train/ | |\n", "| approx_kl | 0.10651243 |\n", "| clip_fraction | 0.499 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.07 |\n", "| explained_variance | 0.674 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0725 |\n", "| policy_gradient_loss | -0.0629 |\n", "| value_loss | 0.522 |\n", "----------------------------------------\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 682 |\n", "| ep_rew_mean | 52.7 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 110 |\n", "| time_elapsed | 1578 |\n", "| total_timesteps | 225280 |\n", "---------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 693 |\n", "| ep_rew_mean | 53.6 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 111 |\n", "| time_elapsed | 1592 |\n", "| total_timesteps | 227328 |\n", "| train/ | |\n", "| approx_kl | 0.095396966 |\n", "| clip_fraction | 0.456 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.991 |\n", "| explained_variance | 0.875 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0177 |\n", "| policy_gradient_loss | -0.0583 |\n", "| value_loss | 0.446 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 705 |\n", "| ep_rew_mean | 54.6 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 112 |\n", "| time_elapsed | 1606 |\n", "| total_timesteps | 229376 |\n", "| train/ | |\n", "| approx_kl | 0.07236557 |\n", "| clip_fraction | 0.386 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.885 |\n", "| explained_variance | 0.666 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0615 |\n", "| policy_gradient_loss | -0.0333 |\n", "| value_loss | 0.957 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 712 |\n", "| ep_rew_mean | 57.3 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 113 |\n", "| time_elapsed | 1620 |\n", "| total_timesteps | 231424 |\n", "| train/ | |\n", "| approx_kl | 0.06923866 |\n", "| clip_fraction | 0.395 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.992 |\n", "| explained_variance | 0.831 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0197 |\n", "| policy_gradient_loss | -0.05 |\n", "| value_loss | 0.583 |\n", "----------------------------------------\n", "--------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 724 |\n", "| ep_rew_mean | 58.9 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 114 |\n", "| time_elapsed | 1634 |\n", "| total_timesteps | 233472 |\n", "| train/ | |\n", "| approx_kl | 0.161993 |\n", "| clip_fraction | 0.503 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.797 |\n", "| explained_variance | 0.389 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.14 |\n", "| policy_gradient_loss | 0.0318 |\n", "| value_loss | 8.11 |\n", "--------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 732 |\n", "| ep_rew_mean | 59.5 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 115 |\n", "| time_elapsed | 1648 |\n", "| total_timesteps | 235520 |\n", "| train/ | |\n", "| approx_kl | 0.08298081 |\n", "| clip_fraction | 0.413 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.765 |\n", "| explained_variance | 0.69 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.00126 |\n", "| policy_gradient_loss | -0.0328 |\n", "| value_loss | 1.04 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 743 |\n", "| ep_rew_mean | 60.2 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 116 |\n", "| time_elapsed | 1662 |\n", "| total_timesteps | 237568 |\n", "| train/ | |\n", "| approx_kl | 0.08679047 |\n", "| clip_fraction | 0.41 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.821 |\n", "| explained_variance | 0.874 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0409 |\n", "| policy_gradient_loss | -0.0483 |\n", "| value_loss | 0.411 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 754 |\n", "| ep_rew_mean | 62.7 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 117 |\n", "| time_elapsed | 1676 |\n", "| total_timesteps | 239616 |\n", "| train/ | |\n", "| approx_kl | 0.09812245 |\n", "| clip_fraction | 0.386 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.796 |\n", "| explained_variance | 0.869 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.265 |\n", "| policy_gradient_loss | -0.0482 |\n", "| value_loss | 0.823 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 759 |\n", "| ep_rew_mean | 60.8 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 118 |\n", "| time_elapsed | 1690 |\n", "| total_timesteps | 241664 |\n", "| train/ | |\n", "| approx_kl | 0.09138046 |\n", "| clip_fraction | 0.378 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.747 |\n", "| explained_variance | 0.313 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.354 |\n", "| policy_gradient_loss | 0.00171 |\n", "| value_loss | 2.58 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 763 |\n", "| ep_rew_mean | 60.7 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 119 |\n", "| time_elapsed | 1704 |\n", "| total_timesteps | 243712 |\n", "| train/ | |\n", "| approx_kl | 0.0839825 |\n", "| clip_fraction | 0.397 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.866 |\n", "| explained_variance | 0.583 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.274 |\n", "| policy_gradient_loss | -0.0159 |\n", "| value_loss | 2.14 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 775 |\n", "| ep_rew_mean | 62.3 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 120 |\n", "| time_elapsed | 1718 |\n", "| total_timesteps | 245760 |\n", "| train/ | |\n", "| approx_kl | 0.08232975 |\n", "| clip_fraction | 0.376 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.774 |\n", "| explained_variance | 0.728 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.49 |\n", "| policy_gradient_loss | -0.0355 |\n", "| value_loss | 1.34 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 782 |\n", "| ep_rew_mean | 65.9 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 121 |\n", "| time_elapsed | 1732 |\n", "| total_timesteps | 247808 |\n", "| train/ | |\n", "| approx_kl | 0.092763826 |\n", "| clip_fraction | 0.401 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.827 |\n", "| explained_variance | 0.797 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.14 |\n", "| policy_gradient_loss | -0.0395 |\n", "| value_loss | 0.776 |\n", "-----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 786 |\n", "| ep_rew_mean | 66 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 122 |\n", "| time_elapsed | 1746 |\n", "| total_timesteps | 249856 |\n", "| train/ | |\n", "| approx_kl | 2.7165825 |\n", "| clip_fraction | 0.698 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.334 |\n", "| explained_variance | 0.135 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.95 |\n", "| policy_gradient_loss | 0.0854 |\n", "| value_loss | 57.9 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 795 |\n", "| ep_rew_mean | 66.4 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 123 |\n", "| time_elapsed | 1760 |\n", "| total_timesteps | 251904 |\n", "| train/ | |\n", "| approx_kl | 0.12073946 |\n", "| clip_fraction | 0.276 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.348 |\n", "| explained_variance | 0.58 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.387 |\n", "| policy_gradient_loss | 0.0261 |\n", "| value_loss | 2.78 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 791 |\n", "| ep_rew_mean | 66.5 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 124 |\n", "| time_elapsed | 1774 |\n", "| total_timesteps | 253952 |\n", "| train/ | |\n", "| approx_kl | 0.16262615 |\n", "| clip_fraction | 0.257 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.352 |\n", "| explained_variance | 0.769 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.157 |\n", "| policy_gradient_loss | -0.0159 |\n", "| value_loss | 1.16 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 799 |\n", "| ep_rew_mean | 67 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 125 |\n", "| time_elapsed | 1788 |\n", "| total_timesteps | 256000 |\n", "| train/ | |\n", "| approx_kl | 0.09536018 |\n", "| clip_fraction | 0.278 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.469 |\n", "| explained_variance | 0.366 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0947 |\n", "| policy_gradient_loss | 0.00485 |\n", "| value_loss | 2.83 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 809 |\n", "| ep_rew_mean | 68.1 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 126 |\n", "| time_elapsed | 1802 |\n", "| total_timesteps | 258048 |\n", "| train/ | |\n", "| approx_kl | 0.102560155 |\n", "| clip_fraction | 0.372 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.718 |\n", "| explained_variance | 0.901 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.173 |\n", "| policy_gradient_loss | -0.0353 |\n", "| value_loss | 0.667 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 819 |\n", "| ep_rew_mean | 71.6 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 127 |\n", "| time_elapsed | 1816 |\n", "| total_timesteps | 260096 |\n", "| train/ | |\n", "| approx_kl | 0.086536564 |\n", "| clip_fraction | 0.337 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.743 |\n", "| explained_variance | 0.901 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.446 |\n", "| policy_gradient_loss | -0.0292 |\n", "| value_loss | 1.68 |\n", "-----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 816 |\n", "| ep_rew_mean | 72.5 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 128 |\n", "| time_elapsed | 1830 |\n", "| total_timesteps | 262144 |\n", "| train/ | |\n", "| approx_kl | 0.4169265 |\n", "| clip_fraction | 0.528 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.515 |\n", "| explained_variance | 0.34 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.51 |\n", "| policy_gradient_loss | 0.0825 |\n", "| value_loss | 33 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 817 |\n", "| ep_rew_mean | 73.4 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 129 |\n", "| time_elapsed | 1844 |\n", "| total_timesteps | 264192 |\n", "| train/ | |\n", "| approx_kl | 0.5078336 |\n", "| clip_fraction | 0.45 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.402 |\n", "| explained_variance | 0.382 |\n", "| learning_rate | 0.0003 |\n", "| loss | 4.75 |\n", "| policy_gradient_loss | 0.0803 |\n", "| value_loss | 15 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 823 |\n", "| ep_rew_mean | 72.8 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 130 |\n", "| time_elapsed | 1858 |\n", "| total_timesteps | 266240 |\n", "| train/ | |\n", "| approx_kl | 0.1712541 |\n", "| clip_fraction | 0.314 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.424 |\n", "| explained_variance | 0.512 |\n", "| learning_rate | 0.0003 |\n", "| loss | 16.9 |\n", "| policy_gradient_loss | 0.0209 |\n", "| value_loss | 24 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 829 |\n", "| ep_rew_mean | 74.6 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 131 |\n", "| time_elapsed | 1872 |\n", "| total_timesteps | 268288 |\n", "| train/ | |\n", "| approx_kl | 0.13985217 |\n", "| clip_fraction | 0.334 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.488 |\n", "| explained_variance | 0.902 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.225 |\n", "| policy_gradient_loss | -0.00948 |\n", "| value_loss | 1.96 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 832 |\n", "| ep_rew_mean | 77.4 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 132 |\n", "| time_elapsed | 1886 |\n", "| total_timesteps | 270336 |\n", "| train/ | |\n", "| approx_kl | 0.11218135 |\n", "| clip_fraction | 0.299 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.407 |\n", "| explained_variance | 0.786 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.983 |\n", "| policy_gradient_loss | 0.00469 |\n", "| value_loss | 7.35 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 838 |\n", "| ep_rew_mean | 81.1 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 133 |\n", "| time_elapsed | 1900 |\n", "| total_timesteps | 272384 |\n", "| train/ | |\n", "| approx_kl | 0.16696078 |\n", "| clip_fraction | 0.362 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.451 |\n", "| explained_variance | 0.831 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.748 |\n", "| policy_gradient_loss | 0.00582 |\n", "| value_loss | 7.08 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 836 |\n", "| ep_rew_mean | 84.8 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 134 |\n", "| time_elapsed | 1914 |\n", "| total_timesteps | 274432 |\n", "| train/ | |\n", "| approx_kl | 0.4695661 |\n", "| clip_fraction | 0.424 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.341 |\n", "| explained_variance | 0.627 |\n", "| learning_rate | 0.0003 |\n", "| loss | 8.02 |\n", "| policy_gradient_loss | 0.0681 |\n", "| value_loss | 39 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 841 |\n", "| ep_rew_mean | 86.5 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 135 |\n", "| time_elapsed | 1928 |\n", "| total_timesteps | 276480 |\n", "| train/ | |\n", "| approx_kl | 0.52759147 |\n", "| clip_fraction | 0.465 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.4 |\n", "| explained_variance | 0.885 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.572 |\n", "| policy_gradient_loss | 0.025 |\n", "| value_loss | 11.3 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 841 |\n", "| ep_rew_mean | 88.2 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 136 |\n", "| time_elapsed | 1942 |\n", "| total_timesteps | 278528 |\n", "| train/ | |\n", "| approx_kl | 0.14955196 |\n", "| clip_fraction | 0.328 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.438 |\n", "| explained_variance | 0.919 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.37 |\n", "| policy_gradient_loss | -0.0141 |\n", "| value_loss | 3.35 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 841 |\n", "| ep_rew_mean | 90.2 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 137 |\n", "| time_elapsed | 1956 |\n", "| total_timesteps | 280576 |\n", "| train/ | |\n", "| approx_kl | 0.32031983 |\n", "| clip_fraction | 0.364 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.362 |\n", "| explained_variance | 0.9 |\n", "| learning_rate | 0.0003 |\n", "| loss | 5.92 |\n", "| policy_gradient_loss | 0.0041 |\n", "| value_loss | 12.5 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 842 |\n", "| ep_rew_mean | 91 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 138 |\n", "| time_elapsed | 1970 |\n", "| total_timesteps | 282624 |\n", "| train/ | |\n", "| approx_kl | 0.18896851 |\n", "| clip_fraction | 0.305 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.383 |\n", "| explained_variance | 0.906 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.46 |\n", "| policy_gradient_loss | 0.0164 |\n", "| value_loss | 13.1 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 838 |\n", "| ep_rew_mean | 96.5 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 139 |\n", "| time_elapsed | 1984 |\n", "| total_timesteps | 284672 |\n", "| train/ | |\n", "| approx_kl | 0.26287025 |\n", "| clip_fraction | 0.376 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.419 |\n", "| explained_variance | 0.908 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.989 |\n", "| policy_gradient_loss | 0.0067 |\n", "| value_loss | 6.54 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 839 |\n", "| ep_rew_mean | 99.7 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 140 |\n", "| time_elapsed | 1997 |\n", "| total_timesteps | 286720 |\n", "| train/ | |\n", "| approx_kl | 0.1999823 |\n", "| clip_fraction | 0.296 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.347 |\n", "| explained_variance | 0.802 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.31 |\n", "| policy_gradient_loss | 0.0378 |\n", "| value_loss | 30 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 779 |\n", "| ep_rew_mean | 104 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 141 |\n", "| time_elapsed | 2011 |\n", "| total_timesteps | 288768 |\n", "| train/ | |\n", "| approx_kl | 0.20004278 |\n", "| clip_fraction | 0.262 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.347 |\n", "| explained_variance | 0.907 |\n", "| learning_rate | 0.0003 |\n", "| loss | 4.02 |\n", "| policy_gradient_loss | 0.00275 |\n", "| value_loss | 23.4 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 772 |\n", "| ep_rew_mean | 110 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 142 |\n", "| time_elapsed | 2025 |\n", "| total_timesteps | 290816 |\n", "| train/ | |\n", "| approx_kl | 0.32036144 |\n", "| clip_fraction | 0.292 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.321 |\n", "| explained_variance | 0.882 |\n", "| learning_rate | 0.0003 |\n", "| loss | 14.9 |\n", "| policy_gradient_loss | 0.00891 |\n", "| value_loss | 29.2 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 766 |\n", "| ep_rew_mean | 113 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 143 |\n", "| time_elapsed | 2040 |\n", "| total_timesteps | 292864 |\n", "| train/ | |\n", "| approx_kl | 0.85077524 |\n", "| clip_fraction | 0.232 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.165 |\n", "| explained_variance | 0.932 |\n", "| learning_rate | 0.0003 |\n", "| loss | 12.8 |\n", "| policy_gradient_loss | 0.0228 |\n", "| value_loss | 27.1 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 768 |\n", "| ep_rew_mean | 117 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 144 |\n", "| time_elapsed | 2054 |\n", "| total_timesteps | 294912 |\n", "| train/ | |\n", "| approx_kl | 0.3532856 |\n", "| clip_fraction | 0.269 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.273 |\n", "| explained_variance | 0.8 |\n", "| learning_rate | 0.0003 |\n", "| loss | 9.83 |\n", "| policy_gradient_loss | 0.00987 |\n", "| value_loss | 29.9 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 766 |\n", "| ep_rew_mean | 119 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 145 |\n", "| time_elapsed | 2068 |\n", "| total_timesteps | 296960 |\n", "| train/ | |\n", "| approx_kl | 0.27137476 |\n", "| clip_fraction | 0.304 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.297 |\n", "| explained_variance | 0.922 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.18 |\n", "| policy_gradient_loss | 0.0111 |\n", "| value_loss | 25.3 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 763 |\n", "| ep_rew_mean | 122 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 146 |\n", "| time_elapsed | 2082 |\n", "| total_timesteps | 299008 |\n", "| train/ | |\n", "| approx_kl | 0.27127343 |\n", "| clip_fraction | 0.325 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.328 |\n", "| explained_variance | 0.915 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.48 |\n", "| policy_gradient_loss | 0.0426 |\n", "| value_loss | 20.9 |\n", "----------------------------------------\n", "Eval num_timesteps=300000, episode_reward=56.00 +/- 29.63\n", "Episode length: 788.00 +/- 76.30\n", "---------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 788 |\n", "| mean_reward | 56 |\n", "| time/ | |\n", "| total_timesteps | 300000 |\n", "| train/ | |\n", "| approx_kl | 0.6411332 |\n", "| clip_fraction | 0.332 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.299 |\n", "| explained_variance | 0.916 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2 |\n", "| policy_gradient_loss | -0.0166 |\n", "| value_loss | 9.13 |\n", "---------------------------------------\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 756 |\n", "| ep_rew_mean | 121 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 147 |\n", "| time_elapsed | 2116 |\n", "| total_timesteps | 301056 |\n", "---------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 751 |\n", "| ep_rew_mean | 121 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 148 |\n", "| time_elapsed | 2130 |\n", "| total_timesteps | 303104 |\n", "| train/ | |\n", "| approx_kl | 0.1651521 |\n", "| clip_fraction | 0.242 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.298 |\n", "| explained_variance | 0.891 |\n", "| learning_rate | 0.0003 |\n", "| loss | 4.6 |\n", "| policy_gradient_loss | 0.0161 |\n", "| value_loss | 12.5 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 117 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 149 |\n", "| time_elapsed | 2144 |\n", "| total_timesteps | 305152 |\n", "| train/ | |\n", "| approx_kl | 0.2521631 |\n", "| clip_fraction | 0.342 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.414 |\n", "| explained_variance | 0.927 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.53 |\n", "| policy_gradient_loss | 0.0114 |\n", "| value_loss | 5.84 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 722 |\n", "| ep_rew_mean | 115 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 150 |\n", "| time_elapsed | 2158 |\n", "| total_timesteps | 307200 |\n", "| train/ | |\n", "| approx_kl | 0.18310647 |\n", "| clip_fraction | 0.315 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.416 |\n", "| explained_variance | 0.856 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.26 |\n", "| policy_gradient_loss | -0.00314 |\n", "| value_loss | 6.49 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 717 |\n", "| ep_rew_mean | 115 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 151 |\n", "| time_elapsed | 2172 |\n", "| total_timesteps | 309248 |\n", "| train/ | |\n", "| approx_kl | 0.23910785 |\n", "| clip_fraction | 0.315 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.473 |\n", "| explained_variance | 0.869 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.779 |\n", "| policy_gradient_loss | -0.0194 |\n", "| value_loss | 3.43 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 714 |\n", "| ep_rew_mean | 113 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 152 |\n", "| time_elapsed | 2186 |\n", "| total_timesteps | 311296 |\n", "| train/ | |\n", "| approx_kl | 0.14821649 |\n", "| clip_fraction | 0.398 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.635 |\n", "| explained_variance | 0.928 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.831 |\n", "| policy_gradient_loss | -0.00999 |\n", "| value_loss | 2.08 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 713 |\n", "| ep_rew_mean | 115 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 153 |\n", "| time_elapsed | 2200 |\n", "| total_timesteps | 313344 |\n", "| train/ | |\n", "| approx_kl | 0.22995046 |\n", "| clip_fraction | 0.383 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.659 |\n", "| explained_variance | 0.951 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0765 |\n", "| policy_gradient_loss | -0.019 |\n", "| value_loss | 0.922 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 712 |\n", "| ep_rew_mean | 115 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 154 |\n", "| time_elapsed | 2214 |\n", "| total_timesteps | 315392 |\n", "| train/ | |\n", "| approx_kl | 0.32138792 |\n", "| clip_fraction | 0.476 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.517 |\n", "| explained_variance | 0.608 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.656 |\n", "| policy_gradient_loss | 0.0364 |\n", "| value_loss | 16.4 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 710 |\n", "| ep_rew_mean | 115 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 155 |\n", "| time_elapsed | 2228 |\n", "| total_timesteps | 317440 |\n", "| train/ | |\n", "| approx_kl | 0.12759377 |\n", "| clip_fraction | 0.316 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.489 |\n", "| explained_variance | 0.761 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.164 |\n", "| policy_gradient_loss | -0.00619 |\n", "| value_loss | 2.14 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 706 |\n", "| ep_rew_mean | 112 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 156 |\n", "| time_elapsed | 2242 |\n", "| total_timesteps | 319488 |\n", "| train/ | |\n", "| approx_kl | 0.22897053 |\n", "| clip_fraction | 0.339 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.409 |\n", "| explained_variance | 0.632 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.03 |\n", "| policy_gradient_loss | 0.0558 |\n", "| value_loss | 9.8 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 703 |\n", "| ep_rew_mean | 113 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 157 |\n", "| time_elapsed | 2256 |\n", "| total_timesteps | 321536 |\n", "| train/ | |\n", "| approx_kl | 0.15875757 |\n", "| clip_fraction | 0.345 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.472 |\n", "| explained_variance | 0.923 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.208 |\n", "| policy_gradient_loss | -0.012 |\n", "| value_loss | 1.21 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 706 |\n", "| ep_rew_mean | 114 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 158 |\n", "| time_elapsed | 2270 |\n", "| total_timesteps | 323584 |\n", "| train/ | |\n", "| approx_kl | 0.13802974 |\n", "| clip_fraction | 0.274 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.374 |\n", "| explained_variance | 0.848 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.29 |\n", "| policy_gradient_loss | -0.0012 |\n", "| value_loss | 6.95 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 701 |\n", "| ep_rew_mean | 114 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 159 |\n", "| time_elapsed | 2284 |\n", "| total_timesteps | 325632 |\n", "| train/ | |\n", "| approx_kl | 0.12416622 |\n", "| clip_fraction | 0.319 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.478 |\n", "| explained_variance | 0.885 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.23 |\n", "| policy_gradient_loss | 0.00389 |\n", "| value_loss | 4.42 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 694 |\n", "| ep_rew_mean | 114 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 160 |\n", "| time_elapsed | 2298 |\n", "| total_timesteps | 327680 |\n", "| train/ | |\n", "| approx_kl | 0.059179455 |\n", "| clip_fraction | 0.255 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.479 |\n", "| explained_variance | 0.924 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.616 |\n", "| policy_gradient_loss | 0.0013 |\n", "| value_loss | 3.97 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 698 |\n", "| ep_rew_mean | 112 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 161 |\n", "| time_elapsed | 2312 |\n", "| total_timesteps | 329728 |\n", "| train/ | |\n", "| approx_kl | 0.085761696 |\n", "| clip_fraction | 0.288 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.476 |\n", "| explained_variance | 0.868 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.772 |\n", "| policy_gradient_loss | -0.00698 |\n", "| value_loss | 2.99 |\n", "-----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 704 |\n", "| ep_rew_mean | 114 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 162 |\n", "| time_elapsed | 2326 |\n", "| total_timesteps | 331776 |\n", "| train/ | |\n", "| approx_kl | 0.0834842 |\n", "| clip_fraction | 0.293 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.52 |\n", "| explained_variance | 0.947 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.124 |\n", "| policy_gradient_loss | -0.0184 |\n", "| value_loss | 2.71 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 704 |\n", "| ep_rew_mean | 113 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 163 |\n", "| time_elapsed | 2340 |\n", "| total_timesteps | 333824 |\n", "| train/ | |\n", "| approx_kl | 0.1324242 |\n", "| clip_fraction | 0.362 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.568 |\n", "| explained_variance | 0.933 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.205 |\n", "| policy_gradient_loss | -0.0084 |\n", "| value_loss | 1.66 |\n", "---------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 697 |\n", "| ep_rew_mean | 112 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 164 |\n", "| time_elapsed | 2354 |\n", "| total_timesteps | 335872 |\n", "| train/ | |\n", "| approx_kl | 0.086936265 |\n", "| clip_fraction | 0.318 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.579 |\n", "| explained_variance | 0.934 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0431 |\n", "| policy_gradient_loss | -0.0177 |\n", "| value_loss | 1.21 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 704 |\n", "| ep_rew_mean | 114 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 165 |\n", "| time_elapsed | 2368 |\n", "| total_timesteps | 337920 |\n", "| train/ | |\n", "| approx_kl | 0.16887346 |\n", "| clip_fraction | 0.375 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.557 |\n", "| explained_variance | 0.733 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.74 |\n", "| policy_gradient_loss | 0.0104 |\n", "| value_loss | 9.73 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 704 |\n", "| ep_rew_mean | 111 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 166 |\n", "| time_elapsed | 2382 |\n", "| total_timesteps | 339968 |\n", "| train/ | |\n", "| approx_kl | 0.11344146 |\n", "| clip_fraction | 0.352 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.574 |\n", "| explained_variance | 0.882 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.143 |\n", "| policy_gradient_loss | -0.0244 |\n", "| value_loss | 1.7 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 698 |\n", "| ep_rew_mean | 107 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 167 |\n", "| time_elapsed | 2396 |\n", "| total_timesteps | 342016 |\n", "| train/ | |\n", "| approx_kl | 0.12337445 |\n", "| clip_fraction | 0.299 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.503 |\n", "| explained_variance | 0.879 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.63 |\n", "| policy_gradient_loss | -0.00707 |\n", "| value_loss | 2.08 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 698 |\n", "| ep_rew_mean | 103 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 168 |\n", "| time_elapsed | 2410 |\n", "| total_timesteps | 344064 |\n", "| train/ | |\n", "| approx_kl | 0.11831692 |\n", "| clip_fraction | 0.337 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.608 |\n", "| explained_variance | 0.817 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.338 |\n", "| policy_gradient_loss | -0.0205 |\n", "| value_loss | 2.42 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 692 |\n", "| ep_rew_mean | 103 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 169 |\n", "| time_elapsed | 2424 |\n", "| total_timesteps | 346112 |\n", "| train/ | |\n", "| approx_kl | 0.11209379 |\n", "| clip_fraction | 0.355 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.656 |\n", "| explained_variance | 0.912 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.188 |\n", "| policy_gradient_loss | -0.00833 |\n", "| value_loss | 1.82 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 693 |\n", "| ep_rew_mean | 101 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 170 |\n", "| time_elapsed | 2438 |\n", "| total_timesteps | 348160 |\n", "| train/ | |\n", "| approx_kl | 0.11177954 |\n", "| clip_fraction | 0.323 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.528 |\n", "| explained_variance | 0.727 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.138 |\n", "| policy_gradient_loss | -0.00404 |\n", "| value_loss | 2.09 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 692 |\n", "| ep_rew_mean | 99.5 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 171 |\n", "| time_elapsed | 2452 |\n", "| total_timesteps | 350208 |\n", "| train/ | |\n", "| approx_kl | 0.07096104 |\n", "| clip_fraction | 0.342 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.753 |\n", "| explained_variance | 0.86 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.53 |\n", "| policy_gradient_loss | -0.0105 |\n", "| value_loss | 2.93 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 690 |\n", "| ep_rew_mean | 99.4 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 172 |\n", "| time_elapsed | 2466 |\n", "| total_timesteps | 352256 |\n", "| train/ | |\n", "| approx_kl | 0.16316283 |\n", "| clip_fraction | 0.409 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.533 |\n", "| explained_variance | 0.539 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.56 |\n", "| policy_gradient_loss | 0.037 |\n", "| value_loss | 8 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 690 |\n", "| ep_rew_mean | 93.6 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 173 |\n", "| time_elapsed | 2480 |\n", "| total_timesteps | 354304 |\n", "| train/ | |\n", "| approx_kl | 0.0751086 |\n", "| clip_fraction | 0.307 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.561 |\n", "| explained_variance | 0.849 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.778 |\n", "| policy_gradient_loss | -0.0122 |\n", "| value_loss | 3.35 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 686 |\n", "| ep_rew_mean | 88 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 174 |\n", "| time_elapsed | 2494 |\n", "| total_timesteps | 356352 |\n", "| train/ | |\n", "| approx_kl | 0.0889614 |\n", "| clip_fraction | 0.352 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.699 |\n", "| explained_variance | 0.866 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.096 |\n", "| policy_gradient_loss | -0.0271 |\n", "| value_loss | 0.688 |\n", "---------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 695 |\n", "| ep_rew_mean | 84.5 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 175 |\n", "| time_elapsed | 2507 |\n", "| total_timesteps | 358400 |\n", "| train/ | |\n", "| approx_kl | 0.055181906 |\n", "| clip_fraction | 0.315 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.623 |\n", "| explained_variance | 0.73 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.22 |\n", "| policy_gradient_loss | 0.00521 |\n", "| value_loss | 9.6 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 700 |\n", "| ep_rew_mean | 79 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 176 |\n", "| time_elapsed | 2521 |\n", "| total_timesteps | 360448 |\n", "| train/ | |\n", "| approx_kl | 0.08130774 |\n", "| clip_fraction | 0.331 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.665 |\n", "| explained_variance | 0.873 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.182 |\n", "| policy_gradient_loss | -0.0326 |\n", "| value_loss | 1.63 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 705 |\n", "| ep_rew_mean | 77.5 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 177 |\n", "| time_elapsed | 2535 |\n", "| total_timesteps | 362496 |\n", "| train/ | |\n", "| approx_kl | 0.05172968 |\n", "| clip_fraction | 0.311 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.785 |\n", "| explained_variance | 0.868 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.21 |\n", "| policy_gradient_loss | -0.0273 |\n", "| value_loss | 1.29 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 710 |\n", "| ep_rew_mean | 76.2 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 178 |\n", "| time_elapsed | 2549 |\n", "| total_timesteps | 364544 |\n", "| train/ | |\n", "| approx_kl | 0.056812525 |\n", "| clip_fraction | 0.312 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.691 |\n", "| explained_variance | 0.926 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.162 |\n", "| policy_gradient_loss | -0.0273 |\n", "| value_loss | 0.725 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 714 |\n", "| ep_rew_mean | 72.2 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 179 |\n", "| time_elapsed | 2563 |\n", "| total_timesteps | 366592 |\n", "| train/ | |\n", "| approx_kl | 0.06368926 |\n", "| clip_fraction | 0.306 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.696 |\n", "| explained_variance | 0.914 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.198 |\n", "| policy_gradient_loss | -0.0282 |\n", "| value_loss | 1.55 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 719 |\n", "| ep_rew_mean | 72.2 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 180 |\n", "| time_elapsed | 2577 |\n", "| total_timesteps | 368640 |\n", "| train/ | |\n", "| approx_kl | 0.062431186 |\n", "| clip_fraction | 0.365 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.772 |\n", "| explained_variance | 0.865 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.105 |\n", "| policy_gradient_loss | -0.0308 |\n", "| value_loss | 0.745 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 724 |\n", "| ep_rew_mean | 69.5 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 181 |\n", "| time_elapsed | 2591 |\n", "| total_timesteps | 370688 |\n", "| train/ | |\n", "| approx_kl | 0.15092671 |\n", "| clip_fraction | 0.386 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.554 |\n", "| explained_variance | 0.242 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.928 |\n", "| policy_gradient_loss | 0.0337 |\n", "| value_loss | 18.8 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 727 |\n", "| ep_rew_mean | 70.1 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 182 |\n", "| time_elapsed | 2605 |\n", "| total_timesteps | 372736 |\n", "| train/ | |\n", "| approx_kl | 0.10485208 |\n", "| clip_fraction | 0.38 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.638 |\n", "| explained_variance | 0.721 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.237 |\n", "| policy_gradient_loss | 0.00143 |\n", "| value_loss | 2.3 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 732 |\n", "| ep_rew_mean | 71.3 |\n", "| time/ | |\n", "| fps | 143 |\n", "| iterations | 183 |\n", "| time_elapsed | 2619 |\n", "| total_timesteps | 374784 |\n", "| train/ | |\n", "| approx_kl | 0.09096664 |\n", "| clip_fraction | 0.315 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.565 |\n", "| explained_variance | 0.89 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.106 |\n", "| policy_gradient_loss | -0.0101 |\n", "| value_loss | 1.27 |\n", "----------------------------------------\n", "Eval num_timesteps=375000, episode_reward=103.30 +/- 34.53\n", "Episode length: 902.80 +/- 158.02\n", "----------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 903 |\n", "| mean_reward | 103 |\n", "| time/ | |\n", "| total_timesteps | 375000 |\n", "| train/ | |\n", "| approx_kl | 0.08411345 |\n", "| clip_fraction | 0.322 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.62 |\n", "| explained_variance | 0.955 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.344 |\n", "| policy_gradient_loss | -0.0198 |\n", "| value_loss | 1.01 |\n", "----------------------------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/gymnasium/utils/passive_env_checker.py:335: UserWarning: \u001b[33mWARN: No render fps was declared in the environment (env.metadata['render_fps'] is None or not defined), rendering may occur at inconsistent fps.\u001b[0m\n", " logger.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 736 |\n", "| ep_rew_mean | 73.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 184 |\n", "| time_elapsed | 2659 |\n", "| total_timesteps | 376832 |\n", "---------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 743 |\n", "| ep_rew_mean | 74.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 185 |\n", "| time_elapsed | 2679 |\n", "| total_timesteps | 378880 |\n", "| train/ | |\n", "| approx_kl | 0.05271803 |\n", "| clip_fraction | 0.292 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.697 |\n", "| explained_variance | 0.978 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.358 |\n", "| policy_gradient_loss | -0.0212 |\n", "| value_loss | 0.843 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 754 |\n", "| ep_rew_mean | 77.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 186 |\n", "| time_elapsed | 2693 |\n", "| total_timesteps | 380928 |\n", "| train/ | |\n", "| approx_kl | 0.0652138 |\n", "| clip_fraction | 0.327 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.703 |\n", "| explained_variance | 0.989 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.257 |\n", "| policy_gradient_loss | -0.0227 |\n", "| value_loss | 0.675 |\n", "---------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 762 |\n", "| ep_rew_mean | 78.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 187 |\n", "| time_elapsed | 2707 |\n", "| total_timesteps | 382976 |\n", "| train/ | |\n", "| approx_kl | 0.059400246 |\n", "| clip_fraction | 0.309 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.717 |\n", "| explained_variance | 0.99 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.6 |\n", "| policy_gradient_loss | -0.0329 |\n", "| value_loss | 0.88 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 770 |\n", "| ep_rew_mean | 83.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 188 |\n", "| time_elapsed | 2721 |\n", "| total_timesteps | 385024 |\n", "| train/ | |\n", "| approx_kl | 0.13708091 |\n", "| clip_fraction | 0.355 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.774 |\n", "| explained_variance | 0.937 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.364 |\n", "| policy_gradient_loss | -0.0358 |\n", "| value_loss | 2.25 |\n", "----------------------------------------\n", "--------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 775 |\n", "| ep_rew_mean | 85.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 189 |\n", "| time_elapsed | 2735 |\n", "| total_timesteps | 387072 |\n", "| train/ | |\n", "| approx_kl | 0.823418 |\n", "| clip_fraction | 0.522 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.438 |\n", "| explained_variance | 0.434 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.07 |\n", "| policy_gradient_loss | 0.0655 |\n", "| value_loss | 36.7 |\n", "--------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 778 |\n", "| ep_rew_mean | 86.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 190 |\n", "| time_elapsed | 2749 |\n", "| total_timesteps | 389120 |\n", "| train/ | |\n", "| approx_kl | 0.16349491 |\n", "| clip_fraction | 0.308 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.428 |\n", "| explained_variance | 0.933 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.148 |\n", "| policy_gradient_loss | -0.0159 |\n", "| value_loss | 1.97 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 781 |\n", "| ep_rew_mean | 85.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 191 |\n", "| time_elapsed | 2763 |\n", "| total_timesteps | 391168 |\n", "| train/ | |\n", "| approx_kl | 0.09203384 |\n", "| clip_fraction | 0.33 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.575 |\n", "| explained_variance | 0.895 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.45 |\n", "| policy_gradient_loss | -0.0195 |\n", "| value_loss | 2.73 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 782 |\n", "| ep_rew_mean | 86.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 192 |\n", "| time_elapsed | 2777 |\n", "| total_timesteps | 393216 |\n", "| train/ | |\n", "| approx_kl | 0.1650383 |\n", "| clip_fraction | 0.349 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.516 |\n", "| explained_variance | 0.749 |\n", "| learning_rate | 0.0003 |\n", "| loss | 5.17 |\n", "| policy_gradient_loss | 0.0268 |\n", "| value_loss | 17.9 |\n", "---------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 782 |\n", "| ep_rew_mean | 89.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 193 |\n", "| time_elapsed | 2791 |\n", "| total_timesteps | 395264 |\n", "| train/ | |\n", "| approx_kl | 0.079302534 |\n", "| clip_fraction | 0.259 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.461 |\n", "| explained_variance | 0.917 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.375 |\n", "| policy_gradient_loss | -0.0189 |\n", "| value_loss | 2.66 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 785 |\n", "| ep_rew_mean | 93.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 194 |\n", "| time_elapsed | 2805 |\n", "| total_timesteps | 397312 |\n", "| train/ | |\n", "| approx_kl | 0.18946514 |\n", "| clip_fraction | 0.366 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.415 |\n", "| explained_variance | 0.578 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.17 |\n", "| policy_gradient_loss | 0.0463 |\n", "| value_loss | 29.8 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 789 |\n", "| ep_rew_mean | 92.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 195 |\n", "| time_elapsed | 2819 |\n", "| total_timesteps | 399360 |\n", "| train/ | |\n", "| approx_kl | 0.20338261 |\n", "| clip_fraction | 0.299 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.402 |\n", "| explained_variance | 0.808 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.373 |\n", "| policy_gradient_loss | -0.00127 |\n", "| value_loss | 4.6 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 798 |\n", "| ep_rew_mean | 92.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 196 |\n", "| time_elapsed | 2833 |\n", "| total_timesteps | 401408 |\n", "| train/ | |\n", "| approx_kl | 0.13632123 |\n", "| clip_fraction | 0.333 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.504 |\n", "| explained_variance | 0.881 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.333 |\n", "| policy_gradient_loss | -0.0327 |\n", "| value_loss | 1.68 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 800 |\n", "| ep_rew_mean | 94.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 197 |\n", "| time_elapsed | 2847 |\n", "| total_timesteps | 403456 |\n", "| train/ | |\n", "| approx_kl | 0.13202608 |\n", "| clip_fraction | 0.325 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.544 |\n", "| explained_variance | 0.894 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0433 |\n", "| policy_gradient_loss | -0.0207 |\n", "| value_loss | 1.77 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 803 |\n", "| ep_rew_mean | 101 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 198 |\n", "| time_elapsed | 2861 |\n", "| total_timesteps | 405504 |\n", "| train/ | |\n", "| approx_kl | 0.18809372 |\n", "| clip_fraction | 0.363 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.39 |\n", "| explained_variance | 0.709 |\n", "| learning_rate | 0.0003 |\n", "| loss | 4.52 |\n", "| policy_gradient_loss | 0.0208 |\n", "| value_loss | 17 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 806 |\n", "| ep_rew_mean | 104 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 199 |\n", "| time_elapsed | 2875 |\n", "| total_timesteps | 407552 |\n", "| train/ | |\n", "| approx_kl | 0.12793893 |\n", "| clip_fraction | 0.252 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.316 |\n", "| explained_variance | 0.904 |\n", "| learning_rate | 0.0003 |\n", "| loss | 7.88 |\n", "| policy_gradient_loss | 0.00796 |\n", "| value_loss | 31.4 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 804 |\n", "| ep_rew_mean | 112 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 200 |\n", "| time_elapsed | 2889 |\n", "| total_timesteps | 409600 |\n", "| train/ | |\n", "| approx_kl | 0.4776439 |\n", "| clip_fraction | 0.338 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.268 |\n", "| explained_variance | 0.901 |\n", "| learning_rate | 0.0003 |\n", "| loss | 9.05 |\n", "| policy_gradient_loss | 0.0244 |\n", "| value_loss | 30.3 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 795 |\n", "| ep_rew_mean | 113 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 201 |\n", "| time_elapsed | 2904 |\n", "| total_timesteps | 411648 |\n", "| train/ | |\n", "| approx_kl | 0.14395808 |\n", "| clip_fraction | 0.236 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.278 |\n", "| explained_variance | 0.962 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.96 |\n", "| policy_gradient_loss | 0.0203 |\n", "| value_loss | 14.5 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 795 |\n", "| ep_rew_mean | 112 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 202 |\n", "| time_elapsed | 2918 |\n", "| total_timesteps | 413696 |\n", "| train/ | |\n", "| approx_kl | 0.29212752 |\n", "| clip_fraction | 0.236 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.243 |\n", "| explained_variance | 0.837 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.58 |\n", "| policy_gradient_loss | 0.00281 |\n", "| value_loss | 8.03 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 784 |\n", "| ep_rew_mean | 109 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 203 |\n", "| time_elapsed | 2932 |\n", "| total_timesteps | 415744 |\n", "| train/ | |\n", "| approx_kl | 0.24438578 |\n", "| clip_fraction | 0.292 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.37 |\n", "| explained_variance | 0.798 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.875 |\n", "| policy_gradient_loss | 0.00617 |\n", "| value_loss | 12.5 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 777 |\n", "| ep_rew_mean | 109 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 204 |\n", "| time_elapsed | 2946 |\n", "| total_timesteps | 417792 |\n", "| train/ | |\n", "| approx_kl | 2.0556433 |\n", "| clip_fraction | 0.289 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.25 |\n", "| explained_variance | 0.826 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0103 |\n", "| policy_gradient_loss | -0.0316 |\n", "| value_loss | 1.21 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 788 |\n", "| ep_rew_mean | 108 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 205 |\n", "| time_elapsed | 2960 |\n", "| total_timesteps | 419840 |\n", "| train/ | |\n", "| approx_kl | 0.15585083 |\n", "| clip_fraction | 0.35 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.456 |\n", "| explained_variance | 0.804 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.127 |\n", "| policy_gradient_loss | -0.0126 |\n", "| value_loss | 1.7 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 780 |\n", "| ep_rew_mean | 106 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 206 |\n", "| time_elapsed | 2974 |\n", "| total_timesteps | 421888 |\n", "| train/ | |\n", "| approx_kl | 0.09997189 |\n", "| clip_fraction | 0.313 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.57 |\n", "| explained_variance | 0.847 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0126 |\n", "| policy_gradient_loss | -0.0228 |\n", "| value_loss | 0.518 |\n", "----------------------------------------\n", "--------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 776 |\n", "| ep_rew_mean | 104 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 207 |\n", "| time_elapsed | 2988 |\n", "| total_timesteps | 423936 |\n", "| train/ | |\n", "| approx_kl | 0.119062 |\n", "| clip_fraction | 0.333 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.471 |\n", "| explained_variance | 0.842 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0273 |\n", "| policy_gradient_loss | -0.0331 |\n", "| value_loss | 0.459 |\n", "--------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 768 |\n", "| ep_rew_mean | 102 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 208 |\n", "| time_elapsed | 3002 |\n", "| total_timesteps | 425984 |\n", "| train/ | |\n", "| approx_kl | 0.112563185 |\n", "| clip_fraction | 0.318 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.522 |\n", "| explained_variance | 0.931 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0324 |\n", "| policy_gradient_loss | -0.0401 |\n", "| value_loss | 0.242 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 760 |\n", "| ep_rew_mean | 99.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 209 |\n", "| time_elapsed | 3016 |\n", "| total_timesteps | 428032 |\n", "| train/ | |\n", "| approx_kl | 0.12738952 |\n", "| clip_fraction | 0.289 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.514 |\n", "| explained_variance | 0.886 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00513 |\n", "| policy_gradient_loss | -0.039 |\n", "| value_loss | 0.277 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 760 |\n", "| ep_rew_mean | 98.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 210 |\n", "| time_elapsed | 3030 |\n", "| total_timesteps | 430080 |\n", "| train/ | |\n", "| approx_kl | 0.09323422 |\n", "| clip_fraction | 0.287 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.502 |\n", "| explained_variance | 0.921 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0182 |\n", "| policy_gradient_loss | -0.0426 |\n", "| value_loss | 0.266 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 753 |\n", "| ep_rew_mean | 95.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 211 |\n", "| time_elapsed | 3044 |\n", "| total_timesteps | 432128 |\n", "| train/ | |\n", "| approx_kl | 0.1112321 |\n", "| clip_fraction | 0.33 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.538 |\n", "| explained_variance | 0.932 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0712 |\n", "| policy_gradient_loss | -0.0543 |\n", "| value_loss | 0.173 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 749 |\n", "| ep_rew_mean | 95.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 212 |\n", "| time_elapsed | 3058 |\n", "| total_timesteps | 434176 |\n", "| train/ | |\n", "| approx_kl | 0.08908244 |\n", "| clip_fraction | 0.275 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.528 |\n", "| explained_variance | 0.89 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0554 |\n", "| policy_gradient_loss | -0.0402 |\n", "| value_loss | 0.224 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 749 |\n", "| ep_rew_mean | 95.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 213 |\n", "| time_elapsed | 3072 |\n", "| total_timesteps | 436224 |\n", "| train/ | |\n", "| approx_kl | 0.051466674 |\n", "| clip_fraction | 0.308 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.914 |\n", "| explained_variance | 0.843 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00799 |\n", "| policy_gradient_loss | -0.0325 |\n", "| value_loss | 0.149 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 749 |\n", "| ep_rew_mean | 95.7 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 214 |\n", "| time_elapsed | 3085 |\n", "| total_timesteps | 438272 |\n", "| train/ | |\n", "| approx_kl | 0.030942123 |\n", "| clip_fraction | 0.267 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.914 |\n", "| explained_variance | -0.895 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0252 |\n", "| policy_gradient_loss | -0.0261 |\n", "| value_loss | 0.0116 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 749 |\n", "| ep_rew_mean | 95.7 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 215 |\n", "| time_elapsed | 3099 |\n", "| total_timesteps | 440320 |\n", "| train/ | |\n", "| approx_kl | 0.051406134 |\n", "| clip_fraction | 0.221 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.89 |\n", "| explained_variance | -0.145 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0473 |\n", "| policy_gradient_loss | -0.0185 |\n", "| value_loss | 0.00544 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 749 |\n", "| ep_rew_mean | 95.7 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 216 |\n", "| time_elapsed | 3113 |\n", "| total_timesteps | 442368 |\n", "| train/ | |\n", "| approx_kl | 0.06445207 |\n", "| clip_fraction | 0.23 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.858 |\n", "| explained_variance | 0.299 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00372 |\n", "| policy_gradient_loss | -0.0231 |\n", "| value_loss | 0.0143 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 749 |\n", "| ep_rew_mean | 95.7 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 217 |\n", "| time_elapsed | 3127 |\n", "| total_timesteps | 444416 |\n", "| train/ | |\n", "| approx_kl | 0.046427976 |\n", "| clip_fraction | 0.266 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.915 |\n", "| explained_variance | -0.405 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0427 |\n", "| policy_gradient_loss | -0.0303 |\n", "| value_loss | 0.0106 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 749 |\n", "| ep_rew_mean | 95.7 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 218 |\n", "| time_elapsed | 3141 |\n", "| total_timesteps | 446464 |\n", "| train/ | |\n", "| approx_kl | 0.05969965 |\n", "| clip_fraction | 0.268 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.906 |\n", "| explained_variance | -0.691 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0314 |\n", "| policy_gradient_loss | -0.0376 |\n", "| value_loss | 0.00494 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 902 |\n", "| ep_rew_mean | 96.1 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 219 |\n", "| time_elapsed | 3154 |\n", "| total_timesteps | 448512 |\n", "| train/ | |\n", "| approx_kl | 0.050744288 |\n", "| clip_fraction | 0.266 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.879 |\n", "| explained_variance | -0.318 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.077 |\n", "| policy_gradient_loss | -0.0411 |\n", "| value_loss | 0.00321 |\n", "-----------------------------------------\n", "Eval num_timesteps=450000, episode_reward=23.30 +/- 7.63\n", "Episode length: 560.00 +/- 78.65\n", "----------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 560 |\n", "| mean_reward | 23.3 |\n", "| time/ | |\n", "| total_timesteps | 450000 |\n", "| train/ | |\n", "| approx_kl | 0.14469627 |\n", "| clip_fraction | 0.299 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.772 |\n", "| explained_variance | 0.253 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0787 |\n", "| policy_gradient_loss | -0.041 |\n", "| value_loss | 0.068 |\n", "----------------------------------------\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 886 |\n", "| ep_rew_mean | 94.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 220 |\n", "| time_elapsed | 3183 |\n", "| total_timesteps | 450560 |\n", "---------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 880 |\n", "| ep_rew_mean | 94 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 221 |\n", "| time_elapsed | 3197 |\n", "| total_timesteps | 452608 |\n", "| train/ | |\n", "| approx_kl | 0.1725603 |\n", "| clip_fraction | 0.359 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.487 |\n", "| explained_variance | 0.841 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.00311 |\n", "| policy_gradient_loss | -0.0482 |\n", "| value_loss | 0.184 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 864 |\n", "| ep_rew_mean | 92.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 222 |\n", "| time_elapsed | 3210 |\n", "| total_timesteps | 454656 |\n", "| train/ | |\n", "| approx_kl | 0.15279655 |\n", "| clip_fraction | 0.347 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.548 |\n", "| explained_variance | 0.656 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0264 |\n", "| policy_gradient_loss | -0.0372 |\n", "| value_loss | 0.387 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 856 |\n", "| ep_rew_mean | 89.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 223 |\n", "| time_elapsed | 3224 |\n", "| total_timesteps | 456704 |\n", "| train/ | |\n", "| approx_kl | 0.12697223 |\n", "| clip_fraction | 0.337 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.534 |\n", "| explained_variance | 0.843 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0362 |\n", "| policy_gradient_loss | -0.0536 |\n", "| value_loss | 0.268 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 855 |\n", "| ep_rew_mean | 88 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 224 |\n", "| time_elapsed | 3238 |\n", "| total_timesteps | 458752 |\n", "| train/ | |\n", "| approx_kl | 0.22036548 |\n", "| clip_fraction | 0.453 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.722 |\n", "| explained_variance | 0.79 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0416 |\n", "| policy_gradient_loss | -0.0678 |\n", "| value_loss | 0.255 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 857 |\n", "| ep_rew_mean | 86.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 225 |\n", "| time_elapsed | 3252 |\n", "| total_timesteps | 460800 |\n", "| train/ | |\n", "| approx_kl | 0.23476651 |\n", "| clip_fraction | 0.481 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.681 |\n", "| explained_variance | 0.669 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0584 |\n", "| policy_gradient_loss | -0.0524 |\n", "| value_loss | 0.564 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 849 |\n", "| ep_rew_mean | 85.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 226 |\n", "| time_elapsed | 3266 |\n", "| total_timesteps | 462848 |\n", "| train/ | |\n", "| approx_kl | 0.18730597 |\n", "| clip_fraction | 0.435 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.686 |\n", "| explained_variance | 0.84 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.042 |\n", "| policy_gradient_loss | -0.0706 |\n", "| value_loss | 0.33 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 845 |\n", "| ep_rew_mean | 83.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 227 |\n", "| time_elapsed | 3280 |\n", "| total_timesteps | 464896 |\n", "| train/ | |\n", "| approx_kl | 0.22970642 |\n", "| clip_fraction | 0.389 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.528 |\n", "| explained_variance | 0.398 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.156 |\n", "| policy_gradient_loss | 0.000897 |\n", "| value_loss | 2.23 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 851 |\n", "| ep_rew_mean | 83.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 228 |\n", "| time_elapsed | 3294 |\n", "| total_timesteps | 466944 |\n", "| train/ | |\n", "| approx_kl | 0.23550135 |\n", "| clip_fraction | 0.379 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.573 |\n", "| explained_variance | 0.664 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0653 |\n", "| policy_gradient_loss | -0.0503 |\n", "| value_loss | 0.392 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 847 |\n", "| ep_rew_mean | 81.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 229 |\n", "| time_elapsed | 3308 |\n", "| total_timesteps | 468992 |\n", "| train/ | |\n", "| approx_kl | 0.19395173 |\n", "| clip_fraction | 0.423 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.675 |\n", "| explained_variance | 0.769 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0335 |\n", "| policy_gradient_loss | -0.066 |\n", "| value_loss | 0.302 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 848 |\n", "| ep_rew_mean | 78.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 230 |\n", "| time_elapsed | 3322 |\n", "| total_timesteps | 471040 |\n", "| train/ | |\n", "| approx_kl | 0.1349074 |\n", "| clip_fraction | 0.376 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.658 |\n", "| explained_variance | 0.359 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.165 |\n", "| policy_gradient_loss | -0.018 |\n", "| value_loss | 1.75 |\n", "---------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 846 |\n", "| ep_rew_mean | 78.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 231 |\n", "| time_elapsed | 3336 |\n", "| total_timesteps | 473088 |\n", "| train/ | |\n", "| approx_kl | 0.092993096 |\n", "| clip_fraction | 0.373 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.67 |\n", "| explained_variance | 0.372 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.153 |\n", "| policy_gradient_loss | 0.00868 |\n", "| value_loss | 4.82 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 842 |\n", "| ep_rew_mean | 78.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 232 |\n", "| time_elapsed | 3350 |\n", "| total_timesteps | 475136 |\n", "| train/ | |\n", "| approx_kl | 0.16597915 |\n", "| clip_fraction | 0.396 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.496 |\n", "| explained_variance | 0.092 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.362 |\n", "| policy_gradient_loss | 0.00883 |\n", "| value_loss | 9.48 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 845 |\n", "| ep_rew_mean | 81 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 233 |\n", "| time_elapsed | 3364 |\n", "| total_timesteps | 477184 |\n", "| train/ | |\n", "| approx_kl | 0.29766554 |\n", "| clip_fraction | 0.382 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.482 |\n", "| explained_variance | 0.78 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.888 |\n", "| policy_gradient_loss | -0.0184 |\n", "| value_loss | 2.42 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 852 |\n", "| ep_rew_mean | 83.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 234 |\n", "| time_elapsed | 3378 |\n", "| total_timesteps | 479232 |\n", "| train/ | |\n", "| approx_kl | 0.15732828 |\n", "| clip_fraction | 0.405 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.572 |\n", "| explained_variance | 0.192 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.469 |\n", "| policy_gradient_loss | 0.0325 |\n", "| value_loss | 22.7 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 857 |\n", "| ep_rew_mean | 81 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 235 |\n", "| time_elapsed | 3392 |\n", "| total_timesteps | 481280 |\n", "| train/ | |\n", "| approx_kl | 0.21427096 |\n", "| clip_fraction | 0.388 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.437 |\n", "| explained_variance | 0.517 |\n", "| learning_rate | 0.0003 |\n", "| loss | 4.95 |\n", "| policy_gradient_loss | 0.0144 |\n", "| value_loss | 13.8 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 865 |\n", "| ep_rew_mean | 80.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 236 |\n", "| time_elapsed | 3406 |\n", "| total_timesteps | 483328 |\n", "| train/ | |\n", "| approx_kl | 0.33670786 |\n", "| clip_fraction | 0.404 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.507 |\n", "| explained_variance | 0.619 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.407 |\n", "| policy_gradient_loss | 0.0087 |\n", "| value_loss | 2.26 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 867 |\n", "| ep_rew_mean | 80.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 237 |\n", "| time_elapsed | 3419 |\n", "| total_timesteps | 485376 |\n", "| train/ | |\n", "| approx_kl | 0.17191638 |\n", "| clip_fraction | 0.363 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.471 |\n", "| explained_variance | 0.609 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.5 |\n", "| policy_gradient_loss | 0.0142 |\n", "| value_loss | 9.67 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 866 |\n", "| ep_rew_mean | 81.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 238 |\n", "| time_elapsed | 3433 |\n", "| total_timesteps | 487424 |\n", "| train/ | |\n", "| approx_kl | 0.26880112 |\n", "| clip_fraction | 0.3 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.425 |\n", "| explained_variance | 0.803 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.314 |\n", "| policy_gradient_loss | -0.00524 |\n", "| value_loss | 1.88 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 868 |\n", "| ep_rew_mean | 81.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 239 |\n", "| time_elapsed | 3447 |\n", "| total_timesteps | 489472 |\n", "| train/ | |\n", "| approx_kl | 0.110899135 |\n", "| clip_fraction | 0.364 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.62 |\n", "| explained_variance | 0.808 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.155 |\n", "| policy_gradient_loss | -0.000502 |\n", "| value_loss | 1.96 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 872 |\n", "| ep_rew_mean | 73.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 240 |\n", "| time_elapsed | 3461 |\n", "| total_timesteps | 491520 |\n", "| train/ | |\n", "| approx_kl | 0.16109535 |\n", "| clip_fraction | 0.325 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.462 |\n", "| explained_variance | 0.824 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0407 |\n", "| policy_gradient_loss | -0.0209 |\n", "| value_loss | 1.42 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 875 |\n", "| ep_rew_mean | 70.7 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 241 |\n", "| time_elapsed | 3475 |\n", "| total_timesteps | 493568 |\n", "| train/ | |\n", "| approx_kl | 0.22145508 |\n", "| clip_fraction | 0.354 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.475 |\n", "| explained_variance | 0.698 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0537 |\n", "| policy_gradient_loss | -0.0304 |\n", "| value_loss | 0.959 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 873 |\n", "| ep_rew_mean | 69.8 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 242 |\n", "| time_elapsed | 3489 |\n", "| total_timesteps | 495616 |\n", "| train/ | |\n", "| approx_kl | 0.114001535 |\n", "| clip_fraction | 0.292 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.485 |\n", "| explained_variance | 0.768 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.171 |\n", "| policy_gradient_loss | 0.00376 |\n", "| value_loss | 4.52 |\n", "-----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 875 |\n", "| ep_rew_mean | 61.1 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 243 |\n", "| time_elapsed | 3503 |\n", "| total_timesteps | 497664 |\n", "| train/ | |\n", "| approx_kl | 0.2734463 |\n", "| clip_fraction | 0.365 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.46 |\n", "| explained_variance | 0.467 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.391 |\n", "| policy_gradient_loss | 0.00549 |\n", "| value_loss | 14.1 |\n", "---------------------------------------\n", "--------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 876 |\n", "| ep_rew_mean | 60.9 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 244 |\n", "| time_elapsed | 3517 |\n", "| total_timesteps | 499712 |\n", "| train/ | |\n", "| approx_kl | 0.283512 |\n", "| clip_fraction | 0.33 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.4 |\n", "| explained_variance | 0.855 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0294 |\n", "| policy_gradient_loss | -0.0339 |\n", "| value_loss | 0.551 |\n", "--------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 877 |\n", "| ep_rew_mean | 60.8 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 245 |\n", "| time_elapsed | 3531 |\n", "| total_timesteps | 501760 |\n", "| train/ | |\n", "| approx_kl | 0.15000038 |\n", "| clip_fraction | 0.308 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.497 |\n", "| explained_variance | 0.725 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.109 |\n", "| policy_gradient_loss | -0.0246 |\n", "| value_loss | 1.74 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 886 |\n", "| ep_rew_mean | 61.9 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 246 |\n", "| time_elapsed | 3545 |\n", "| total_timesteps | 503808 |\n", "| train/ | |\n", "| approx_kl | 0.16524383 |\n", "| clip_fraction | 0.366 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.566 |\n", "| explained_variance | 0.899 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0391 |\n", "| policy_gradient_loss | -0.043 |\n", "| value_loss | 0.375 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 889 |\n", "| ep_rew_mean | 62.3 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 247 |\n", "| time_elapsed | 3559 |\n", "| total_timesteps | 505856 |\n", "| train/ | |\n", "| approx_kl | 0.32864904 |\n", "| clip_fraction | 0.414 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.574 |\n", "| explained_variance | 0.958 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0233 |\n", "| policy_gradient_loss | -0.0443 |\n", "| value_loss | 0.267 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 883 |\n", "| ep_rew_mean | 63.4 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 248 |\n", "| time_elapsed | 3573 |\n", "| total_timesteps | 507904 |\n", "| train/ | |\n", "| approx_kl | 0.23856065 |\n", "| clip_fraction | 0.444 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.645 |\n", "| explained_variance | 0.619 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.324 |\n", "| policy_gradient_loss | -0.00557 |\n", "| value_loss | 2.35 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 885 |\n", "| ep_rew_mean | 64 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 249 |\n", "| time_elapsed | 3587 |\n", "| total_timesteps | 509952 |\n", "| train/ | |\n", "| approx_kl | 0.17311545 |\n", "| clip_fraction | 0.4 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.603 |\n", "| explained_variance | 0.905 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0863 |\n", "| policy_gradient_loss | -0.0459 |\n", "| value_loss | 0.375 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 891 |\n", "| ep_rew_mean | 65.3 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 250 |\n", "| time_elapsed | 3602 |\n", "| total_timesteps | 512000 |\n", "| train/ | |\n", "| approx_kl | 0.13060352 |\n", "| clip_fraction | 0.394 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.67 |\n", "| explained_variance | 0.954 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0195 |\n", "| policy_gradient_loss | -0.0546 |\n", "| value_loss | 0.217 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 898 |\n", "| ep_rew_mean | 66.7 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 251 |\n", "| time_elapsed | 3617 |\n", "| total_timesteps | 514048 |\n", "| train/ | |\n", "| approx_kl | 0.11306321 |\n", "| clip_fraction | 0.393 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.714 |\n", "| explained_variance | 0.944 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0392 |\n", "| policy_gradient_loss | -0.0505 |\n", "| value_loss | 0.303 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 903 |\n", "| ep_rew_mean | 70.6 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 252 |\n", "| time_elapsed | 3631 |\n", "| total_timesteps | 516096 |\n", "| train/ | |\n", "| approx_kl | 0.25834468 |\n", "| clip_fraction | 0.492 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.611 |\n", "| explained_variance | 0.209 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.03 |\n", "| policy_gradient_loss | 0.0346 |\n", "| value_loss | 25 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 912 |\n", "| ep_rew_mean | 71.5 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 253 |\n", "| time_elapsed | 3645 |\n", "| total_timesteps | 518144 |\n", "| train/ | |\n", "| approx_kl | 0.20559761 |\n", "| clip_fraction | 0.401 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.625 |\n", "| explained_variance | 0.784 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0144 |\n", "| policy_gradient_loss | -0.021 |\n", "| value_loss | 0.52 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 918 |\n", "| ep_rew_mean | 72.8 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 254 |\n", "| time_elapsed | 3659 |\n", "| total_timesteps | 520192 |\n", "| train/ | |\n", "| approx_kl | 0.19994293 |\n", "| clip_fraction | 0.423 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.655 |\n", "| explained_variance | 0.823 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0267 |\n", "| policy_gradient_loss | -0.0444 |\n", "| value_loss | 0.591 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 920 |\n", "| ep_rew_mean | 73.6 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 255 |\n", "| time_elapsed | 3673 |\n", "| total_timesteps | 522240 |\n", "| train/ | |\n", "| approx_kl | 0.17831133 |\n", "| clip_fraction | 0.375 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.571 |\n", "| explained_variance | 0.766 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.122 |\n", "| policy_gradient_loss | -0.0311 |\n", "| value_loss | 0.793 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 926 |\n", "| ep_rew_mean | 75.1 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 256 |\n", "| time_elapsed | 3687 |\n", "| total_timesteps | 524288 |\n", "| train/ | |\n", "| approx_kl | 0.1771544 |\n", "| clip_fraction | 0.37 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.581 |\n", "| explained_variance | 0.936 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.153 |\n", "| policy_gradient_loss | -0.048 |\n", "| value_loss | 0.322 |\n", "---------------------------------------\n", "Eval num_timesteps=525000, episode_reward=53.20 +/- 6.69\n", "Episode length: 699.30 +/- 48.35\n", "---------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 699 |\n", "| mean_reward | 53.2 |\n", "| time/ | |\n", "| total_timesteps | 525000 |\n", "| train/ | |\n", "| approx_kl | 0.2124804 |\n", "| clip_fraction | 0.423 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.612 |\n", "| explained_variance | 0.554 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00673 |\n", "| policy_gradient_loss | -0.0159 |\n", "| value_loss | 1.91 |\n", "---------------------------------------\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 774 |\n", "| ep_rew_mean | 75.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 257 |\n", "| time_elapsed | 3718 |\n", "| total_timesteps | 526336 |\n", "---------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 783 |\n", "| ep_rew_mean | 77.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 258 |\n", "| time_elapsed | 3732 |\n", "| total_timesteps | 528384 |\n", "| train/ | |\n", "| approx_kl | 0.19495898 |\n", "| clip_fraction | 0.394 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.581 |\n", "| explained_variance | 0.847 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.133 |\n", "| policy_gradient_loss | -0.0355 |\n", "| value_loss | 0.76 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 783 |\n", "| ep_rew_mean | 77.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 259 |\n", "| time_elapsed | 3746 |\n", "| total_timesteps | 530432 |\n", "| train/ | |\n", "| approx_kl | 0.15111619 |\n", "| clip_fraction | 0.393 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.634 |\n", "| explained_variance | 0.896 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0849 |\n", "| policy_gradient_loss | -0.0442 |\n", "| value_loss | 0.437 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 787 |\n", "| ep_rew_mean | 78.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 260 |\n", "| time_elapsed | 3760 |\n", "| total_timesteps | 532480 |\n", "| train/ | |\n", "| approx_kl | 0.15151629 |\n", "| clip_fraction | 0.407 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.688 |\n", "| explained_variance | 0.858 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00565 |\n", "| policy_gradient_loss | -0.0544 |\n", "| value_loss | 0.412 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 791 |\n", "| ep_rew_mean | 79.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 261 |\n", "| time_elapsed | 3774 |\n", "| total_timesteps | 534528 |\n", "| train/ | |\n", "| approx_kl | 0.16386682 |\n", "| clip_fraction | 0.393 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.705 |\n", "| explained_variance | 0.949 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0402 |\n", "| policy_gradient_loss | -0.0537 |\n", "| value_loss | 0.214 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 790 |\n", "| ep_rew_mean | 80 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 262 |\n", "| time_elapsed | 3788 |\n", "| total_timesteps | 536576 |\n", "| train/ | |\n", "| approx_kl | 0.12218474 |\n", "| clip_fraction | 0.319 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.652 |\n", "| explained_variance | 0.972 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0494 |\n", "| policy_gradient_loss | -0.0469 |\n", "| value_loss | 0.221 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 789 |\n", "| ep_rew_mean | 79.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 263 |\n", "| time_elapsed | 3802 |\n", "| total_timesteps | 538624 |\n", "| train/ | |\n", "| approx_kl | 0.07587215 |\n", "| clip_fraction | 0.385 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.828 |\n", "| explained_variance | 0.715 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0937 |\n", "| policy_gradient_loss | -0.0123 |\n", "| value_loss | 1.41 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 791 |\n", "| ep_rew_mean | 79.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 264 |\n", "| time_elapsed | 3816 |\n", "| total_timesteps | 540672 |\n", "| train/ | |\n", "| approx_kl | 0.09355261 |\n", "| clip_fraction | 0.396 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.807 |\n", "| explained_variance | 0.727 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0285 |\n", "| policy_gradient_loss | -0.0398 |\n", "| value_loss | 1.14 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 801 |\n", "| ep_rew_mean | 80 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 265 |\n", "| time_elapsed | 3830 |\n", "| total_timesteps | 542720 |\n", "| train/ | |\n", "| approx_kl | 0.11839047 |\n", "| clip_fraction | 0.451 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.817 |\n", "| explained_variance | 0.931 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0374 |\n", "| policy_gradient_loss | -0.0649 |\n", "| value_loss | 0.225 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 807 |\n", "| ep_rew_mean | 81.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 266 |\n", "| time_elapsed | 3844 |\n", "| total_timesteps | 544768 |\n", "| train/ | |\n", "| approx_kl | 0.11753229 |\n", "| clip_fraction | 0.393 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.78 |\n", "| explained_variance | 0.961 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.00135 |\n", "| policy_gradient_loss | -0.0597 |\n", "| value_loss | 0.234 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 795 |\n", "| ep_rew_mean | 81 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 267 |\n", "| time_elapsed | 3858 |\n", "| total_timesteps | 546816 |\n", "| train/ | |\n", "| approx_kl | 0.19439082 |\n", "| clip_fraction | 0.442 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.714 |\n", "| explained_variance | 0.729 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0501 |\n", "| policy_gradient_loss | 0.000984 |\n", "| value_loss | 1.66 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 793 |\n", "| ep_rew_mean | 81.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 268 |\n", "| time_elapsed | 3872 |\n", "| total_timesteps | 548864 |\n", "| train/ | |\n", "| approx_kl | 0.25076616 |\n", "| clip_fraction | 0.403 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.609 |\n", "| explained_variance | 0.806 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0264 |\n", "| policy_gradient_loss | -0.0383 |\n", "| value_loss | 0.814 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 795 |\n", "| ep_rew_mean | 80.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 269 |\n", "| time_elapsed | 3886 |\n", "| total_timesteps | 550912 |\n", "| train/ | |\n", "| approx_kl | 0.12450211 |\n", "| clip_fraction | 0.345 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.749 |\n", "| explained_variance | 0.864 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0261 |\n", "| policy_gradient_loss | -0.0349 |\n", "| value_loss | 0.936 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 797 |\n", "| ep_rew_mean | 79.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 270 |\n", "| time_elapsed | 3900 |\n", "| total_timesteps | 552960 |\n", "| train/ | |\n", "| approx_kl | 0.13953425 |\n", "| clip_fraction | 0.371 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.689 |\n", "| explained_variance | 0.879 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00508 |\n", "| policy_gradient_loss | -0.0423 |\n", "| value_loss | 0.683 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 800 |\n", "| ep_rew_mean | 78.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 271 |\n", "| time_elapsed | 3914 |\n", "| total_timesteps | 555008 |\n", "| train/ | |\n", "| approx_kl | 0.10602571 |\n", "| clip_fraction | 0.352 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.726 |\n", "| explained_variance | 0.884 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0389 |\n", "| policy_gradient_loss | -0.0367 |\n", "| value_loss | 0.557 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 804 |\n", "| ep_rew_mean | 75.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 272 |\n", "| time_elapsed | 3928 |\n", "| total_timesteps | 557056 |\n", "| train/ | |\n", "| approx_kl | 0.08438948 |\n", "| clip_fraction | 0.346 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.724 |\n", "| explained_variance | 0.877 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0635 |\n", "| policy_gradient_loss | -0.0474 |\n", "| value_loss | 0.559 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 796 |\n", "| ep_rew_mean | 72.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 273 |\n", "| time_elapsed | 3942 |\n", "| total_timesteps | 559104 |\n", "| train/ | |\n", "| approx_kl | 0.10472953 |\n", "| clip_fraction | 0.344 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.723 |\n", "| explained_variance | 0.925 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.00548 |\n", "| policy_gradient_loss | -0.0442 |\n", "| value_loss | 0.409 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 785 |\n", "| ep_rew_mean | 71.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 274 |\n", "| time_elapsed | 3956 |\n", "| total_timesteps | 561152 |\n", "| train/ | |\n", "| approx_kl | 0.19112884 |\n", "| clip_fraction | 0.361 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.648 |\n", "| explained_variance | 0.9 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0322 |\n", "| policy_gradient_loss | -0.0443 |\n", "| value_loss | 0.611 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 784 |\n", "| ep_rew_mean | 70.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 275 |\n", "| time_elapsed | 3970 |\n", "| total_timesteps | 563200 |\n", "| train/ | |\n", "| approx_kl | 0.123812795 |\n", "| clip_fraction | 0.325 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.573 |\n", "| explained_variance | 0.83 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.445 |\n", "| policy_gradient_loss | -0.014 |\n", "| value_loss | 1.68 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 780 |\n", "| ep_rew_mean | 69.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 276 |\n", "| time_elapsed | 3984 |\n", "| total_timesteps | 565248 |\n", "| train/ | |\n", "| approx_kl | 0.11958617 |\n", "| clip_fraction | 0.347 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.633 |\n", "| explained_variance | 0.932 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0356 |\n", "| policy_gradient_loss | -0.0366 |\n", "| value_loss | 0.499 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 777 |\n", "| ep_rew_mean | 70.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 277 |\n", "| time_elapsed | 3998 |\n", "| total_timesteps | 567296 |\n", "| train/ | |\n", "| approx_kl | 0.12103133 |\n", "| clip_fraction | 0.303 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.581 |\n", "| explained_variance | 0.953 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0569 |\n", "| policy_gradient_loss | -0.043 |\n", "| value_loss | 0.402 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 778 |\n", "| ep_rew_mean | 70.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 278 |\n", "| time_elapsed | 4011 |\n", "| total_timesteps | 569344 |\n", "| train/ | |\n", "| approx_kl | 0.1561046 |\n", "| clip_fraction | 0.384 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.641 |\n", "| explained_variance | 0.93 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0105 |\n", "| policy_gradient_loss | -0.0384 |\n", "| value_loss | 0.633 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 776 |\n", "| ep_rew_mean | 70.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 279 |\n", "| time_elapsed | 4025 |\n", "| total_timesteps | 571392 |\n", "| train/ | |\n", "| approx_kl | 0.15041755 |\n", "| clip_fraction | 0.417 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.674 |\n", "| explained_variance | 0.95 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.117 |\n", "| policy_gradient_loss | -0.0423 |\n", "| value_loss | 0.49 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 779 |\n", "| ep_rew_mean | 68.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 280 |\n", "| time_elapsed | 4039 |\n", "| total_timesteps | 573440 |\n", "| train/ | |\n", "| approx_kl | 0.18496981 |\n", "| clip_fraction | 0.354 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.544 |\n", "| explained_variance | 0.927 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0258 |\n", "| policy_gradient_loss | -0.0422 |\n", "| value_loss | 0.372 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 784 |\n", "| ep_rew_mean | 69.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 281 |\n", "| time_elapsed | 4053 |\n", "| total_timesteps | 575488 |\n", "| train/ | |\n", "| approx_kl | 0.12353152 |\n", "| clip_fraction | 0.359 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.592 |\n", "| explained_variance | 0.751 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0756 |\n", "| policy_gradient_loss | -0.0165 |\n", "| value_loss | 1.94 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 786 |\n", "| ep_rew_mean | 70 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 282 |\n", "| time_elapsed | 4067 |\n", "| total_timesteps | 577536 |\n", "| train/ | |\n", "| approx_kl | 0.11868166 |\n", "| clip_fraction | 0.333 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.615 |\n", "| explained_variance | 0.939 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0103 |\n", "| policy_gradient_loss | -0.0377 |\n", "| value_loss | 0.496 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 785 |\n", "| ep_rew_mean | 70.7 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 283 |\n", "| time_elapsed | 4081 |\n", "| total_timesteps | 579584 |\n", "| train/ | |\n", "| approx_kl | 0.112555414 |\n", "| clip_fraction | 0.367 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.656 |\n", "| explained_variance | 0.916 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00201 |\n", "| policy_gradient_loss | -0.0418 |\n", "| value_loss | 0.354 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 782 |\n", "| ep_rew_mean | 71.3 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 284 |\n", "| time_elapsed | 4095 |\n", "| total_timesteps | 581632 |\n", "| train/ | |\n", "| approx_kl | 0.124224626 |\n", "| clip_fraction | 0.398 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.706 |\n", "| explained_variance | 0.905 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0371 |\n", "| policy_gradient_loss | -0.0532 |\n", "| value_loss | 0.347 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 784 |\n", "| ep_rew_mean | 72.5 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 285 |\n", "| time_elapsed | 4109 |\n", "| total_timesteps | 583680 |\n", "| train/ | |\n", "| approx_kl | 0.14682588 |\n", "| clip_fraction | 0.347 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.553 |\n", "| explained_variance | 0.796 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.404 |\n", "| policy_gradient_loss | -0.0065 |\n", "| value_loss | 2.69 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 788 |\n", "| ep_rew_mean | 72.3 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 286 |\n", "| time_elapsed | 4122 |\n", "| total_timesteps | 585728 |\n", "| train/ | |\n", "| approx_kl | 0.18065211 |\n", "| clip_fraction | 0.417 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.678 |\n", "| explained_variance | 0.801 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0453 |\n", "| policy_gradient_loss | -0.0147 |\n", "| value_loss | 1.87 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 789 |\n", "| ep_rew_mean | 73.1 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 287 |\n", "| time_elapsed | 4136 |\n", "| total_timesteps | 587776 |\n", "| train/ | |\n", "| approx_kl | 0.19162913 |\n", "| clip_fraction | 0.393 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.626 |\n", "| explained_variance | 0.888 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0305 |\n", "| policy_gradient_loss | -0.0283 |\n", "| value_loss | 1 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 794 |\n", "| ep_rew_mean | 75 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 288 |\n", "| time_elapsed | 4150 |\n", "| total_timesteps | 589824 |\n", "| train/ | |\n", "| approx_kl | 0.19998117 |\n", "| clip_fraction | 0.381 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.594 |\n", "| explained_variance | 0.944 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0452 |\n", "| policy_gradient_loss | -0.0466 |\n", "| value_loss | 0.349 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 790 |\n", "| ep_rew_mean | 74.9 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 289 |\n", "| time_elapsed | 4164 |\n", "| total_timesteps | 591872 |\n", "| train/ | |\n", "| approx_kl | 0.4376191 |\n", "| clip_fraction | 0.43 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.47 |\n", "| explained_variance | 0.451 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.189 |\n", "| policy_gradient_loss | 0.0527 |\n", "| value_loss | 8.91 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 796 |\n", "| ep_rew_mean | 72.3 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 290 |\n", "| time_elapsed | 4178 |\n", "| total_timesteps | 593920 |\n", "| train/ | |\n", "| approx_kl | 0.2288987 |\n", "| clip_fraction | 0.371 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.471 |\n", "| explained_variance | 0.835 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0306 |\n", "| policy_gradient_loss | -0.0228 |\n", "| value_loss | 0.726 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 800 |\n", "| ep_rew_mean | 72.6 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 291 |\n", "| time_elapsed | 4192 |\n", "| total_timesteps | 595968 |\n", "| train/ | |\n", "| approx_kl | 0.20477101 |\n", "| clip_fraction | 0.378 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.609 |\n", "| explained_variance | 0.924 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0447 |\n", "| policy_gradient_loss | -0.0305 |\n", "| value_loss | 0.672 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 801 |\n", "| ep_rew_mean | 73.3 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 292 |\n", "| time_elapsed | 4206 |\n", "| total_timesteps | 598016 |\n", "| train/ | |\n", "| approx_kl | 0.1159951 |\n", "| clip_fraction | 0.392 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.663 |\n", "| explained_variance | 0.919 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0189 |\n", "| policy_gradient_loss | -0.0413 |\n", "| value_loss | 0.465 |\n", "---------------------------------------\n", "Eval num_timesteps=600000, episode_reward=84.40 +/- 32.26\n", "Episode length: 799.40 +/- 133.57\n", "----------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 799 |\n", "| mean_reward | 84.4 |\n", "| time/ | |\n", "| total_timesteps | 600000 |\n", "| train/ | |\n", "| approx_kl | 0.11906023 |\n", "| clip_fraction | 0.396 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.693 |\n", "| explained_variance | 0.815 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.264 |\n", "| policy_gradient_loss | -0.0125 |\n", "| value_loss | 1.12 |\n", "----------------------------------------\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 798 |\n", "| ep_rew_mean | 74.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 293 |\n", "| time_elapsed | 4240 |\n", "| total_timesteps | 600064 |\n", "---------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 803 |\n", "| ep_rew_mean | 76.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 294 |\n", "| time_elapsed | 4254 |\n", "| total_timesteps | 602112 |\n", "| train/ | |\n", "| approx_kl | 0.15987408 |\n", "| clip_fraction | 0.331 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.506 |\n", "| explained_variance | 0.846 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.493 |\n", "| policy_gradient_loss | -4e-05 |\n", "| value_loss | 2.66 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 803 |\n", "| ep_rew_mean | 77.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 295 |\n", "| time_elapsed | 4268 |\n", "| total_timesteps | 604160 |\n", "| train/ | |\n", "| approx_kl | 0.3191484 |\n", "| clip_fraction | 0.401 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.514 |\n", "| explained_variance | 0.707 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0768 |\n", "| policy_gradient_loss | 0.00962 |\n", "| value_loss | 7.63 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 805 |\n", "| ep_rew_mean | 82.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 296 |\n", "| time_elapsed | 4282 |\n", "| total_timesteps | 606208 |\n", "| train/ | |\n", "| approx_kl | 0.77782774 |\n", "| clip_fraction | 0.514 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.416 |\n", "| explained_variance | 0.287 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.631 |\n", "| policy_gradient_loss | 0.0608 |\n", "| value_loss | 16.3 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 807 |\n", "| ep_rew_mean | 82.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 297 |\n", "| time_elapsed | 4295 |\n", "| total_timesteps | 608256 |\n", "| train/ | |\n", "| approx_kl | 0.2872932 |\n", "| clip_fraction | 0.367 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.373 |\n", "| explained_variance | 0.374 |\n", "| learning_rate | 0.0003 |\n", "| loss | 28 |\n", "| policy_gradient_loss | 0.0454 |\n", "| value_loss | 28.4 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 810 |\n", "| ep_rew_mean | 82.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 298 |\n", "| time_elapsed | 4309 |\n", "| total_timesteps | 610304 |\n", "| train/ | |\n", "| approx_kl | 0.20201617 |\n", "| clip_fraction | 0.335 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.421 |\n", "| explained_variance | 0.751 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.53 |\n", "| policy_gradient_loss | -0.00745 |\n", "| value_loss | 1.72 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 819 |\n", "| ep_rew_mean | 83.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 299 |\n", "| time_elapsed | 4323 |\n", "| total_timesteps | 612352 |\n", "| train/ | |\n", "| approx_kl | 0.23204334 |\n", "| clip_fraction | 0.354 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.433 |\n", "| explained_variance | 0.628 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.146 |\n", "| policy_gradient_loss | -0.0243 |\n", "| value_loss | 1.68 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 824 |\n", "| ep_rew_mean | 84.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 300 |\n", "| time_elapsed | 4337 |\n", "| total_timesteps | 614400 |\n", "| train/ | |\n", "| approx_kl | 0.13762027 |\n", "| clip_fraction | 0.377 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.555 |\n", "| explained_variance | 0.566 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.137 |\n", "| policy_gradient_loss | 0.00532 |\n", "| value_loss | 3.14 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 829 |\n", "| ep_rew_mean | 85.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 301 |\n", "| time_elapsed | 4351 |\n", "| total_timesteps | 616448 |\n", "| train/ | |\n", "| approx_kl | 0.29512656 |\n", "| clip_fraction | 0.338 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.425 |\n", "| explained_variance | 0.646 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.387 |\n", "| policy_gradient_loss | 0.0319 |\n", "| value_loss | 2.68 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 835 |\n", "| ep_rew_mean | 86.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 302 |\n", "| time_elapsed | 4364 |\n", "| total_timesteps | 618496 |\n", "| train/ | |\n", "| approx_kl | 0.23680757 |\n", "| clip_fraction | 0.339 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.458 |\n", "| explained_variance | 0.864 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.332 |\n", "| policy_gradient_loss | -0.0157 |\n", "| value_loss | 1.65 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 834 |\n", "| ep_rew_mean | 86 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 303 |\n", "| time_elapsed | 4378 |\n", "| total_timesteps | 620544 |\n", "| train/ | |\n", "| approx_kl | 0.18148969 |\n", "| clip_fraction | 0.408 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.635 |\n", "| explained_variance | 0.834 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0837 |\n", "| policy_gradient_loss | -0.00321 |\n", "| value_loss | 1.58 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 836 |\n", "| ep_rew_mean | 86.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 304 |\n", "| time_elapsed | 4392 |\n", "| total_timesteps | 622592 |\n", "| train/ | |\n", "| approx_kl | 0.13355091 |\n", "| clip_fraction | 0.324 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.51 |\n", "| explained_variance | 0.891 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.246 |\n", "| policy_gradient_loss | 0.00545 |\n", "| value_loss | 1.75 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 828 |\n", "| ep_rew_mean | 87 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 305 |\n", "| time_elapsed | 4406 |\n", "| total_timesteps | 624640 |\n", "| train/ | |\n", "| approx_kl | 0.14093286 |\n", "| clip_fraction | 0.353 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.567 |\n", "| explained_variance | 0.891 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.333 |\n", "| policy_gradient_loss | -0.0228 |\n", "| value_loss | 2.36 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 829 |\n", "| ep_rew_mean | 87.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 306 |\n", "| time_elapsed | 4420 |\n", "| total_timesteps | 626688 |\n", "| train/ | |\n", "| approx_kl | 0.12960583 |\n", "| clip_fraction | 0.306 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.479 |\n", "| explained_variance | 0.908 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.632 |\n", "| policy_gradient_loss | -0.016 |\n", "| value_loss | 2.59 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 831 |\n", "| ep_rew_mean | 87.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 307 |\n", "| time_elapsed | 4434 |\n", "| total_timesteps | 628736 |\n", "| train/ | |\n", "| approx_kl | 0.09003037 |\n", "| clip_fraction | 0.304 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.56 |\n", "| explained_variance | 0.901 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.23 |\n", "| policy_gradient_loss | -0.0208 |\n", "| value_loss | 3.11 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 830 |\n", "| ep_rew_mean | 87 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 308 |\n", "| time_elapsed | 4448 |\n", "| total_timesteps | 630784 |\n", "| train/ | |\n", "| approx_kl | 0.1210104 |\n", "| clip_fraction | 0.308 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.524 |\n", "| explained_variance | 0.877 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.443 |\n", "| policy_gradient_loss | -0.0194 |\n", "| value_loss | 1.35 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 830 |\n", "| ep_rew_mean | 87.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 309 |\n", "| time_elapsed | 4462 |\n", "| total_timesteps | 632832 |\n", "| train/ | |\n", "| approx_kl | 0.11163107 |\n", "| clip_fraction | 0.296 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.615 |\n", "| explained_variance | 0.914 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.127 |\n", "| policy_gradient_loss | -0.0264 |\n", "| value_loss | 1.44 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 829 |\n", "| ep_rew_mean | 88.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 310 |\n", "| time_elapsed | 4476 |\n", "| total_timesteps | 634880 |\n", "| train/ | |\n", "| approx_kl | 0.18767926 |\n", "| clip_fraction | 0.468 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.574 |\n", "| explained_variance | 0.531 |\n", "| learning_rate | 0.0003 |\n", "| loss | 6.49 |\n", "| policy_gradient_loss | 0.0317 |\n", "| value_loss | 7.34 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 824 |\n", "| ep_rew_mean | 87.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 311 |\n", "| time_elapsed | 4490 |\n", "| total_timesteps | 636928 |\n", "| train/ | |\n", "| approx_kl | 0.1403477 |\n", "| clip_fraction | 0.364 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.648 |\n", "| explained_variance | 0.611 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.29 |\n", "| policy_gradient_loss | 0.00407 |\n", "| value_loss | 4.1 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 818 |\n", "| ep_rew_mean | 86.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 312 |\n", "| time_elapsed | 4504 |\n", "| total_timesteps | 638976 |\n", "| train/ | |\n", "| approx_kl | 0.10685725 |\n", "| clip_fraction | 0.34 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.619 |\n", "| explained_variance | 0.691 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.403 |\n", "| policy_gradient_loss | -0.0302 |\n", "| value_loss | 1.22 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 818 |\n", "| ep_rew_mean | 86.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 313 |\n", "| time_elapsed | 4518 |\n", "| total_timesteps | 641024 |\n", "| train/ | |\n", "| approx_kl | 0.087928936 |\n", "| clip_fraction | 0.355 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.659 |\n", "| explained_variance | 0.885 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0576 |\n", "| policy_gradient_loss | -0.0277 |\n", "| value_loss | 0.62 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 824 |\n", "| ep_rew_mean | 86.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 314 |\n", "| time_elapsed | 4532 |\n", "| total_timesteps | 643072 |\n", "| train/ | |\n", "| approx_kl | 0.14054653 |\n", "| clip_fraction | 0.297 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.606 |\n", "| explained_variance | 0.72 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.102 |\n", "| policy_gradient_loss | -0.0255 |\n", "| value_loss | 1.01 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 830 |\n", "| ep_rew_mean | 86.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 315 |\n", "| time_elapsed | 4546 |\n", "| total_timesteps | 645120 |\n", "| train/ | |\n", "| approx_kl | 0.07798655 |\n", "| clip_fraction | 0.355 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.732 |\n", "| explained_variance | 0.667 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.208 |\n", "| policy_gradient_loss | -0.00841 |\n", "| value_loss | 1.47 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 825 |\n", "| ep_rew_mean | 84.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 316 |\n", "| time_elapsed | 4560 |\n", "| total_timesteps | 647168 |\n", "| train/ | |\n", "| approx_kl | 0.11004632 |\n", "| clip_fraction | 0.375 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.743 |\n", "| explained_variance | 0.686 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0441 |\n", "| policy_gradient_loss | -0.0421 |\n", "| value_loss | 0.592 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 827 |\n", "| ep_rew_mean | 84.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 317 |\n", "| time_elapsed | 4574 |\n", "| total_timesteps | 649216 |\n", "| train/ | |\n", "| approx_kl | 0.09930132 |\n", "| clip_fraction | 0.287 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.575 |\n", "| explained_variance | 0.882 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0457 |\n", "| policy_gradient_loss | -0.0336 |\n", "| value_loss | 0.383 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 828 |\n", "| ep_rew_mean | 83.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 318 |\n", "| time_elapsed | 4587 |\n", "| total_timesteps | 651264 |\n", "| train/ | |\n", "| approx_kl | 0.1408749 |\n", "| clip_fraction | 0.368 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.628 |\n", "| explained_variance | 0.759 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.039 |\n", "| policy_gradient_loss | -0.0347 |\n", "| value_loss | 0.638 |\n", "---------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 828 |\n", "| ep_rew_mean | 82.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 319 |\n", "| time_elapsed | 4601 |\n", "| total_timesteps | 653312 |\n", "| train/ | |\n", "| approx_kl | 0.118636385 |\n", "| clip_fraction | 0.32 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.672 |\n", "| explained_variance | 0.796 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0624 |\n", "| policy_gradient_loss | -0.039 |\n", "| value_loss | 0.493 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 827 |\n", "| ep_rew_mean | 82.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 320 |\n", "| time_elapsed | 4615 |\n", "| total_timesteps | 655360 |\n", "| train/ | |\n", "| approx_kl | 0.105207935 |\n", "| clip_fraction | 0.383 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.795 |\n", "| explained_variance | 0.901 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00112 |\n", "| policy_gradient_loss | -0.0469 |\n", "| value_loss | 0.33 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 819 |\n", "| ep_rew_mean | 80.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 321 |\n", "| time_elapsed | 4629 |\n", "| total_timesteps | 657408 |\n", "| train/ | |\n", "| approx_kl | 0.084277906 |\n", "| clip_fraction | 0.399 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.798 |\n", "| explained_variance | 0.933 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00168 |\n", "| policy_gradient_loss | -0.0618 |\n", "| value_loss | 0.26 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 819 |\n", "| ep_rew_mean | 80.6 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 322 |\n", "| time_elapsed | 4643 |\n", "| total_timesteps | 659456 |\n", "| train/ | |\n", "| approx_kl | 0.07468955 |\n", "| clip_fraction | 0.374 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.779 |\n", "| explained_variance | 0.926 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0509 |\n", "| policy_gradient_loss | -0.0585 |\n", "| value_loss | 0.234 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 817 |\n", "| ep_rew_mean | 79.8 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 323 |\n", "| time_elapsed | 4657 |\n", "| total_timesteps | 661504 |\n", "| train/ | |\n", "| approx_kl | 0.07765938 |\n", "| clip_fraction | 0.366 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.825 |\n", "| explained_variance | 0.915 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.032 |\n", "| policy_gradient_loss | -0.0545 |\n", "| value_loss | 0.227 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 823 |\n", "| ep_rew_mean | 79.4 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 324 |\n", "| time_elapsed | 4671 |\n", "| total_timesteps | 663552 |\n", "| train/ | |\n", "| approx_kl | 0.09598839 |\n", "| clip_fraction | 0.36 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.772 |\n", "| explained_variance | 0.942 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0641 |\n", "| policy_gradient_loss | -0.0551 |\n", "| value_loss | 0.281 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 820 |\n", "| ep_rew_mean | 78.6 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 325 |\n", "| time_elapsed | 4685 |\n", "| total_timesteps | 665600 |\n", "| train/ | |\n", "| approx_kl | 0.09120271 |\n", "| clip_fraction | 0.414 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.915 |\n", "| explained_variance | 0.925 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0758 |\n", "| policy_gradient_loss | -0.0635 |\n", "| value_loss | 0.229 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 813 |\n", "| ep_rew_mean | 77.8 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 326 |\n", "| time_elapsed | 4699 |\n", "| total_timesteps | 667648 |\n", "| train/ | |\n", "| approx_kl | 0.10672728 |\n", "| clip_fraction | 0.45 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.928 |\n", "| explained_variance | 0.838 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0259 |\n", "| policy_gradient_loss | -0.0412 |\n", "| value_loss | 0.539 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 808 |\n", "| ep_rew_mean | 77.3 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 327 |\n", "| time_elapsed | 4713 |\n", "| total_timesteps | 669696 |\n", "| train/ | |\n", "| approx_kl | 0.09779258 |\n", "| clip_fraction | 0.381 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.802 |\n", "| explained_variance | 0.807 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.204 |\n", "| policy_gradient_loss | -0.0458 |\n", "| value_loss | 0.476 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 797 |\n", "| ep_rew_mean | 75 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 328 |\n", "| time_elapsed | 4727 |\n", "| total_timesteps | 671744 |\n", "| train/ | |\n", "| approx_kl | 0.08577522 |\n", "| clip_fraction | 0.38 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.833 |\n", "| explained_variance | 0.926 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0991 |\n", "| policy_gradient_loss | -0.053 |\n", "| value_loss | 0.313 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 790 |\n", "| ep_rew_mean | 74.7 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 329 |\n", "| time_elapsed | 4741 |\n", "| total_timesteps | 673792 |\n", "| train/ | |\n", "| approx_kl | 0.09775613 |\n", "| clip_fraction | 0.324 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.72 |\n", "| explained_variance | 0.906 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0424 |\n", "| policy_gradient_loss | -0.0532 |\n", "| value_loss | 0.34 |\n", "----------------------------------------\n", "Eval num_timesteps=675000, episode_reward=41.80 +/- 5.72\n", "Episode length: 572.60 +/- 45.77\n", "----------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 573 |\n", "| mean_reward | 41.8 |\n", "| time/ | |\n", "| total_timesteps | 675000 |\n", "| train/ | |\n", "| approx_kl | 0.11765081 |\n", "| clip_fraction | 0.388 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.743 |\n", "| explained_variance | 0.899 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0304 |\n", "| policy_gradient_loss | -0.047 |\n", "| value_loss | 0.467 |\n", "----------------------------------------\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 783 |\n", "| ep_rew_mean | 73.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 330 |\n", "| time_elapsed | 4769 |\n", "| total_timesteps | 675840 |\n", "---------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 783 |\n", "| ep_rew_mean | 72.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 331 |\n", "| time_elapsed | 4783 |\n", "| total_timesteps | 677888 |\n", "| train/ | |\n", "| approx_kl | 0.123655856 |\n", "| clip_fraction | 0.408 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.856 |\n", "| explained_variance | 0.914 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0054 |\n", "| policy_gradient_loss | -0.0596 |\n", "| value_loss | 0.246 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 771 |\n", "| ep_rew_mean | 69 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 332 |\n", "| time_elapsed | 4797 |\n", "| total_timesteps | 679936 |\n", "| train/ | |\n", "| approx_kl | 0.093154036 |\n", "| clip_fraction | 0.383 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.814 |\n", "| explained_variance | 0.909 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.00285 |\n", "| policy_gradient_loss | -0.0586 |\n", "| value_loss | 0.311 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 765 |\n", "| ep_rew_mean | 66.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 333 |\n", "| time_elapsed | 4811 |\n", "| total_timesteps | 681984 |\n", "| train/ | |\n", "| approx_kl | 0.109328866 |\n", "| clip_fraction | 0.339 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.687 |\n", "| explained_variance | 0.939 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0185 |\n", "| policy_gradient_loss | -0.048 |\n", "| value_loss | 0.257 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 753 |\n", "| ep_rew_mean | 62 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 334 |\n", "| time_elapsed | 4825 |\n", "| total_timesteps | 684032 |\n", "| train/ | |\n", "| approx_kl | 0.094571054 |\n", "| clip_fraction | 0.36 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.802 |\n", "| explained_variance | 0.958 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0247 |\n", "| policy_gradient_loss | -0.0512 |\n", "| value_loss | 0.213 |\n", "-----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 751 |\n", "| ep_rew_mean | 62.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 335 |\n", "| time_elapsed | 4839 |\n", "| total_timesteps | 686080 |\n", "| train/ | |\n", "| approx_kl | 0.0805347 |\n", "| clip_fraction | 0.348 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.726 |\n", "| explained_variance | 0.957 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.02 |\n", "| policy_gradient_loss | -0.0501 |\n", "| value_loss | 0.202 |\n", "---------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 750 |\n", "| ep_rew_mean | 61.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 336 |\n", "| time_elapsed | 4853 |\n", "| total_timesteps | 688128 |\n", "| train/ | |\n", "| approx_kl | 0.124914706 |\n", "| clip_fraction | 0.423 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.821 |\n", "| explained_variance | 0.933 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0526 |\n", "| policy_gradient_loss | -0.0686 |\n", "| value_loss | 0.227 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 753 |\n", "| ep_rew_mean | 61.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 337 |\n", "| time_elapsed | 4867 |\n", "| total_timesteps | 690176 |\n", "| train/ | |\n", "| approx_kl | 0.11365993 |\n", "| clip_fraction | 0.385 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.704 |\n", "| explained_variance | 0.959 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0226 |\n", "| policy_gradient_loss | -0.0628 |\n", "| value_loss | 0.202 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 749 |\n", "| ep_rew_mean | 60.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 338 |\n", "| time_elapsed | 4881 |\n", "| total_timesteps | 692224 |\n", "| train/ | |\n", "| approx_kl | 0.13683085 |\n", "| clip_fraction | 0.431 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.79 |\n", "| explained_variance | 0.931 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0404 |\n", "| policy_gradient_loss | -0.073 |\n", "| value_loss | 0.293 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 752 |\n", "| ep_rew_mean | 60.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 339 |\n", "| time_elapsed | 4895 |\n", "| total_timesteps | 694272 |\n", "| train/ | |\n", "| approx_kl | 0.12813875 |\n", "| clip_fraction | 0.393 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.701 |\n", "| explained_variance | 0.944 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0125 |\n", "| policy_gradient_loss | -0.0614 |\n", "| value_loss | 0.254 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 759 |\n", "| ep_rew_mean | 60.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 340 |\n", "| time_elapsed | 4909 |\n", "| total_timesteps | 696320 |\n", "| train/ | |\n", "| approx_kl | 0.09842213 |\n", "| clip_fraction | 0.373 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.801 |\n", "| explained_variance | 0.958 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0336 |\n", "| policy_gradient_loss | -0.0576 |\n", "| value_loss | 0.189 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 758 |\n", "| ep_rew_mean | 59.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 341 |\n", "| time_elapsed | 4923 |\n", "| total_timesteps | 698368 |\n", "| train/ | |\n", "| approx_kl | 0.12130609 |\n", "| clip_fraction | 0.392 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.733 |\n", "| explained_variance | 0.94 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0022 |\n", "| policy_gradient_loss | -0.0548 |\n", "| value_loss | 0.236 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 753 |\n", "| ep_rew_mean | 58.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 342 |\n", "| time_elapsed | 4937 |\n", "| total_timesteps | 700416 |\n", "| train/ | |\n", "| approx_kl | 0.2279495 |\n", "| clip_fraction | 0.397 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.691 |\n", "| explained_variance | 0.951 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00163 |\n", "| policy_gradient_loss | -0.057 |\n", "| value_loss | 0.228 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 756 |\n", "| ep_rew_mean | 58.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 343 |\n", "| time_elapsed | 4951 |\n", "| total_timesteps | 702464 |\n", "| train/ | |\n", "| approx_kl | 0.15320998 |\n", "| clip_fraction | 0.357 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.662 |\n", "| explained_variance | 0.74 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0805 |\n", "| policy_gradient_loss | -0.0347 |\n", "| value_loss | 0.619 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 760 |\n", "| ep_rew_mean | 58.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 344 |\n", "| time_elapsed | 4964 |\n", "| total_timesteps | 704512 |\n", "| train/ | |\n", "| approx_kl | 0.15027474 |\n", "| clip_fraction | 0.417 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.754 |\n", "| explained_variance | 0.899 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00548 |\n", "| policy_gradient_loss | -0.0574 |\n", "| value_loss | 0.263 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 762 |\n", "| ep_rew_mean | 58.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 345 |\n", "| time_elapsed | 4978 |\n", "| total_timesteps | 706560 |\n", "| train/ | |\n", "| approx_kl | 0.23094313 |\n", "| clip_fraction | 0.41 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.652 |\n", "| explained_variance | 0.89 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0289 |\n", "| policy_gradient_loss | -0.0591 |\n", "| value_loss | 0.318 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 762 |\n", "| ep_rew_mean | 56.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 346 |\n", "| time_elapsed | 4992 |\n", "| total_timesteps | 708608 |\n", "| train/ | |\n", "| approx_kl | 0.2064158 |\n", "| clip_fraction | 0.4 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.639 |\n", "| explained_variance | 0.862 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0792 |\n", "| policy_gradient_loss | -0.0575 |\n", "| value_loss | 0.368 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 761 |\n", "| ep_rew_mean | 56 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 347 |\n", "| time_elapsed | 5006 |\n", "| total_timesteps | 710656 |\n", "| train/ | |\n", "| approx_kl | 0.19109753 |\n", "| clip_fraction | 0.377 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.616 |\n", "| explained_variance | 0.939 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0523 |\n", "| policy_gradient_loss | -0.0572 |\n", "| value_loss | 0.229 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 769 |\n", "| ep_rew_mean | 56.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 348 |\n", "| time_elapsed | 5020 |\n", "| total_timesteps | 712704 |\n", "| train/ | |\n", "| approx_kl | 0.15536407 |\n", "| clip_fraction | 0.388 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.619 |\n", "| explained_variance | 0.846 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0595 |\n", "| policy_gradient_loss | -0.0492 |\n", "| value_loss | 0.514 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 773 |\n", "| ep_rew_mean | 57.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 349 |\n", "| time_elapsed | 5034 |\n", "| total_timesteps | 714752 |\n", "| train/ | |\n", "| approx_kl | 0.20184529 |\n", "| clip_fraction | 0.412 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.67 |\n", "| explained_variance | 0.92 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0338 |\n", "| policy_gradient_loss | -0.0573 |\n", "| value_loss | 0.229 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 781 |\n", "| ep_rew_mean | 57.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 350 |\n", "| time_elapsed | 5048 |\n", "| total_timesteps | 716800 |\n", "| train/ | |\n", "| approx_kl | 0.21323843 |\n", "| clip_fraction | 0.447 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.746 |\n", "| explained_variance | 0.895 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0434 |\n", "| policy_gradient_loss | -0.0669 |\n", "| value_loss | 0.256 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 788 |\n", "| ep_rew_mean | 58.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 351 |\n", "| time_elapsed | 5062 |\n", "| total_timesteps | 718848 |\n", "| train/ | |\n", "| approx_kl | 0.23458937 |\n", "| clip_fraction | 0.46 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.905 |\n", "| explained_variance | 0.909 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0998 |\n", "| policy_gradient_loss | -0.0635 |\n", "| value_loss | 0.177 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 790 |\n", "| ep_rew_mean | 59.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 352 |\n", "| time_elapsed | 5076 |\n", "| total_timesteps | 720896 |\n", "| train/ | |\n", "| approx_kl | 0.15706933 |\n", "| clip_fraction | 0.422 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.792 |\n", "| explained_variance | 0.888 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0207 |\n", "| policy_gradient_loss | -0.0628 |\n", "| value_loss | 0.258 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 790 |\n", "| ep_rew_mean | 60.4 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 353 |\n", "| time_elapsed | 5090 |\n", "| total_timesteps | 722944 |\n", "| train/ | |\n", "| approx_kl | 0.27062505 |\n", "| clip_fraction | 0.413 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.683 |\n", "| explained_variance | 0.816 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0305 |\n", "| policy_gradient_loss | -0.0521 |\n", "| value_loss | 0.649 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 795 |\n", "| ep_rew_mean | 61.1 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 354 |\n", "| time_elapsed | 5104 |\n", "| total_timesteps | 724992 |\n", "| train/ | |\n", "| approx_kl | 0.17802241 |\n", "| clip_fraction | 0.39 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.722 |\n", "| explained_variance | 0.668 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.247 |\n", "| policy_gradient_loss | -0.0231 |\n", "| value_loss | 1.38 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 798 |\n", "| ep_rew_mean | 61.9 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 355 |\n", "| time_elapsed | 5118 |\n", "| total_timesteps | 727040 |\n", "| train/ | |\n", "| approx_kl | 0.1866923 |\n", "| clip_fraction | 0.396 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.662 |\n", "| explained_variance | 0.735 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0424 |\n", "| policy_gradient_loss | -0.037 |\n", "| value_loss | 0.886 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 802 |\n", "| ep_rew_mean | 62 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 356 |\n", "| time_elapsed | 5132 |\n", "| total_timesteps | 729088 |\n", "| train/ | |\n", "| approx_kl | 0.15238863 |\n", "| clip_fraction | 0.385 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.715 |\n", "| explained_variance | 0.756 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0621 |\n", "| policy_gradient_loss | -0.0432 |\n", "| value_loss | 0.615 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 810 |\n", "| ep_rew_mean | 63.9 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 357 |\n", "| time_elapsed | 5146 |\n", "| total_timesteps | 731136 |\n", "| train/ | |\n", "| approx_kl | 0.21199457 |\n", "| clip_fraction | 0.392 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.691 |\n", "| explained_variance | 0.899 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00281 |\n", "| policy_gradient_loss | -0.0416 |\n", "| value_loss | 0.433 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 816 |\n", "| ep_rew_mean | 66.7 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 358 |\n", "| time_elapsed | 5160 |\n", "| total_timesteps | 733184 |\n", "| train/ | |\n", "| approx_kl | 0.70963174 |\n", "| clip_fraction | 0.602 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.572 |\n", "| explained_variance | 0.122 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.618 |\n", "| policy_gradient_loss | 0.0644 |\n", "| value_loss | 14.5 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 813 |\n", "| ep_rew_mean | 67.1 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 359 |\n", "| time_elapsed | 5174 |\n", "| total_timesteps | 735232 |\n", "| train/ | |\n", "| approx_kl | 0.22513458 |\n", "| clip_fraction | 0.413 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.451 |\n", "| explained_variance | 0.488 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.183 |\n", "| policy_gradient_loss | 0.023 |\n", "| value_loss | 1.95 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 813 |\n", "| ep_rew_mean | 67 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 360 |\n", "| time_elapsed | 5188 |\n", "| total_timesteps | 737280 |\n", "| train/ | |\n", "| approx_kl | 0.49808502 |\n", "| clip_fraction | 0.391 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.492 |\n", "| explained_variance | 0.402 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0257 |\n", "| policy_gradient_loss | -0.0252 |\n", "| value_loss | 1.08 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 813 |\n", "| ep_rew_mean | 67 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 361 |\n", "| time_elapsed | 5202 |\n", "| total_timesteps | 739328 |\n", "| train/ | |\n", "| approx_kl | 0.19948304 |\n", "| clip_fraction | 0.554 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.676 |\n", "| explained_variance | 0.4 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.37 |\n", "| policy_gradient_loss | 0.0389 |\n", "| value_loss | 9.58 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 813 |\n", "| ep_rew_mean | 67 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 362 |\n", "| time_elapsed | 5216 |\n", "| total_timesteps | 741376 |\n", "| train/ | |\n", "| approx_kl | 0.057439484 |\n", "| clip_fraction | 0.398 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.823 |\n", "| explained_variance | -0.714 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00457 |\n", "| policy_gradient_loss | 0.0122 |\n", "| value_loss | 0.0761 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 813 |\n", "| ep_rew_mean | 67 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 363 |\n", "| time_elapsed | 5230 |\n", "| total_timesteps | 743424 |\n", "| train/ | |\n", "| approx_kl | 0.020314857 |\n", "| clip_fraction | 0.261 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.88 |\n", "| explained_variance | -0.117 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0108 |\n", "| policy_gradient_loss | -0.0021 |\n", "| value_loss | 0.0518 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 813 |\n", "| ep_rew_mean | 67 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 364 |\n", "| time_elapsed | 5244 |\n", "| total_timesteps | 745472 |\n", "| train/ | |\n", "| approx_kl | 0.044410482 |\n", "| clip_fraction | 0.269 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.864 |\n", "| explained_variance | -0.0301 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00426 |\n", "| policy_gradient_loss | -0.0069 |\n", "| value_loss | 0.0474 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 813 |\n", "| ep_rew_mean | 67 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 365 |\n", "| time_elapsed | 5258 |\n", "| total_timesteps | 747520 |\n", "| train/ | |\n", "| approx_kl | 0.061541207 |\n", "| clip_fraction | 0.317 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.916 |\n", "| explained_variance | -0.577 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0545 |\n", "| policy_gradient_loss | -0.0149 |\n", "| value_loss | 0.0271 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 813 |\n", "| ep_rew_mean | 67 |\n", "| time/ | |\n", "| fps | 142 |\n", "| iterations | 366 |\n", "| time_elapsed | 5272 |\n", "| total_timesteps | 749568 |\n", "| train/ | |\n", "| approx_kl | 0.03265925 |\n", "| clip_fraction | 0.278 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.945 |\n", "| explained_variance | -1.57 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0431 |\n", "| policy_gradient_loss | -0.0198 |\n", "| value_loss | 0.0161 |\n", "----------------------------------------\n", "Eval num_timesteps=750000, episode_reward=104.50 +/- 48.81\n", "Episode length: 899.70 +/- 202.33\n", "----------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 900 |\n", "| mean_reward | 104 |\n", "| time/ | |\n", "| total_timesteps | 750000 |\n", "| train/ | |\n", "| approx_kl | 0.03056224 |\n", "| clip_fraction | 0.238 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.959 |\n", "| explained_variance | -2.11 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0236 |\n", "| policy_gradient_loss | -0.0211 |\n", "| value_loss | 0.0125 |\n", "----------------------------------------\n", "New best mean reward!\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 813 |\n", "| ep_rew_mean | 67 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 367 |\n", "| time_elapsed | 5311 |\n", "| total_timesteps | 751616 |\n", "---------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 969 |\n", "| ep_rew_mean | 68.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 368 |\n", "| time_elapsed | 5328 |\n", "| total_timesteps | 753664 |\n", "| train/ | |\n", "| approx_kl | 0.03360254 |\n", "| clip_fraction | 0.244 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.9 |\n", "| explained_variance | 0.602 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.00841 |\n", "| policy_gradient_loss | -0.0271 |\n", "| value_loss | 0.0489 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 974 |\n", "| ep_rew_mean | 69.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 369 |\n", "| time_elapsed | 5342 |\n", "| total_timesteps | 755712 |\n", "| train/ | |\n", "| approx_kl | 0.2092076 |\n", "| clip_fraction | 0.421 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.638 |\n", "| explained_variance | 0.821 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00541 |\n", "| policy_gradient_loss | -0.0515 |\n", "| value_loss | 0.543 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 978 |\n", "| ep_rew_mean | 70 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 370 |\n", "| time_elapsed | 5356 |\n", "| total_timesteps | 757760 |\n", "| train/ | |\n", "| approx_kl | 0.16421121 |\n", "| clip_fraction | 0.429 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.711 |\n", "| explained_variance | 0.568 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.207 |\n", "| policy_gradient_loss | -0.0343 |\n", "| value_loss | 1.48 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 983 |\n", "| ep_rew_mean | 70.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 371 |\n", "| time_elapsed | 5370 |\n", "| total_timesteps | 759808 |\n", "| train/ | |\n", "| approx_kl | 0.17415446 |\n", "| clip_fraction | 0.424 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.699 |\n", "| explained_variance | 0.754 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0487 |\n", "| policy_gradient_loss | -0.0516 |\n", "| value_loss | 0.603 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 987 |\n", "| ep_rew_mean | 71.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 372 |\n", "| time_elapsed | 5383 |\n", "| total_timesteps | 761856 |\n", "| train/ | |\n", "| approx_kl | 0.11751805 |\n", "| clip_fraction | 0.4 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.758 |\n", "| explained_variance | 0.732 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0892 |\n", "| policy_gradient_loss | -0.0538 |\n", "| value_loss | 0.437 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 995 |\n", "| ep_rew_mean | 73.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 373 |\n", "| time_elapsed | 5397 |\n", "| total_timesteps | 763904 |\n", "| train/ | |\n", "| approx_kl | 0.20681235 |\n", "| clip_fraction | 0.431 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.757 |\n", "| explained_variance | 0.821 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0502 |\n", "| policy_gradient_loss | -0.0526 |\n", "| value_loss | 0.531 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1e+03 |\n", "| ep_rew_mean | 76.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 374 |\n", "| time_elapsed | 5411 |\n", "| total_timesteps | 765952 |\n", "| train/ | |\n", "| approx_kl | 0.3240975 |\n", "| clip_fraction | 0.525 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.549 |\n", "| explained_variance | 0.455 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.04 |\n", "| policy_gradient_loss | 0.052 |\n", "| value_loss | 7.88 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1e+03 |\n", "| ep_rew_mean | 76.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 375 |\n", "| time_elapsed | 5426 |\n", "| total_timesteps | 768000 |\n", "| train/ | |\n", "| approx_kl | 0.41297093 |\n", "| clip_fraction | 0.506 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.48 |\n", "| explained_variance | 0.603 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.865 |\n", "| policy_gradient_loss | 0.0628 |\n", "| value_loss | 12.2 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.01e+03 |\n", "| ep_rew_mean | 77 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 376 |\n", "| time_elapsed | 5440 |\n", "| total_timesteps | 770048 |\n", "| train/ | |\n", "| approx_kl | 0.4028127 |\n", "| clip_fraction | 0.389 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.466 |\n", "| explained_variance | 0.725 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.153 |\n", "| policy_gradient_loss | -0.0184 |\n", "| value_loss | 1.74 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.01e+03 |\n", "| ep_rew_mean | 79.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 377 |\n", "| time_elapsed | 5454 |\n", "| total_timesteps | 772096 |\n", "| train/ | |\n", "| approx_kl | 0.30015093 |\n", "| clip_fraction | 0.351 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.467 |\n", "| explained_variance | 0.757 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.359 |\n", "| policy_gradient_loss | -0.0108 |\n", "| value_loss | 1.82 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1e+03 |\n", "| ep_rew_mean | 80 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 378 |\n", "| time_elapsed | 5468 |\n", "| total_timesteps | 774144 |\n", "| train/ | |\n", "| approx_kl | 0.4139573 |\n", "| clip_fraction | 0.447 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.412 |\n", "| explained_variance | 0.516 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.731 |\n", "| policy_gradient_loss | 0.0254 |\n", "| value_loss | 23 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1e+03 |\n", "| ep_rew_mean | 80 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 379 |\n", "| time_elapsed | 5482 |\n", "| total_timesteps | 776192 |\n", "| train/ | |\n", "| approx_kl | 0.39012295 |\n", "| clip_fraction | 0.384 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.408 |\n", "| explained_variance | 0.776 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.12 |\n", "| policy_gradient_loss | 0.00896 |\n", "| value_loss | 9.96 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.01e+03 |\n", "| ep_rew_mean | 82.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 380 |\n", "| time_elapsed | 5496 |\n", "| total_timesteps | 778240 |\n", "| train/ | |\n", "| approx_kl | 0.21037388 |\n", "| clip_fraction | 0.329 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.399 |\n", "| explained_variance | 0.637 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.184 |\n", "| policy_gradient_loss | -0.00195 |\n", "| value_loss | 4.27 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.01e+03 |\n", "| ep_rew_mean | 83.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 381 |\n", "| time_elapsed | 5510 |\n", "| total_timesteps | 780288 |\n", "| train/ | |\n", "| approx_kl | 0.28686368 |\n", "| clip_fraction | 0.346 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.418 |\n", "| explained_variance | 0.705 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.735 |\n", "| policy_gradient_loss | 0.0153 |\n", "| value_loss | 7.07 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.02e+03 |\n", "| ep_rew_mean | 83.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 382 |\n", "| time_elapsed | 5524 |\n", "| total_timesteps | 782336 |\n", "| train/ | |\n", "| approx_kl | 0.16726947 |\n", "| clip_fraction | 0.336 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.491 |\n", "| explained_variance | 0.882 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.388 |\n", "| policy_gradient_loss | 0.00169 |\n", "| value_loss | 3.86 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.02e+03 |\n", "| ep_rew_mean | 83.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 383 |\n", "| time_elapsed | 5538 |\n", "| total_timesteps | 784384 |\n", "| train/ | |\n", "| approx_kl | 0.34179872 |\n", "| clip_fraction | 0.397 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.53 |\n", "| explained_variance | 0.84 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.136 |\n", "| policy_gradient_loss | -0.0151 |\n", "| value_loss | 2.03 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.02e+03 |\n", "| ep_rew_mean | 82.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 384 |\n", "| time_elapsed | 5552 |\n", "| total_timesteps | 786432 |\n", "| train/ | |\n", "| approx_kl | 0.23733148 |\n", "| clip_fraction | 0.354 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.48 |\n", "| explained_variance | 0.642 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.209 |\n", "| policy_gradient_loss | -0.0119 |\n", "| value_loss | 2.18 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.02e+03 |\n", "| ep_rew_mean | 84.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 385 |\n", "| time_elapsed | 5566 |\n", "| total_timesteps | 788480 |\n", "| train/ | |\n", "| approx_kl | 0.3218306 |\n", "| clip_fraction | 0.387 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.557 |\n", "| explained_variance | 0.791 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.281 |\n", "| policy_gradient_loss | -0.0232 |\n", "| value_loss | 1.94 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.01e+03 |\n", "| ep_rew_mean | 84.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 386 |\n", "| time_elapsed | 5580 |\n", "| total_timesteps | 790528 |\n", "| train/ | |\n", "| approx_kl | 0.1633209 |\n", "| clip_fraction | 0.411 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.569 |\n", "| explained_variance | 0.686 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.823 |\n", "| policy_gradient_loss | 0.00719 |\n", "| value_loss | 5.78 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1e+03 |\n", "| ep_rew_mean | 85.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 387 |\n", "| time_elapsed | 5594 |\n", "| total_timesteps | 792576 |\n", "| train/ | |\n", "| approx_kl | 0.22367734 |\n", "| clip_fraction | 0.345 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.496 |\n", "| explained_variance | 0.722 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.281 |\n", "| policy_gradient_loss | -0.0154 |\n", "| value_loss | 3.91 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 993 |\n", "| ep_rew_mean | 86.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 388 |\n", "| time_elapsed | 5608 |\n", "| total_timesteps | 794624 |\n", "| train/ | |\n", "| approx_kl | 0.12060967 |\n", "| clip_fraction | 0.282 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.423 |\n", "| explained_variance | 0.841 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.737 |\n", "| policy_gradient_loss | -0.0108 |\n", "| value_loss | 3.06 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 991 |\n", "| ep_rew_mean | 86.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 389 |\n", "| time_elapsed | 5621 |\n", "| total_timesteps | 796672 |\n", "| train/ | |\n", "| approx_kl | 0.17049566 |\n", "| clip_fraction | 0.323 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.488 |\n", "| explained_variance | 0.851 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.272 |\n", "| policy_gradient_loss | -0.0145 |\n", "| value_loss | 3.77 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 993 |\n", "| ep_rew_mean | 89 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 390 |\n", "| time_elapsed | 5635 |\n", "| total_timesteps | 798720 |\n", "| train/ | |\n", "| approx_kl | 0.13605762 |\n", "| clip_fraction | 0.314 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.567 |\n", "| explained_variance | 0.876 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.233 |\n", "| policy_gradient_loss | -0.0144 |\n", "| value_loss | 2.28 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 996 |\n", "| ep_rew_mean | 91.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 391 |\n", "| time_elapsed | 5649 |\n", "| total_timesteps | 800768 |\n", "| train/ | |\n", "| approx_kl | 0.1651541 |\n", "| clip_fraction | 0.332 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.504 |\n", "| explained_variance | 0.863 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.206 |\n", "| policy_gradient_loss | 0.0109 |\n", "| value_loss | 4.43 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 991 |\n", "| ep_rew_mean | 90.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 392 |\n", "| time_elapsed | 5663 |\n", "| total_timesteps | 802816 |\n", "| train/ | |\n", "| approx_kl | 0.28214163 |\n", "| clip_fraction | 0.333 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.348 |\n", "| explained_variance | 0.811 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.32 |\n", "| policy_gradient_loss | 0.0441 |\n", "| value_loss | 7.12 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 991 |\n", "| ep_rew_mean | 91.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 393 |\n", "| time_elapsed | 5677 |\n", "| total_timesteps | 804864 |\n", "| train/ | |\n", "| approx_kl | 0.21800207 |\n", "| clip_fraction | 0.438 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.598 |\n", "| explained_variance | 0.597 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.03 |\n", "| policy_gradient_loss | 0.0187 |\n", "| value_loss | 4.5 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 994 |\n", "| ep_rew_mean | 95.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 394 |\n", "| time_elapsed | 5691 |\n", "| total_timesteps | 806912 |\n", "| train/ | |\n", "| approx_kl | 0.118542634 |\n", "| clip_fraction | 0.33 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.536 |\n", "| explained_variance | 0.924 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.226 |\n", "| policy_gradient_loss | -0.0208 |\n", "| value_loss | 1.61 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 989 |\n", "| ep_rew_mean | 94.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 395 |\n", "| time_elapsed | 5706 |\n", "| total_timesteps | 808960 |\n", "| train/ | |\n", "| approx_kl | 0.88899064 |\n", "| clip_fraction | 0.578 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.508 |\n", "| explained_variance | 0.493 |\n", "| learning_rate | 0.0003 |\n", "| loss | 5.13 |\n", "| policy_gradient_loss | 0.0891 |\n", "| value_loss | 29.9 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 982 |\n", "| ep_rew_mean | 96.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 396 |\n", "| time_elapsed | 5719 |\n", "| total_timesteps | 811008 |\n", "| train/ | |\n", "| approx_kl | 0.22632536 |\n", "| clip_fraction | 0.351 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.491 |\n", "| explained_variance | 0.719 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.11 |\n", "| policy_gradient_loss | 0.0212 |\n", "| value_loss | 3.82 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 969 |\n", "| ep_rew_mean | 99.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 397 |\n", "| time_elapsed | 5734 |\n", "| total_timesteps | 813056 |\n", "| train/ | |\n", "| approx_kl | 0.33769712 |\n", "| clip_fraction | 0.393 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.468 |\n", "| explained_variance | 0.268 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.4 |\n", "| policy_gradient_loss | 0.0568 |\n", "| value_loss | 23.6 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 962 |\n", "| ep_rew_mean | 99.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 398 |\n", "| time_elapsed | 5748 |\n", "| total_timesteps | 815104 |\n", "| train/ | |\n", "| approx_kl | 0.23381087 |\n", "| clip_fraction | 0.329 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.397 |\n", "| explained_variance | 0.61 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.2 |\n", "| policy_gradient_loss | 0.0328 |\n", "| value_loss | 18.7 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 958 |\n", "| ep_rew_mean | 98.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 399 |\n", "| time_elapsed | 5762 |\n", "| total_timesteps | 817152 |\n", "| train/ | |\n", "| approx_kl | 0.1882815 |\n", "| clip_fraction | 0.331 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.497 |\n", "| explained_variance | 0.779 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.425 |\n", "| policy_gradient_loss | 0.00811 |\n", "| value_loss | 3.62 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 952 |\n", "| ep_rew_mean | 97.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 400 |\n", "| time_elapsed | 5776 |\n", "| total_timesteps | 819200 |\n", "| train/ | |\n", "| approx_kl | 0.19722398 |\n", "| clip_fraction | 0.426 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.634 |\n", "| explained_variance | 0.603 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.163 |\n", "| policy_gradient_loss | -0.0114 |\n", "| value_loss | 1.54 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 952 |\n", "| ep_rew_mean | 101 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 401 |\n", "| time_elapsed | 5790 |\n", "| total_timesteps | 821248 |\n", "| train/ | |\n", "| approx_kl | 0.22470623 |\n", "| clip_fraction | 0.517 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.645 |\n", "| explained_variance | 0.619 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.49 |\n", "| policy_gradient_loss | -0.00607 |\n", "| value_loss | 5.07 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 926 |\n", "| ep_rew_mean | 97.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 402 |\n", "| time_elapsed | 5804 |\n", "| total_timesteps | 823296 |\n", "| train/ | |\n", "| approx_kl | 4.4365206 |\n", "| clip_fraction | 0.597 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.294 |\n", "| explained_variance | 0.555 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.01 |\n", "| policy_gradient_loss | 0.162 |\n", "| value_loss | 33.5 |\n", "---------------------------------------\n", "Eval num_timesteps=825000, episode_reward=17.10 +/- 5.80\n", "Episode length: 580.20 +/- 89.35\n", "---------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 580 |\n", "| mean_reward | 17.1 |\n", "| time/ | |\n", "| total_timesteps | 825000 |\n", "| train/ | |\n", "| approx_kl | 2.7867966 |\n", "| clip_fraction | 0.439 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.245 |\n", "| explained_variance | 0.764 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.183 |\n", "| policy_gradient_loss | 0.0252 |\n", "| value_loss | 1.17 |\n", "---------------------------------------\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 911 |\n", "| ep_rew_mean | 93 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 403 |\n", "| time_elapsed | 5833 |\n", "| total_timesteps | 825344 |\n", "---------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 752 |\n", "| ep_rew_mean | 90.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 404 |\n", "| time_elapsed | 5847 |\n", "| total_timesteps | 827392 |\n", "| train/ | |\n", "| approx_kl | 0.34639728 |\n", "| clip_fraction | 0.368 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.407 |\n", "| explained_variance | 0.74 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0232 |\n", "| policy_gradient_loss | -0.0125 |\n", "| value_loss | 0.679 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 746 |\n", "| ep_rew_mean | 89.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 405 |\n", "| time_elapsed | 5860 |\n", "| total_timesteps | 829440 |\n", "| train/ | |\n", "| approx_kl | 0.25869176 |\n", "| clip_fraction | 0.357 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.463 |\n", "| explained_variance | 0.859 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0662 |\n", "| policy_gradient_loss | -0.012 |\n", "| value_loss | 0.469 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 740 |\n", "| ep_rew_mean | 89.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 406 |\n", "| time_elapsed | 5874 |\n", "| total_timesteps | 831488 |\n", "| train/ | |\n", "| approx_kl | 0.15037423 |\n", "| clip_fraction | 0.371 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.591 |\n", "| explained_variance | 0.81 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.00435 |\n", "| policy_gradient_loss | -0.0212 |\n", "| value_loss | 0.455 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 733 |\n", "| ep_rew_mean | 88.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 407 |\n", "| time_elapsed | 5888 |\n", "| total_timesteps | 833536 |\n", "| train/ | |\n", "| approx_kl | 0.1830619 |\n", "| clip_fraction | 0.47 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.583 |\n", "| explained_variance | 0.226 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.224 |\n", "| policy_gradient_loss | 0.0195 |\n", "| value_loss | 5.02 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 720 |\n", "| ep_rew_mean | 85 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 408 |\n", "| time_elapsed | 5902 |\n", "| total_timesteps | 835584 |\n", "| train/ | |\n", "| approx_kl | 0.13190795 |\n", "| clip_fraction | 0.392 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.659 |\n", "| explained_variance | 0.605 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.017 |\n", "| policy_gradient_loss | -0.0296 |\n", "| value_loss | 0.74 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 712 |\n", "| ep_rew_mean | 84.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 409 |\n", "| time_elapsed | 5916 |\n", "| total_timesteps | 837632 |\n", "| train/ | |\n", "| approx_kl | 0.12244979 |\n", "| clip_fraction | 0.338 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.579 |\n", "| explained_variance | 0.531 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0475 |\n", "| policy_gradient_loss | -0.0269 |\n", "| value_loss | 1.04 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 722 |\n", "| ep_rew_mean | 84.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 410 |\n", "| time_elapsed | 5930 |\n", "| total_timesteps | 839680 |\n", "| train/ | |\n", "| approx_kl | 0.5837582 |\n", "| clip_fraction | 0.311 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.301 |\n", "| explained_variance | 0.27 |\n", "| learning_rate | 0.0003 |\n", "| loss | 12.3 |\n", "| policy_gradient_loss | 0.0952 |\n", "| value_loss | 24.9 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 714 |\n", "| ep_rew_mean | 83.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 411 |\n", "| time_elapsed | 5944 |\n", "| total_timesteps | 841728 |\n", "| train/ | |\n", "| approx_kl | 0.25675306 |\n", "| clip_fraction | 0.398 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.582 |\n", "| explained_variance | 0.548 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0195 |\n", "| policy_gradient_loss | -0.0175 |\n", "| value_loss | 0.748 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 719 |\n", "| ep_rew_mean | 81.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 412 |\n", "| time_elapsed | 5958 |\n", "| total_timesteps | 843776 |\n", "| train/ | |\n", "| approx_kl | 0.23823817 |\n", "| clip_fraction | 0.354 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.491 |\n", "| explained_variance | 0.45 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.105 |\n", "| policy_gradient_loss | -0.0154 |\n", "| value_loss | 0.961 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 719 |\n", "| ep_rew_mean | 80.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 413 |\n", "| time_elapsed | 5972 |\n", "| total_timesteps | 845824 |\n", "| train/ | |\n", "| approx_kl | 0.1857103 |\n", "| clip_fraction | 0.328 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.524 |\n", "| explained_variance | 0.525 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.14 |\n", "| policy_gradient_loss | -0.00387 |\n", "| value_loss | 1.56 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 717 |\n", "| ep_rew_mean | 80.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 414 |\n", "| time_elapsed | 5986 |\n", "| total_timesteps | 847872 |\n", "| train/ | |\n", "| approx_kl | 0.14656654 |\n", "| clip_fraction | 0.35 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.6 |\n", "| explained_variance | 0.545 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0247 |\n", "| policy_gradient_loss | -0.0192 |\n", "| value_loss | 0.976 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 714 |\n", "| ep_rew_mean | 80.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 415 |\n", "| time_elapsed | 6000 |\n", "| total_timesteps | 849920 |\n", "| train/ | |\n", "| approx_kl | 0.1246667 |\n", "| clip_fraction | 0.39 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.645 |\n", "| explained_variance | 0.422 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.167 |\n", "| policy_gradient_loss | 0.00697 |\n", "| value_loss | 3.83 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 713 |\n", "| ep_rew_mean | 81 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 416 |\n", "| time_elapsed | 6014 |\n", "| total_timesteps | 851968 |\n", "| train/ | |\n", "| approx_kl | 0.1442353 |\n", "| clip_fraction | 0.407 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.685 |\n", "| explained_variance | 0.633 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.102 |\n", "| policy_gradient_loss | -0.012 |\n", "| value_loss | 2.11 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 710 |\n", "| ep_rew_mean | 80.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 417 |\n", "| time_elapsed | 6028 |\n", "| total_timesteps | 854016 |\n", "| train/ | |\n", "| approx_kl | 0.1553421 |\n", "| clip_fraction | 0.438 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.755 |\n", "| explained_variance | 0.374 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.676 |\n", "| policy_gradient_loss | -0.00111 |\n", "| value_loss | 5.04 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 712 |\n", "| ep_rew_mean | 81.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 418 |\n", "| time_elapsed | 6042 |\n", "| total_timesteps | 856064 |\n", "| train/ | |\n", "| approx_kl | 0.12162173 |\n", "| clip_fraction | 0.389 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.695 |\n", "| explained_variance | 0.625 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0855 |\n", "| policy_gradient_loss | -0.0303 |\n", "| value_loss | 1.09 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 718 |\n", "| ep_rew_mean | 81.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 419 |\n", "| time_elapsed | 6056 |\n", "| total_timesteps | 858112 |\n", "| train/ | |\n", "| approx_kl | 0.1174871 |\n", "| clip_fraction | 0.441 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.762 |\n", "| explained_variance | 0.418 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.422 |\n", "| policy_gradient_loss | 0.0113 |\n", "| value_loss | 2.84 |\n", "---------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 716 |\n", "| ep_rew_mean | 79.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 420 |\n", "| time_elapsed | 6070 |\n", "| total_timesteps | 860160 |\n", "| train/ | |\n", "| approx_kl | 0.061987754 |\n", "| clip_fraction | 0.373 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.935 |\n", "| explained_variance | 0.687 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.137 |\n", "| policy_gradient_loss | -0.0145 |\n", "| value_loss | 1.26 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 714 |\n", "| ep_rew_mean | 78.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 421 |\n", "| time_elapsed | 6084 |\n", "| total_timesteps | 862208 |\n", "| train/ | |\n", "| approx_kl | 0.08781962 |\n", "| clip_fraction | 0.365 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.737 |\n", "| explained_variance | 0.656 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.212 |\n", "| policy_gradient_loss | -0.0105 |\n", "| value_loss | 1.73 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 714 |\n", "| ep_rew_mean | 78.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 422 |\n", "| time_elapsed | 6098 |\n", "| total_timesteps | 864256 |\n", "| train/ | |\n", "| approx_kl | 0.11027957 |\n", "| clip_fraction | 0.432 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.918 |\n", "| explained_variance | 0.487 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0158 |\n", "| policy_gradient_loss | -0.00727 |\n", "| value_loss | 0.718 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 714 |\n", "| ep_rew_mean | 78.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 423 |\n", "| time_elapsed | 6112 |\n", "| total_timesteps | 866304 |\n", "| train/ | |\n", "| approx_kl | 0.017539665 |\n", "| clip_fraction | 0.248 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.08 |\n", "| explained_variance | -0.643 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0132 |\n", "| policy_gradient_loss | -0.0042 |\n", "| value_loss | 0.0479 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 714 |\n", "| ep_rew_mean | 78.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 424 |\n", "| time_elapsed | 6125 |\n", "| total_timesteps | 868352 |\n", "| train/ | |\n", "| approx_kl | 0.021948839 |\n", "| clip_fraction | 0.251 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.07 |\n", "| explained_variance | 0.436 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0253 |\n", "| policy_gradient_loss | -0.00577 |\n", "| value_loss | 0.0326 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 714 |\n", "| ep_rew_mean | 78.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 425 |\n", "| time_elapsed | 6139 |\n", "| total_timesteps | 870400 |\n", "| train/ | |\n", "| approx_kl | 0.015842212 |\n", "| clip_fraction | 0.19 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.06 |\n", "| explained_variance | 0.564 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.00122 |\n", "| policy_gradient_loss | -0.0101 |\n", "| value_loss | 0.0163 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 822 |\n", "| ep_rew_mean | 78.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 426 |\n", "| time_elapsed | 6153 |\n", "| total_timesteps | 872448 |\n", "| train/ | |\n", "| approx_kl | 0.03494744 |\n", "| clip_fraction | 0.264 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.18 |\n", "| explained_variance | 0.514 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0599 |\n", "| policy_gradient_loss | -0.0187 |\n", "| value_loss | 0.0144 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 826 |\n", "| ep_rew_mean | 77.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 427 |\n", "| time_elapsed | 6167 |\n", "| total_timesteps | 874496 |\n", "| train/ | |\n", "| approx_kl | 0.03685309 |\n", "| clip_fraction | 0.246 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.09 |\n", "| explained_variance | 0.867 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0765 |\n", "| policy_gradient_loss | -0.00953 |\n", "| value_loss | 0.28 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 854 |\n", "| ep_rew_mean | 77.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 428 |\n", "| time_elapsed | 6181 |\n", "| total_timesteps | 876544 |\n", "| train/ | |\n", "| approx_kl | 0.13088885 |\n", "| clip_fraction | 0.519 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.01 |\n", "| explained_variance | 0.827 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0612 |\n", "| policy_gradient_loss | -0.0524 |\n", "| value_loss | 0.172 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 857 |\n", "| ep_rew_mean | 77.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 429 |\n", "| time_elapsed | 6195 |\n", "| total_timesteps | 878592 |\n", "| train/ | |\n", "| approx_kl | 0.08569945 |\n", "| clip_fraction | 0.352 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.08 |\n", "| explained_variance | 0.757 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0621 |\n", "| policy_gradient_loss | -0.0353 |\n", "| value_loss | 0.169 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 869 |\n", "| ep_rew_mean | 76.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 430 |\n", "| time_elapsed | 6209 |\n", "| total_timesteps | 880640 |\n", "| train/ | |\n", "| approx_kl | 0.10922198 |\n", "| clip_fraction | 0.397 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.968 |\n", "| explained_variance | 0.894 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0539 |\n", "| policy_gradient_loss | -0.048 |\n", "| value_loss | 0.326 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 864 |\n", "| ep_rew_mean | 75.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 431 |\n", "| time_elapsed | 6223 |\n", "| total_timesteps | 882688 |\n", "| train/ | |\n", "| approx_kl | 0.066638514 |\n", "| clip_fraction | 0.376 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1 |\n", "| explained_variance | 0.904 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0515 |\n", "| policy_gradient_loss | -0.0413 |\n", "| value_loss | 0.293 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 863 |\n", "| ep_rew_mean | 72.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 432 |\n", "| time_elapsed | 6237 |\n", "| total_timesteps | 884736 |\n", "| train/ | |\n", "| approx_kl | 0.06339681 |\n", "| clip_fraction | 0.367 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.992 |\n", "| explained_variance | 0.927 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0856 |\n", "| policy_gradient_loss | -0.0517 |\n", "| value_loss | 0.452 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 859 |\n", "| ep_rew_mean | 69.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 433 |\n", "| time_elapsed | 6250 |\n", "| total_timesteps | 886784 |\n", "| train/ | |\n", "| approx_kl | 0.085071094 |\n", "| clip_fraction | 0.324 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.837 |\n", "| explained_variance | 0.908 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.582 |\n", "| policy_gradient_loss | -0.0382 |\n", "| value_loss | 0.986 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 855 |\n", "| ep_rew_mean | 69 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 434 |\n", "| time_elapsed | 6264 |\n", "| total_timesteps | 888832 |\n", "| train/ | |\n", "| approx_kl | 0.07015162 |\n", "| clip_fraction | 0.341 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.964 |\n", "| explained_variance | 0.957 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0759 |\n", "| policy_gradient_loss | -0.0469 |\n", "| value_loss | 0.539 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 855 |\n", "| ep_rew_mean | 68.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 435 |\n", "| time_elapsed | 6278 |\n", "| total_timesteps | 890880 |\n", "| train/ | |\n", "| approx_kl | 0.06529637 |\n", "| clip_fraction | 0.366 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.945 |\n", "| explained_variance | 0.979 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.108 |\n", "| policy_gradient_loss | -0.055 |\n", "| value_loss | 0.286 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 856 |\n", "| ep_rew_mean | 64.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 436 |\n", "| time_elapsed | 6292 |\n", "| total_timesteps | 892928 |\n", "| train/ | |\n", "| approx_kl | 0.076239444 |\n", "| clip_fraction | 0.436 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.07 |\n", "| explained_variance | 0.97 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0897 |\n", "| policy_gradient_loss | -0.0669 |\n", "| value_loss | 0.227 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 857 |\n", "| ep_rew_mean | 66.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 437 |\n", "| time_elapsed | 6306 |\n", "| total_timesteps | 894976 |\n", "| train/ | |\n", "| approx_kl | 0.26538506 |\n", "| clip_fraction | 0.585 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.716 |\n", "| explained_variance | 0.39 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.666 |\n", "| policy_gradient_loss | 0.0208 |\n", "| value_loss | 8.74 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 854 |\n", "| ep_rew_mean | 60.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 438 |\n", "| time_elapsed | 6320 |\n", "| total_timesteps | 897024 |\n", "| train/ | |\n", "| approx_kl | 0.12529904 |\n", "| clip_fraction | 0.45 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.809 |\n", "| explained_variance | 0.84 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.106 |\n", "| policy_gradient_loss | -0.0274 |\n", "| value_loss | 0.989 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 852 |\n", "| ep_rew_mean | 59.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 439 |\n", "| time_elapsed | 6334 |\n", "| total_timesteps | 899072 |\n", "| train/ | |\n", "| approx_kl | 0.14184646 |\n", "| clip_fraction | 0.401 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.781 |\n", "| explained_variance | 0.859 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0204 |\n", "| policy_gradient_loss | -0.0238 |\n", "| value_loss | 0.884 |\n", "----------------------------------------\n", "Eval num_timesteps=900000, episode_reward=110.40 +/- 82.97\n", "Episode length: 717.40 +/- 132.41\n", "-----------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 717 |\n", "| mean_reward | 110 |\n", "| time/ | |\n", "| total_timesteps | 900000 |\n", "| train/ | |\n", "| approx_kl | 0.105283864 |\n", "| clip_fraction | 0.407 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.873 |\n", "| explained_variance | 0.89 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0949 |\n", "| policy_gradient_loss | -0.0333 |\n", "| value_loss | 0.787 |\n", "-----------------------------------------\n", "New best mean reward!\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 851 |\n", "| ep_rew_mean | 60.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 440 |\n", "| time_elapsed | 6367 |\n", "| total_timesteps | 901120 |\n", "---------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 847 |\n", "| ep_rew_mean | 60.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 441 |\n", "| time_elapsed | 6381 |\n", "| total_timesteps | 903168 |\n", "| train/ | |\n", "| approx_kl | 0.17059132 |\n", "| clip_fraction | 0.469 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.724 |\n", "| explained_variance | 0.578 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.357 |\n", "| policy_gradient_loss | 0.0251 |\n", "| value_loss | 6.24 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 844 |\n", "| ep_rew_mean | 56.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 442 |\n", "| time_elapsed | 6395 |\n", "| total_timesteps | 905216 |\n", "| train/ | |\n", "| approx_kl | 0.2080266 |\n", "| clip_fraction | 0.417 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.725 |\n", "| explained_variance | 0.882 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.374 |\n", "| policy_gradient_loss | -0.0348 |\n", "| value_loss | 0.937 |\n", "---------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 848 |\n", "| ep_rew_mean | 58.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 443 |\n", "| time_elapsed | 6409 |\n", "| total_timesteps | 907264 |\n", "| train/ | |\n", "| approx_kl | 0.110680036 |\n", "| clip_fraction | 0.386 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.727 |\n", "| explained_variance | 0.921 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00357 |\n", "| policy_gradient_loss | -0.041 |\n", "| value_loss | 0.595 |\n", "-----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 856 |\n", "| ep_rew_mean | 60.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 444 |\n", "| time_elapsed | 6423 |\n", "| total_timesteps | 909312 |\n", "| train/ | |\n", "| approx_kl | 0.1866729 |\n", "| clip_fraction | 0.446 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.666 |\n", "| explained_variance | 0.707 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.22 |\n", "| policy_gradient_loss | 0.0192 |\n", "| value_loss | 6.5 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 859 |\n", "| ep_rew_mean | 62.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 445 |\n", "| time_elapsed | 6437 |\n", "| total_timesteps | 911360 |\n", "| train/ | |\n", "| approx_kl | 0.13591464 |\n", "| clip_fraction | 0.362 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.631 |\n", "| explained_variance | 0.949 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.162 |\n", "| policy_gradient_loss | -0.0394 |\n", "| value_loss | 0.709 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 863 |\n", "| ep_rew_mean | 64.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 446 |\n", "| time_elapsed | 6451 |\n", "| total_timesteps | 913408 |\n", "| train/ | |\n", "| approx_kl | 0.3940553 |\n", "| clip_fraction | 0.409 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.41 |\n", "| explained_variance | 0.599 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.13 |\n", "| policy_gradient_loss | 0.0573 |\n", "| value_loss | 8.95 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 868 |\n", "| ep_rew_mean | 66.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 447 |\n", "| time_elapsed | 6465 |\n", "| total_timesteps | 915456 |\n", "| train/ | |\n", "| approx_kl | 0.84783477 |\n", "| clip_fraction | 0.496 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.404 |\n", "| explained_variance | 0.553 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.697 |\n", "| policy_gradient_loss | 0.0414 |\n", "| value_loss | 10.9 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 861 |\n", "| ep_rew_mean | 64.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 448 |\n", "| time_elapsed | 6479 |\n", "| total_timesteps | 917504 |\n", "| train/ | |\n", "| approx_kl | 0.29317397 |\n", "| clip_fraction | 0.349 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.414 |\n", "| explained_variance | 0.79 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.358 |\n", "| policy_gradient_loss | -0.00528 |\n", "| value_loss | 1.74 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 856 |\n", "| ep_rew_mean | 66 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 449 |\n", "| time_elapsed | 6493 |\n", "| total_timesteps | 919552 |\n", "| train/ | |\n", "| approx_kl | 0.36573124 |\n", "| clip_fraction | 0.327 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.34 |\n", "| explained_variance | 0.628 |\n", "| learning_rate | 0.0003 |\n", "| loss | 4.25 |\n", "| policy_gradient_loss | 0.0259 |\n", "| value_loss | 5.53 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 856 |\n", "| ep_rew_mean | 69.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 450 |\n", "| time_elapsed | 6507 |\n", "| total_timesteps | 921600 |\n", "| train/ | |\n", "| approx_kl | 0.17583467 |\n", "| clip_fraction | 0.268 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.364 |\n", "| explained_variance | 0.851 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.266 |\n", "| policy_gradient_loss | -0.00691 |\n", "| value_loss | 2.52 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 849 |\n", "| ep_rew_mean | 67.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 451 |\n", "| time_elapsed | 6521 |\n", "| total_timesteps | 923648 |\n", "| train/ | |\n", "| approx_kl | 1.2637489 |\n", "| clip_fraction | 0.418 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.273 |\n", "| explained_variance | 0.514 |\n", "| learning_rate | 0.0003 |\n", "| loss | 6.11 |\n", "| policy_gradient_loss | 0.133 |\n", "| value_loss | 31.8 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 847 |\n", "| ep_rew_mean | 67.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 452 |\n", "| time_elapsed | 6535 |\n", "| total_timesteps | 925696 |\n", "| train/ | |\n", "| approx_kl | 0.2580928 |\n", "| clip_fraction | 0.368 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.432 |\n", "| explained_variance | 0.805 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.364 |\n", "| policy_gradient_loss | 0.00434 |\n", "| value_loss | 2.24 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 843 |\n", "| ep_rew_mean | 67.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 453 |\n", "| time_elapsed | 6549 |\n", "| total_timesteps | 927744 |\n", "| train/ | |\n", "| approx_kl | 0.18201317 |\n", "| clip_fraction | 0.358 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.494 |\n", "| explained_variance | 0.792 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.131 |\n", "| policy_gradient_loss | -0.00261 |\n", "| value_loss | 1.85 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 844 |\n", "| ep_rew_mean | 67.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 454 |\n", "| time_elapsed | 6563 |\n", "| total_timesteps | 929792 |\n", "| train/ | |\n", "| approx_kl | 0.19552687 |\n", "| clip_fraction | 0.323 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.515 |\n", "| explained_variance | 0.861 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.322 |\n", "| policy_gradient_loss | -0.0212 |\n", "| value_loss | 1.63 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 848 |\n", "| ep_rew_mean | 68.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 455 |\n", "| time_elapsed | 6577 |\n", "| total_timesteps | 931840 |\n", "| train/ | |\n", "| approx_kl | 0.06874326 |\n", "| clip_fraction | 0.295 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.63 |\n", "| explained_variance | 0.943 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.722 |\n", "| policy_gradient_loss | -0.0195 |\n", "| value_loss | 1.37 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 848 |\n", "| ep_rew_mean | 66.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 456 |\n", "| time_elapsed | 6591 |\n", "| total_timesteps | 933888 |\n", "| train/ | |\n", "| approx_kl | 0.13810986 |\n", "| clip_fraction | 0.377 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.688 |\n", "| explained_variance | 0.723 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.693 |\n", "| policy_gradient_loss | 0.00545 |\n", "| value_loss | 4.31 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 847 |\n", "| ep_rew_mean | 69.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 457 |\n", "| time_elapsed | 6605 |\n", "| total_timesteps | 935936 |\n", "| train/ | |\n", "| approx_kl | 0.122220084 |\n", "| clip_fraction | 0.341 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.722 |\n", "| explained_variance | 0.938 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.189 |\n", "| policy_gradient_loss | -0.0333 |\n", "| value_loss | 1.52 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 843 |\n", "| ep_rew_mean | 68 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 458 |\n", "| time_elapsed | 6619 |\n", "| total_timesteps | 937984 |\n", "| train/ | |\n", "| approx_kl | 0.47225353 |\n", "| clip_fraction | 0.449 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.495 |\n", "| explained_variance | 0.53 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.649 |\n", "| policy_gradient_loss | 0.0795 |\n", "| value_loss | 21.8 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 845 |\n", "| ep_rew_mean | 67.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 459 |\n", "| time_elapsed | 6633 |\n", "| total_timesteps | 940032 |\n", "| train/ | |\n", "| approx_kl | 0.15772733 |\n", "| clip_fraction | 0.342 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.595 |\n", "| explained_variance | 0.924 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.03 |\n", "| policy_gradient_loss | -0.0103 |\n", "| value_loss | 1.6 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 842 |\n", "| ep_rew_mean | 72.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 460 |\n", "| time_elapsed | 6647 |\n", "| total_timesteps | 942080 |\n", "| train/ | |\n", "| approx_kl | 0.26609698 |\n", "| clip_fraction | 0.352 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.496 |\n", "| explained_variance | 0.934 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0329 |\n", "| policy_gradient_loss | -0.0364 |\n", "| value_loss | 0.579 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 842 |\n", "| ep_rew_mean | 72.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 461 |\n", "| time_elapsed | 6661 |\n", "| total_timesteps | 944128 |\n", "| train/ | |\n", "| approx_kl | 0.1755164 |\n", "| clip_fraction | 0.345 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.502 |\n", "| explained_variance | 0.691 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.09 |\n", "| policy_gradient_loss | 0.0225 |\n", "| value_loss | 13.5 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 842 |\n", "| ep_rew_mean | 72.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 462 |\n", "| time_elapsed | 6675 |\n", "| total_timesteps | 946176 |\n", "| train/ | |\n", "| approx_kl | 0.06846498 |\n", "| clip_fraction | 0.325 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.674 |\n", "| explained_variance | 0.285 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.106 |\n", "| policy_gradient_loss | -0.0141 |\n", "| value_loss | 0.282 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 842 |\n", "| ep_rew_mean | 72.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 463 |\n", "| time_elapsed | 6689 |\n", "| total_timesteps | 948224 |\n", "| train/ | |\n", "| approx_kl | 0.08178307 |\n", "| clip_fraction | 0.376 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.961 |\n", "| explained_variance | 0.633 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0695 |\n", "| policy_gradient_loss | -0.028 |\n", "| value_loss | 0.074 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 842 |\n", "| ep_rew_mean | 72.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 464 |\n", "| time_elapsed | 6702 |\n", "| total_timesteps | 950272 |\n", "| train/ | |\n", "| approx_kl | 0.09058708 |\n", "| clip_fraction | 0.35 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.906 |\n", "| explained_variance | 0.399 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0635 |\n", "| policy_gradient_loss | -0.0297 |\n", "| value_loss | 0.0434 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 842 |\n", "| ep_rew_mean | 72.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 465 |\n", "| time_elapsed | 6716 |\n", "| total_timesteps | 952320 |\n", "| train/ | |\n", "| approx_kl | 0.05833247 |\n", "| clip_fraction | 0.261 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.927 |\n", "| explained_variance | 0.273 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0521 |\n", "| policy_gradient_loss | -0.0287 |\n", "| value_loss | 0.016 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 842 |\n", "| ep_rew_mean | 72.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 466 |\n", "| time_elapsed | 6730 |\n", "| total_timesteps | 954368 |\n", "| train/ | |\n", "| approx_kl | 0.09474261 |\n", "| clip_fraction | 0.272 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.848 |\n", "| explained_variance | -0.0854 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0177 |\n", "| policy_gradient_loss | -0.0207 |\n", "| value_loss | 0.125 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 842 |\n", "| ep_rew_mean | 72.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 467 |\n", "| time_elapsed | 6744 |\n", "| total_timesteps | 956416 |\n", "| train/ | |\n", "| approx_kl | 0.07365493 |\n", "| clip_fraction | 0.29 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.654 |\n", "| explained_variance | -1.43 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0798 |\n", "| policy_gradient_loss | -0.0168 |\n", "| value_loss | 0.0194 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 842 |\n", "| ep_rew_mean | 72.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 468 |\n", "| time_elapsed | 6758 |\n", "| total_timesteps | 958464 |\n", "| train/ | |\n", "| approx_kl | 0.032600958 |\n", "| clip_fraction | 0.209 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.636 |\n", "| explained_variance | 0.427 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0134 |\n", "| policy_gradient_loss | -0.018 |\n", "| value_loss | 0.0414 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 842 |\n", "| ep_rew_mean | 72.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 469 |\n", "| time_elapsed | 6772 |\n", "| total_timesteps | 960512 |\n", "| train/ | |\n", "| approx_kl | 0.045960642 |\n", "| clip_fraction | 0.258 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.648 |\n", "| explained_variance | 0.104 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0473 |\n", "| policy_gradient_loss | -0.0285 |\n", "| value_loss | 0.0206 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 842 |\n", "| ep_rew_mean | 72.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 470 |\n", "| time_elapsed | 6786 |\n", "| total_timesteps | 962560 |\n", "| train/ | |\n", "| approx_kl | 0.048935942 |\n", "| clip_fraction | 0.234 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.767 |\n", "| explained_variance | 0.322 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0231 |\n", "| policy_gradient_loss | -0.0349 |\n", "| value_loss | 0.036 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 842 |\n", "| ep_rew_mean | 72.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 471 |\n", "| time_elapsed | 6800 |\n", "| total_timesteps | 964608 |\n", "| train/ | |\n", "| approx_kl | 0.04976157 |\n", "| clip_fraction | 0.232 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.715 |\n", "| explained_variance | 0.572 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0605 |\n", "| policy_gradient_loss | -0.0307 |\n", "| value_loss | 0.0184 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 842 |\n", "| ep_rew_mean | 72.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 472 |\n", "| time_elapsed | 6814 |\n", "| total_timesteps | 966656 |\n", "| train/ | |\n", "| approx_kl | 0.038256977 |\n", "| clip_fraction | 0.22 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.68 |\n", "| explained_variance | 0.69 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0674 |\n", "| policy_gradient_loss | -0.0307 |\n", "| value_loss | 0.0124 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 73.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 473 |\n", "| time_elapsed | 6828 |\n", "| total_timesteps | 968704 |\n", "| train/ | |\n", "| approx_kl | 0.112811126 |\n", "| clip_fraction | 0.36 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.698 |\n", "| explained_variance | -2.2 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0412 |\n", "| policy_gradient_loss | -0.026 |\n", "| value_loss | 0.206 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 72.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 474 |\n", "| time_elapsed | 6842 |\n", "| total_timesteps | 970752 |\n", "| train/ | |\n", "| approx_kl | 0.053778827 |\n", "| clip_fraction | 0.19 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.626 |\n", "| explained_variance | 0.948 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0207 |\n", "| policy_gradient_loss | -0.0107 |\n", "| value_loss | 0.149 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 994 |\n", "| ep_rew_mean | 72.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 475 |\n", "| time_elapsed | 6856 |\n", "| total_timesteps | 972800 |\n", "| train/ | |\n", "| approx_kl | 0.14239061 |\n", "| clip_fraction | 0.342 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.578 |\n", "| explained_variance | 0.904 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0424 |\n", "| policy_gradient_loss | -0.0434 |\n", "| value_loss | 0.549 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 970 |\n", "| ep_rew_mean | 73.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 476 |\n", "| time_elapsed | 6870 |\n", "| total_timesteps | 974848 |\n", "| train/ | |\n", "| approx_kl | 0.28165948 |\n", "| clip_fraction | 0.355 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.621 |\n", "| explained_variance | 0.762 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.164 |\n", "| policy_gradient_loss | 0.0222 |\n", "| value_loss | 2.15 |\n", "----------------------------------------\n", "Eval num_timesteps=975000, episode_reward=92.20 +/- 21.11\n", "Episode length: 814.00 +/- 127.81\n", "---------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 814 |\n", "| mean_reward | 92.2 |\n", "| time/ | |\n", "| total_timesteps | 975000 |\n", "| train/ | |\n", "| approx_kl | 0.1686508 |\n", "| clip_fraction | 0.328 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.633 |\n", "| explained_variance | 0.851 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.17 |\n", "| policy_gradient_loss | -0.00836 |\n", "| value_loss | 3.23 |\n", "---------------------------------------\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 962 |\n", "| ep_rew_mean | 75.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 477 |\n", "| time_elapsed | 6905 |\n", "| total_timesteps | 976896 |\n", "---------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 964 |\n", "| ep_rew_mean | 77.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 478 |\n", "| time_elapsed | 6919 |\n", "| total_timesteps | 978944 |\n", "| train/ | |\n", "| approx_kl | 0.10172522 |\n", "| clip_fraction | 0.328 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.672 |\n", "| explained_variance | 0.955 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.375 |\n", "| policy_gradient_loss | -0.0206 |\n", "| value_loss | 1.8 |\n", "----------------------------------------\n", "--------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 964 |\n", "| ep_rew_mean | 78.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 479 |\n", "| time_elapsed | 6933 |\n", "| total_timesteps | 980992 |\n", "| train/ | |\n", "| approx_kl | 1.309742 |\n", "| clip_fraction | 0.597 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.61 |\n", "| explained_variance | 0.733 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.76 |\n", "| policy_gradient_loss | 0.0638 |\n", "| value_loss | 14.4 |\n", "--------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 965 |\n", "| ep_rew_mean | 80.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 480 |\n", "| time_elapsed | 6947 |\n", "| total_timesteps | 983040 |\n", "| train/ | |\n", "| approx_kl | 0.17637901 |\n", "| clip_fraction | 0.429 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.664 |\n", "| explained_variance | 0.945 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0752 |\n", "| policy_gradient_loss | -0.032 |\n", "| value_loss | 1.51 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 970 |\n", "| ep_rew_mean | 84.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 481 |\n", "| time_elapsed | 6962 |\n", "| total_timesteps | 985088 |\n", "| train/ | |\n", "| approx_kl | 0.1734243 |\n", "| clip_fraction | 0.405 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.668 |\n", "| explained_variance | 0.964 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.427 |\n", "| policy_gradient_loss | -0.0256 |\n", "| value_loss | 1.23 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 968 |\n", "| ep_rew_mean | 85.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 482 |\n", "| time_elapsed | 6976 |\n", "| total_timesteps | 987136 |\n", "| train/ | |\n", "| approx_kl | 0.7309092 |\n", "| clip_fraction | 0.586 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.596 |\n", "| explained_variance | 0.794 |\n", "| learning_rate | 0.0003 |\n", "| loss | 7.88 |\n", "| policy_gradient_loss | 0.0406 |\n", "| value_loss | 11 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 969 |\n", "| ep_rew_mean | 86.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 483 |\n", "| time_elapsed | 6990 |\n", "| total_timesteps | 989184 |\n", "| train/ | |\n", "| approx_kl | 0.14671694 |\n", "| clip_fraction | 0.345 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.549 |\n", "| explained_variance | 0.971 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.611 |\n", "| policy_gradient_loss | -0.0225 |\n", "| value_loss | 1.23 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 968 |\n", "| ep_rew_mean | 88 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 484 |\n", "| time_elapsed | 7004 |\n", "| total_timesteps | 991232 |\n", "| train/ | |\n", "| approx_kl | 0.09822461 |\n", "| clip_fraction | 0.347 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.694 |\n", "| explained_variance | 0.956 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.641 |\n", "| policy_gradient_loss | -0.0116 |\n", "| value_loss | 2.74 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 964 |\n", "| ep_rew_mean | 89.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 485 |\n", "| time_elapsed | 7018 |\n", "| total_timesteps | 993280 |\n", "| train/ | |\n", "| approx_kl | 0.11414385 |\n", "| clip_fraction | 0.32 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.629 |\n", "| explained_variance | 0.771 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.31 |\n", "| policy_gradient_loss | 0.0172 |\n", "| value_loss | 7.14 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 963 |\n", "| ep_rew_mean | 90.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 486 |\n", "| time_elapsed | 7032 |\n", "| total_timesteps | 995328 |\n", "| train/ | |\n", "| approx_kl | 0.09596471 |\n", "| clip_fraction | 0.323 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.652 |\n", "| explained_variance | 0.988 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.602 |\n", "| policy_gradient_loss | -0.0278 |\n", "| value_loss | 1.12 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 961 |\n", "| ep_rew_mean | 90.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 487 |\n", "| time_elapsed | 7046 |\n", "| total_timesteps | 997376 |\n", "| train/ | |\n", "| approx_kl | 0.06672311 |\n", "| clip_fraction | 0.348 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.754 |\n", "| explained_variance | 0.992 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.474 |\n", "| policy_gradient_loss | -0.0315 |\n", "| value_loss | 0.946 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 961 |\n", "| ep_rew_mean | 92.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 488 |\n", "| time_elapsed | 7060 |\n", "| total_timesteps | 999424 |\n", "| train/ | |\n", "| approx_kl | 0.091556594 |\n", "| clip_fraction | 0.339 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.725 |\n", "| explained_variance | 0.985 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.532 |\n", "| policy_gradient_loss | -0.0356 |\n", "| value_loss | 1.09 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 961 |\n", "| ep_rew_mean | 93.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 489 |\n", "| time_elapsed | 7074 |\n", "| total_timesteps | 1001472 |\n", "| train/ | |\n", "| approx_kl | 0.09094575 |\n", "| clip_fraction | 0.376 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.76 |\n", "| explained_variance | 0.993 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.125 |\n", "| policy_gradient_loss | -0.0445 |\n", "| value_loss | 0.746 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 960 |\n", "| ep_rew_mean | 92.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 490 |\n", "| time_elapsed | 7088 |\n", "| total_timesteps | 1003520 |\n", "| train/ | |\n", "| approx_kl | 0.080990225 |\n", "| clip_fraction | 0.298 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.708 |\n", "| explained_variance | 0.961 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.17 |\n", "| policy_gradient_loss | -0.022 |\n", "| value_loss | 1.65 |\n", "-----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 959 |\n", "| ep_rew_mean | 94.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 491 |\n", "| time_elapsed | 7102 |\n", "| total_timesteps | 1005568 |\n", "| train/ | |\n", "| approx_kl | 0.2677441 |\n", "| clip_fraction | 0.395 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.705 |\n", "| explained_variance | 0.925 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.144 |\n", "| policy_gradient_loss | -0.0368 |\n", "| value_loss | 1.62 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 957 |\n", "| ep_rew_mean | 94.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 492 |\n", "| time_elapsed | 7116 |\n", "| total_timesteps | 1007616 |\n", "| train/ | |\n", "| approx_kl | 0.10455708 |\n", "| clip_fraction | 0.324 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.659 |\n", "| explained_variance | 0.989 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.261 |\n", "| policy_gradient_loss | -0.0335 |\n", "| value_loss | 0.953 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 955 |\n", "| ep_rew_mean | 94 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 493 |\n", "| time_elapsed | 7130 |\n", "| total_timesteps | 1009664 |\n", "| train/ | |\n", "| approx_kl | 0.09711806 |\n", "| clip_fraction | 0.303 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.587 |\n", "| explained_variance | 0.985 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.547 |\n", "| policy_gradient_loss | -0.0242 |\n", "| value_loss | 1.11 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 957 |\n", "| ep_rew_mean | 95.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 494 |\n", "| time_elapsed | 7144 |\n", "| total_timesteps | 1011712 |\n", "| train/ | |\n", "| approx_kl | 0.13705784 |\n", "| clip_fraction | 0.31 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.577 |\n", "| explained_variance | 0.953 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.03 |\n", "| policy_gradient_loss | -0.0329 |\n", "| value_loss | 2.3 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 959 |\n", "| ep_rew_mean | 96.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 495 |\n", "| time_elapsed | 7158 |\n", "| total_timesteps | 1013760 |\n", "| train/ | |\n", "| approx_kl | 0.13409123 |\n", "| clip_fraction | 0.379 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.607 |\n", "| explained_variance | 0.77 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.779 |\n", "| policy_gradient_loss | -0.0003 |\n", "| value_loss | 4.54 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 964 |\n", "| ep_rew_mean | 97 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 496 |\n", "| time_elapsed | 7173 |\n", "| total_timesteps | 1015808 |\n", "| train/ | |\n", "| approx_kl | 0.21774489 |\n", "| clip_fraction | 0.354 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.578 |\n", "| explained_variance | 0.951 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.25 |\n", "| policy_gradient_loss | -0.0305 |\n", "| value_loss | 1.39 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 969 |\n", "| ep_rew_mean | 94.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 497 |\n", "| time_elapsed | 7187 |\n", "| total_timesteps | 1017856 |\n", "| train/ | |\n", "| approx_kl | 0.103577055 |\n", "| clip_fraction | 0.315 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.645 |\n", "| explained_variance | 0.899 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.58 |\n", "| policy_gradient_loss | -0.0163 |\n", "| value_loss | 3.45 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 970 |\n", "| ep_rew_mean | 94 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 498 |\n", "| time_elapsed | 7201 |\n", "| total_timesteps | 1019904 |\n", "| train/ | |\n", "| approx_kl | 0.108971246 |\n", "| clip_fraction | 0.375 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.7 |\n", "| explained_variance | 0.937 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.25 |\n", "| policy_gradient_loss | -0.0254 |\n", "| value_loss | 1.08 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 977 |\n", "| ep_rew_mean | 94.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 499 |\n", "| time_elapsed | 7215 |\n", "| total_timesteps | 1021952 |\n", "| train/ | |\n", "| approx_kl | 0.06802239 |\n", "| clip_fraction | 0.307 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.656 |\n", "| explained_variance | 0.966 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.327 |\n", "| policy_gradient_loss | -0.0325 |\n", "| value_loss | 1.72 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 980 |\n", "| ep_rew_mean | 95.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 500 |\n", "| time_elapsed | 7229 |\n", "| total_timesteps | 1024000 |\n", "| train/ | |\n", "| approx_kl | 0.12035249 |\n", "| clip_fraction | 0.337 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.633 |\n", "| explained_variance | 0.925 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.317 |\n", "| policy_gradient_loss | -0.028 |\n", "| value_loss | 1.91 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 980 |\n", "| ep_rew_mean | 95.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 501 |\n", "| time_elapsed | 7243 |\n", "| total_timesteps | 1026048 |\n", "| train/ | |\n", "| approx_kl | 0.15837331 |\n", "| clip_fraction | 0.328 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.519 |\n", "| explained_variance | 0.942 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.109 |\n", "| policy_gradient_loss | -0.0436 |\n", "| value_loss | 0.7 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 988 |\n", "| ep_rew_mean | 95.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 502 |\n", "| time_elapsed | 7257 |\n", "| total_timesteps | 1028096 |\n", "| train/ | |\n", "| approx_kl | 0.25118428 |\n", "| clip_fraction | 0.375 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.524 |\n", "| explained_variance | 0.835 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.163 |\n", "| policy_gradient_loss | -0.0309 |\n", "| value_loss | 1.41 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 993 |\n", "| ep_rew_mean | 96.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 503 |\n", "| time_elapsed | 7271 |\n", "| total_timesteps | 1030144 |\n", "| train/ | |\n", "| approx_kl | 0.11144666 |\n", "| clip_fraction | 0.266 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.488 |\n", "| explained_variance | 0.91 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.111 |\n", "| policy_gradient_loss | -0.0269 |\n", "| value_loss | 0.718 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 995 |\n", "| ep_rew_mean | 95 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 504 |\n", "| time_elapsed | 7286 |\n", "| total_timesteps | 1032192 |\n", "| train/ | |\n", "| approx_kl | 0.24936053 |\n", "| clip_fraction | 0.37 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.542 |\n", "| explained_variance | 0.882 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0132 |\n", "| policy_gradient_loss | -0.0377 |\n", "| value_loss | 0.39 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 996 |\n", "| ep_rew_mean | 95.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 505 |\n", "| time_elapsed | 7300 |\n", "| total_timesteps | 1034240 |\n", "| train/ | |\n", "| approx_kl | 0.1728574 |\n", "| clip_fraction | 0.388 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.673 |\n", "| explained_variance | 0.848 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0453 |\n", "| policy_gradient_loss | -0.046 |\n", "| value_loss | 0.618 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1e+03 |\n", "| ep_rew_mean | 92.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 506 |\n", "| time_elapsed | 7314 |\n", "| total_timesteps | 1036288 |\n", "| train/ | |\n", "| approx_kl | 0.17551374 |\n", "| clip_fraction | 0.416 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.764 |\n", "| explained_variance | 0.886 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0508 |\n", "| policy_gradient_loss | -0.0512 |\n", "| value_loss | 0.294 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.01e+03 |\n", "| ep_rew_mean | 91.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 507 |\n", "| time_elapsed | 7328 |\n", "| total_timesteps | 1038336 |\n", "| train/ | |\n", "| approx_kl | 0.13522936 |\n", "| clip_fraction | 0.43 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.811 |\n", "| explained_variance | 0.873 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0287 |\n", "| policy_gradient_loss | -0.048 |\n", "| value_loss | 0.359 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.01e+03 |\n", "| ep_rew_mean | 91.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 508 |\n", "| time_elapsed | 7342 |\n", "| total_timesteps | 1040384 |\n", "| train/ | |\n", "| approx_kl | 0.18778564 |\n", "| clip_fraction | 0.385 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.761 |\n", "| explained_variance | 0.912 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0429 |\n", "| policy_gradient_loss | -0.0546 |\n", "| value_loss | 0.268 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.01e+03 |\n", "| ep_rew_mean | 89.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 509 |\n", "| time_elapsed | 7356 |\n", "| total_timesteps | 1042432 |\n", "| train/ | |\n", "| approx_kl | 0.1883404 |\n", "| clip_fraction | 0.394 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.741 |\n", "| explained_variance | 0.775 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0349 |\n", "| policy_gradient_loss | -0.0287 |\n", "| value_loss | 0.597 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 751 |\n", "| ep_rew_mean | 86.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 510 |\n", "| time_elapsed | 7370 |\n", "| total_timesteps | 1044480 |\n", "| train/ | |\n", "| approx_kl | 0.16439745 |\n", "| clip_fraction | 0.407 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.731 |\n", "| explained_variance | 0.832 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0696 |\n", "| policy_gradient_loss | -0.0462 |\n", "| value_loss | 0.425 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 761 |\n", "| ep_rew_mean | 87.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 511 |\n", "| time_elapsed | 7384 |\n", "| total_timesteps | 1046528 |\n", "| train/ | |\n", "| approx_kl | 0.12802264 |\n", "| clip_fraction | 0.397 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.766 |\n", "| explained_variance | 0.786 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.231 |\n", "| policy_gradient_loss | -0.03 |\n", "| value_loss | 0.817 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 761 |\n", "| ep_rew_mean | 87.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 512 |\n", "| time_elapsed | 7399 |\n", "| total_timesteps | 1048576 |\n", "| train/ | |\n", "| approx_kl | 0.16266143 |\n", "| clip_fraction | 0.441 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.746 |\n", "| explained_variance | 0.884 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0612 |\n", "| policy_gradient_loss | -0.0597 |\n", "| value_loss | 0.388 |\n", "----------------------------------------\n", "Eval num_timesteps=1050000, episode_reward=67.40 +/- 30.33\n", "Episode length: 946.40 +/- 81.85\n", "----------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 946 |\n", "| mean_reward | 67.4 |\n", "| time/ | |\n", "| total_timesteps | 1050000 |\n", "| train/ | |\n", "| approx_kl | 0.09044021 |\n", "| clip_fraction | 0.392 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.796 |\n", "| explained_variance | 0.927 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0146 |\n", "| policy_gradient_loss | -0.0452 |\n", "| value_loss | 0.259 |\n", "----------------------------------------\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 769 |\n", "| ep_rew_mean | 88.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 513 |\n", "| time_elapsed | 7437 |\n", "| total_timesteps | 1050624 |\n", "---------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 765 |\n", "| ep_rew_mean | 87.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 514 |\n", "| time_elapsed | 7451 |\n", "| total_timesteps | 1052672 |\n", "| train/ | |\n", "| approx_kl | 0.13337079 |\n", "| clip_fraction | 0.392 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.676 |\n", "| explained_variance | 0.451 |\n", "| learning_rate | 0.0003 |\n", "| loss | 5.68 |\n", "| policy_gradient_loss | 0.0234 |\n", "| value_loss | 6.28 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 764 |\n", "| ep_rew_mean | 85.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 515 |\n", "| time_elapsed | 7465 |\n", "| total_timesteps | 1054720 |\n", "| train/ | |\n", "| approx_kl | 0.1410482 |\n", "| clip_fraction | 0.388 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.648 |\n", "| explained_variance | 0.603 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.215 |\n", "| policy_gradient_loss | -0.0165 |\n", "| value_loss | 3.22 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 764 |\n", "| ep_rew_mean | 84.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 516 |\n", "| time_elapsed | 7479 |\n", "| total_timesteps | 1056768 |\n", "| train/ | |\n", "| approx_kl | 0.17721947 |\n", "| clip_fraction | 0.401 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.739 |\n", "| explained_variance | 0.771 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0285 |\n", "| policy_gradient_loss | -0.0282 |\n", "| value_loss | 1.08 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 769 |\n", "| ep_rew_mean | 83.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 517 |\n", "| time_elapsed | 7493 |\n", "| total_timesteps | 1058816 |\n", "| train/ | |\n", "| approx_kl | 0.14834747 |\n", "| clip_fraction | 0.431 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.76 |\n", "| explained_variance | 0.803 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0151 |\n", "| policy_gradient_loss | -0.046 |\n", "| value_loss | 0.461 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 765 |\n", "| ep_rew_mean | 79.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 518 |\n", "| time_elapsed | 7507 |\n", "| total_timesteps | 1060864 |\n", "| train/ | |\n", "| approx_kl | 0.1370758 |\n", "| clip_fraction | 0.4 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.666 |\n", "| explained_variance | 0.823 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0254 |\n", "| policy_gradient_loss | -0.0359 |\n", "| value_loss | 0.927 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 767 |\n", "| ep_rew_mean | 79.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 519 |\n", "| time_elapsed | 7522 |\n", "| total_timesteps | 1062912 |\n", "| train/ | |\n", "| approx_kl | 0.13583666 |\n", "| clip_fraction | 0.393 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.653 |\n", "| explained_variance | 0.551 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1 |\n", "| policy_gradient_loss | 0.000734 |\n", "| value_loss | 5.09 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 762 |\n", "| ep_rew_mean | 77.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 520 |\n", "| time_elapsed | 7536 |\n", "| total_timesteps | 1064960 |\n", "| train/ | |\n", "| approx_kl | 0.098127685 |\n", "| clip_fraction | 0.339 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.665 |\n", "| explained_variance | 0.898 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.026 |\n", "| policy_gradient_loss | -0.0327 |\n", "| value_loss | 0.866 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 768 |\n", "| ep_rew_mean | 76.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 521 |\n", "| time_elapsed | 7549 |\n", "| total_timesteps | 1067008 |\n", "| train/ | |\n", "| approx_kl | 0.14229754 |\n", "| clip_fraction | 0.356 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.696 |\n", "| explained_variance | 0.813 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.173 |\n", "| policy_gradient_loss | -0.0342 |\n", "| value_loss | 1.58 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 769 |\n", "| ep_rew_mean | 76.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 522 |\n", "| time_elapsed | 7563 |\n", "| total_timesteps | 1069056 |\n", "| train/ | |\n", "| approx_kl | 0.086442456 |\n", "| clip_fraction | 0.329 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.589 |\n", "| explained_variance | 0.759 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.599 |\n", "| policy_gradient_loss | 0.0258 |\n", "| value_loss | 5.51 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 774 |\n", "| ep_rew_mean | 76.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 523 |\n", "| time_elapsed | 7577 |\n", "| total_timesteps | 1071104 |\n", "| train/ | |\n", "| approx_kl | 0.12793438 |\n", "| clip_fraction | 0.334 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.651 |\n", "| explained_variance | 0.892 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0439 |\n", "| policy_gradient_loss | -0.0381 |\n", "| value_loss | 0.956 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 779 |\n", "| ep_rew_mean | 77.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 524 |\n", "| time_elapsed | 7591 |\n", "| total_timesteps | 1073152 |\n", "| train/ | |\n", "| approx_kl | 0.08660039 |\n", "| clip_fraction | 0.266 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.45 |\n", "| explained_variance | 0.867 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.67 |\n", "| policy_gradient_loss | -0.00415 |\n", "| value_loss | 4.77 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 785 |\n", "| ep_rew_mean | 76.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 525 |\n", "| time_elapsed | 7605 |\n", "| total_timesteps | 1075200 |\n", "| train/ | |\n", "| approx_kl | 0.07803546 |\n", "| clip_fraction | 0.253 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.5 |\n", "| explained_variance | 0.908 |\n", "| learning_rate | 0.0003 |\n", "| loss | 5.41 |\n", "| policy_gradient_loss | -0.0138 |\n", "| value_loss | 10.8 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 787 |\n", "| ep_rew_mean | 75.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 526 |\n", "| time_elapsed | 7619 |\n", "| total_timesteps | 1077248 |\n", "| train/ | |\n", "| approx_kl | 0.13441801 |\n", "| clip_fraction | 0.39 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.689 |\n", "| explained_variance | 0.917 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.162 |\n", "| policy_gradient_loss | -0.0253 |\n", "| value_loss | 1.69 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 785 |\n", "| ep_rew_mean | 76.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 527 |\n", "| time_elapsed | 7633 |\n", "| total_timesteps | 1079296 |\n", "| train/ | |\n", "| approx_kl | 0.12398209 |\n", "| clip_fraction | 0.347 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.659 |\n", "| explained_variance | 0.925 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.222 |\n", "| policy_gradient_loss | -0.0288 |\n", "| value_loss | 1.83 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 785 |\n", "| ep_rew_mean | 80.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 528 |\n", "| time_elapsed | 7646 |\n", "| total_timesteps | 1081344 |\n", "| train/ | |\n", "| approx_kl | 0.08450763 |\n", "| clip_fraction | 0.305 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.585 |\n", "| explained_variance | 0.931 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.59 |\n", "| policy_gradient_loss | -0.00759 |\n", "| value_loss | 5.82 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 787 |\n", "| ep_rew_mean | 81.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 529 |\n", "| time_elapsed | 7660 |\n", "| total_timesteps | 1083392 |\n", "| train/ | |\n", "| approx_kl | 0.39583436 |\n", "| clip_fraction | 0.351 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.417 |\n", "| explained_variance | 0.756 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.39 |\n", "| policy_gradient_loss | 0.0662 |\n", "| value_loss | 38.5 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 790 |\n", "| ep_rew_mean | 83.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 530 |\n", "| time_elapsed | 7674 |\n", "| total_timesteps | 1085440 |\n", "| train/ | |\n", "| approx_kl | 0.18948892 |\n", "| clip_fraction | 0.379 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.499 |\n", "| explained_variance | 0.934 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.931 |\n", "| policy_gradient_loss | 0.0198 |\n", "| value_loss | 3.73 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 793 |\n", "| ep_rew_mean | 84.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 531 |\n", "| time_elapsed | 7688 |\n", "| total_timesteps | 1087488 |\n", "| train/ | |\n", "| approx_kl | 0.076432884 |\n", "| clip_fraction | 0.241 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.416 |\n", "| explained_variance | 0.973 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.845 |\n", "| policy_gradient_loss | -0.00809 |\n", "| value_loss | 4.06 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 798 |\n", "| ep_rew_mean | 87.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 532 |\n", "| time_elapsed | 7702 |\n", "| total_timesteps | 1089536 |\n", "| train/ | |\n", "| approx_kl | 0.07121172 |\n", "| clip_fraction | 0.249 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.562 |\n", "| explained_variance | 0.989 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.29 |\n", "| policy_gradient_loss | -0.0123 |\n", "| value_loss | 1.75 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 797 |\n", "| ep_rew_mean | 88.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 533 |\n", "| time_elapsed | 7715 |\n", "| total_timesteps | 1091584 |\n", "| train/ | |\n", "| approx_kl | 0.12120481 |\n", "| clip_fraction | 0.279 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.57 |\n", "| explained_variance | 0.99 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.353 |\n", "| policy_gradient_loss | -0.0114 |\n", "| value_loss | 2.26 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 800 |\n", "| ep_rew_mean | 93.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 534 |\n", "| time_elapsed | 7729 |\n", "| total_timesteps | 1093632 |\n", "| train/ | |\n", "| approx_kl | 0.0934096 |\n", "| clip_fraction | 0.327 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.642 |\n", "| explained_variance | 0.994 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.287 |\n", "| policy_gradient_loss | -0.0225 |\n", "| value_loss | 1.38 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 800 |\n", "| ep_rew_mean | 95.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 535 |\n", "| time_elapsed | 7743 |\n", "| total_timesteps | 1095680 |\n", "| train/ | |\n", "| approx_kl | 0.2023407 |\n", "| clip_fraction | 0.428 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.663 |\n", "| explained_variance | 0.911 |\n", "| learning_rate | 0.0003 |\n", "| loss | 6.59 |\n", "| policy_gradient_loss | 0.0213 |\n", "| value_loss | 37.2 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 796 |\n", "| ep_rew_mean | 97.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 536 |\n", "| time_elapsed | 7757 |\n", "| total_timesteps | 1097728 |\n", "| train/ | |\n", "| approx_kl | 0.12273581 |\n", "| clip_fraction | 0.322 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.622 |\n", "| explained_variance | 0.992 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.685 |\n", "| policy_gradient_loss | -0.0205 |\n", "| value_loss | 2.71 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 799 |\n", "| ep_rew_mean | 100 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 537 |\n", "| time_elapsed | 7770 |\n", "| total_timesteps | 1099776 |\n", "| train/ | |\n", "| approx_kl | 0.16223097 |\n", "| clip_fraction | 0.306 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.628 |\n", "| explained_variance | 0.969 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.885 |\n", "| policy_gradient_loss | -0.0206 |\n", "| value_loss | 4.11 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 797 |\n", "| ep_rew_mean | 104 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 538 |\n", "| time_elapsed | 7784 |\n", "| total_timesteps | 1101824 |\n", "| train/ | |\n", "| approx_kl | 0.07187468 |\n", "| clip_fraction | 0.334 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.702 |\n", "| explained_variance | 0.998 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.393 |\n", "| policy_gradient_loss | -0.0216 |\n", "| value_loss | 1.14 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 796 |\n", "| ep_rew_mean | 107 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 539 |\n", "| time_elapsed | 7798 |\n", "| total_timesteps | 1103872 |\n", "| train/ | |\n", "| approx_kl | 0.07297625 |\n", "| clip_fraction | 0.31 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.698 |\n", "| explained_variance | 0.998 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.198 |\n", "| policy_gradient_loss | -0.0228 |\n", "| value_loss | 1.48 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 800 |\n", "| ep_rew_mean | 108 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 540 |\n", "| time_elapsed | 7812 |\n", "| total_timesteps | 1105920 |\n", "| train/ | |\n", "| approx_kl | 0.08309823 |\n", "| clip_fraction | 0.31 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.71 |\n", "| explained_variance | 0.998 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.858 |\n", "| policy_gradient_loss | -0.0225 |\n", "| value_loss | 1.36 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 792 |\n", "| ep_rew_mean | 112 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 541 |\n", "| time_elapsed | 7826 |\n", "| total_timesteps | 1107968 |\n", "| train/ | |\n", "| approx_kl | 0.29016992 |\n", "| clip_fraction | 0.406 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.656 |\n", "| explained_variance | 0.956 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.912 |\n", "| policy_gradient_loss | -0.0178 |\n", "| value_loss | 7.84 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 782 |\n", "| ep_rew_mean | 112 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 542 |\n", "| time_elapsed | 7840 |\n", "| total_timesteps | 1110016 |\n", "| train/ | |\n", "| approx_kl | 0.07715938 |\n", "| clip_fraction | 0.348 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.759 |\n", "| explained_variance | 0.983 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.99 |\n", "| policy_gradient_loss | 0.00107 |\n", "| value_loss | 3.86 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 784 |\n", "| ep_rew_mean | 114 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 543 |\n", "| time_elapsed | 7853 |\n", "| total_timesteps | 1112064 |\n", "| train/ | |\n", "| approx_kl | 0.1606583 |\n", "| clip_fraction | 0.338 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.526 |\n", "| explained_variance | 0.877 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.95 |\n", "| policy_gradient_loss | -0.0134 |\n", "| value_loss | 6.46 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 785 |\n", "| ep_rew_mean | 116 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 544 |\n", "| time_elapsed | 7867 |\n", "| total_timesteps | 1114112 |\n", "| train/ | |\n", "| approx_kl | 0.0831108 |\n", "| clip_fraction | 0.303 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.63 |\n", "| explained_variance | 0.945 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.15 |\n", "| policy_gradient_loss | -0.0118 |\n", "| value_loss | 10.3 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 785 |\n", "| ep_rew_mean | 118 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 545 |\n", "| time_elapsed | 7881 |\n", "| total_timesteps | 1116160 |\n", "| train/ | |\n", "| approx_kl | 0.25589606 |\n", "| clip_fraction | 0.411 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.607 |\n", "| explained_variance | 0.956 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.342 |\n", "| policy_gradient_loss | -0.00587 |\n", "| value_loss | 3.34 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 776 |\n", "| ep_rew_mean | 118 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 546 |\n", "| time_elapsed | 7895 |\n", "| total_timesteps | 1118208 |\n", "| train/ | |\n", "| approx_kl | 0.14086646 |\n", "| clip_fraction | 0.374 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.542 |\n", "| explained_variance | 0.913 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.424 |\n", "| policy_gradient_loss | -0.0204 |\n", "| value_loss | 4.24 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 783 |\n", "| ep_rew_mean | 118 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 547 |\n", "| time_elapsed | 7909 |\n", "| total_timesteps | 1120256 |\n", "| train/ | |\n", "| approx_kl | 0.11106922 |\n", "| clip_fraction | 0.338 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.625 |\n", "| explained_variance | 0.941 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.231 |\n", "| policy_gradient_loss | -0.03 |\n", "| value_loss | 1.87 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 775 |\n", "| ep_rew_mean | 117 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 548 |\n", "| time_elapsed | 7923 |\n", "| total_timesteps | 1122304 |\n", "| train/ | |\n", "| approx_kl | 0.102617405 |\n", "| clip_fraction | 0.383 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.678 |\n", "| explained_variance | 0.941 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0499 |\n", "| policy_gradient_loss | -0.0451 |\n", "| value_loss | 0.877 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 761 |\n", "| ep_rew_mean | 116 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 549 |\n", "| time_elapsed | 7936 |\n", "| total_timesteps | 1124352 |\n", "| train/ | |\n", "| approx_kl | 0.18986458 |\n", "| clip_fraction | 0.407 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.773 |\n", "| explained_variance | 0.938 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0329 |\n", "| policy_gradient_loss | -0.0441 |\n", "| value_loss | 0.938 |\n", "----------------------------------------\n", "Eval num_timesteps=1125000, episode_reward=32.50 +/- 10.74\n", "Episode length: 549.20 +/- 116.68\n", "-----------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 549 |\n", "| mean_reward | 32.5 |\n", "| time/ | |\n", "| total_timesteps | 1125000 |\n", "| train/ | |\n", "| approx_kl | 0.098822504 |\n", "| clip_fraction | 0.368 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.668 |\n", "| explained_variance | 0.816 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0891 |\n", "| policy_gradient_loss | -0.0182 |\n", "| value_loss | 1.72 |\n", "-----------------------------------------\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 757 |\n", "| ep_rew_mean | 115 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 550 |\n", "| time_elapsed | 7965 |\n", "| total_timesteps | 1126400 |\n", "---------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 748 |\n", "| ep_rew_mean | 113 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 551 |\n", "| time_elapsed | 7981 |\n", "| total_timesteps | 1128448 |\n", "| train/ | |\n", "| approx_kl | 0.18897694 |\n", "| clip_fraction | 0.395 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.649 |\n", "| explained_variance | 0.883 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0117 |\n", "| policy_gradient_loss | -0.0377 |\n", "| value_loss | 0.89 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 744 |\n", "| ep_rew_mean | 112 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 552 |\n", "| time_elapsed | 7995 |\n", "| total_timesteps | 1130496 |\n", "| train/ | |\n", "| approx_kl | 0.105050646 |\n", "| clip_fraction | 0.367 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.737 |\n", "| explained_variance | 0.895 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0342 |\n", "| policy_gradient_loss | -0.041 |\n", "| value_loss | 0.603 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 731 |\n", "| ep_rew_mean | 111 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 553 |\n", "| time_elapsed | 8009 |\n", "| total_timesteps | 1132544 |\n", "| train/ | |\n", "| approx_kl | 0.09947795 |\n", "| clip_fraction | 0.375 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.844 |\n", "| explained_variance | 0.868 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0358 |\n", "| policy_gradient_loss | -0.0449 |\n", "| value_loss | 0.68 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 731 |\n", "| ep_rew_mean | 110 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 554 |\n", "| time_elapsed | 8022 |\n", "| total_timesteps | 1134592 |\n", "| train/ | |\n", "| approx_kl | 0.1078112 |\n", "| clip_fraction | 0.346 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.684 |\n", "| explained_variance | 0.908 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0643 |\n", "| policy_gradient_loss | -0.0427 |\n", "| value_loss | 0.53 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 727 |\n", "| ep_rew_mean | 110 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 555 |\n", "| time_elapsed | 8036 |\n", "| total_timesteps | 1136640 |\n", "| train/ | |\n", "| approx_kl | 0.07935234 |\n", "| clip_fraction | 0.337 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.671 |\n", "| explained_variance | 0.914 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0568 |\n", "| policy_gradient_loss | -0.0478 |\n", "| value_loss | 0.431 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 727 |\n", "| ep_rew_mean | 110 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 556 |\n", "| time_elapsed | 8050 |\n", "| total_timesteps | 1138688 |\n", "| train/ | |\n", "| approx_kl | 0.10401054 |\n", "| clip_fraction | 0.39 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.8 |\n", "| explained_variance | 0.916 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0274 |\n", "| policy_gradient_loss | -0.0636 |\n", "| value_loss | 0.321 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 722 |\n", "| ep_rew_mean | 108 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 557 |\n", "| time_elapsed | 8064 |\n", "| total_timesteps | 1140736 |\n", "| train/ | |\n", "| approx_kl | 0.09861056 |\n", "| clip_fraction | 0.385 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.807 |\n", "| explained_variance | 0.889 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0378 |\n", "| policy_gradient_loss | -0.0631 |\n", "| value_loss | 0.308 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 722 |\n", "| ep_rew_mean | 106 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 558 |\n", "| time_elapsed | 8077 |\n", "| total_timesteps | 1142784 |\n", "| train/ | |\n", "| approx_kl | 0.10142133 |\n", "| clip_fraction | 0.385 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.845 |\n", "| explained_variance | 0.8 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.036 |\n", "| policy_gradient_loss | -0.0281 |\n", "| value_loss | 0.775 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 719 |\n", "| ep_rew_mean | 104 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 559 |\n", "| time_elapsed | 8091 |\n", "| total_timesteps | 1144832 |\n", "| train/ | |\n", "| approx_kl | 0.111896254 |\n", "| clip_fraction | 0.338 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.607 |\n", "| explained_variance | 0.914 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0322 |\n", "| policy_gradient_loss | -0.0518 |\n", "| value_loss | 0.244 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 720 |\n", "| ep_rew_mean | 104 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 560 |\n", "| time_elapsed | 8105 |\n", "| total_timesteps | 1146880 |\n", "| train/ | |\n", "| approx_kl | 0.12456137 |\n", "| clip_fraction | 0.38 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.698 |\n", "| explained_variance | 0.8 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.016 |\n", "| policy_gradient_loss | -0.0477 |\n", "| value_loss | 0.613 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 725 |\n", "| ep_rew_mean | 104 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 561 |\n", "| time_elapsed | 8119 |\n", "| total_timesteps | 1148928 |\n", "| train/ | |\n", "| approx_kl | 0.20282304 |\n", "| clip_fraction | 0.452 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.635 |\n", "| explained_variance | 0.567 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0297 |\n", "| policy_gradient_loss | -0.00891 |\n", "| value_loss | 1.75 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 728 |\n", "| ep_rew_mean | 105 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 562 |\n", "| time_elapsed | 8133 |\n", "| total_timesteps | 1150976 |\n", "| train/ | |\n", "| approx_kl | 0.18036062 |\n", "| clip_fraction | 0.412 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.611 |\n", "| explained_variance | 0.853 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0188 |\n", "| policy_gradient_loss | -0.0422 |\n", "| value_loss | 0.625 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 721 |\n", "| ep_rew_mean | 101 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 563 |\n", "| time_elapsed | 8146 |\n", "| total_timesteps | 1153024 |\n", "| train/ | |\n", "| approx_kl | 0.14970735 |\n", "| clip_fraction | 0.405 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.567 |\n", "| explained_variance | 0.771 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.172 |\n", "| policy_gradient_loss | -0.0226 |\n", "| value_loss | 2.14 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 715 |\n", "| ep_rew_mean | 95.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 564 |\n", "| time_elapsed | 8160 |\n", "| total_timesteps | 1155072 |\n", "| train/ | |\n", "| approx_kl | 0.10027686 |\n", "| clip_fraction | 0.352 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.625 |\n", "| explained_variance | 0.782 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0478 |\n", "| policy_gradient_loss | -0.0374 |\n", "| value_loss | 0.596 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 715 |\n", "| ep_rew_mean | 91.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 565 |\n", "| time_elapsed | 8174 |\n", "| total_timesteps | 1157120 |\n", "| train/ | |\n", "| approx_kl | 0.14167139 |\n", "| clip_fraction | 0.331 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.597 |\n", "| explained_variance | 0.888 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0631 |\n", "| policy_gradient_loss | -0.0404 |\n", "| value_loss | 0.455 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 719 |\n", "| ep_rew_mean | 89 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 566 |\n", "| time_elapsed | 8188 |\n", "| total_timesteps | 1159168 |\n", "| train/ | |\n", "| approx_kl | 0.10026175 |\n", "| clip_fraction | 0.374 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.715 |\n", "| explained_variance | 0.769 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0645 |\n", "| policy_gradient_loss | -0.0445 |\n", "| value_loss | 0.493 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 710 |\n", "| ep_rew_mean | 84.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 567 |\n", "| time_elapsed | 8202 |\n", "| total_timesteps | 1161216 |\n", "| train/ | |\n", "| approx_kl | 0.10079577 |\n", "| clip_fraction | 0.336 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.607 |\n", "| explained_variance | 0.905 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.051 |\n", "| policy_gradient_loss | -0.0458 |\n", "| value_loss | 0.25 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 708 |\n", "| ep_rew_mean | 82.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 568 |\n", "| time_elapsed | 8216 |\n", "| total_timesteps | 1163264 |\n", "| train/ | |\n", "| approx_kl | 0.27028576 |\n", "| clip_fraction | 0.482 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.541 |\n", "| explained_variance | 0.446 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.19 |\n", "| policy_gradient_loss | 0.0405 |\n", "| value_loss | 8.03 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 707 |\n", "| ep_rew_mean | 76.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 569 |\n", "| time_elapsed | 8229 |\n", "| total_timesteps | 1165312 |\n", "| train/ | |\n", "| approx_kl | 0.16427608 |\n", "| clip_fraction | 0.376 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.617 |\n", "| explained_variance | 0.912 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0731 |\n", "| policy_gradient_loss | -0.0391 |\n", "| value_loss | 0.385 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 712 |\n", "| ep_rew_mean | 73 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 570 |\n", "| time_elapsed | 8243 |\n", "| total_timesteps | 1167360 |\n", "| train/ | |\n", "| approx_kl | 0.11011449 |\n", "| clip_fraction | 0.347 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.663 |\n", "| explained_variance | 0.909 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0163 |\n", "| policy_gradient_loss | -0.045 |\n", "| value_loss | 0.357 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 709 |\n", "| ep_rew_mean | 71.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 571 |\n", "| time_elapsed | 8257 |\n", "| total_timesteps | 1169408 |\n", "| train/ | |\n", "| approx_kl | 0.1232683 |\n", "| clip_fraction | 0.324 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.554 |\n", "| explained_variance | 0.869 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0806 |\n", "| policy_gradient_loss | -0.0266 |\n", "| value_loss | 0.514 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 716 |\n", "| ep_rew_mean | 71 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 572 |\n", "| time_elapsed | 8271 |\n", "| total_timesteps | 1171456 |\n", "| train/ | |\n", "| approx_kl | 0.1762493 |\n", "| clip_fraction | 0.377 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.561 |\n", "| explained_variance | 0.879 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00311 |\n", "| policy_gradient_loss | -0.0414 |\n", "| value_loss | 0.442 |\n", "---------------------------------------\n", "--------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 719 |\n", "| ep_rew_mean | 69.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 573 |\n", "| time_elapsed | 8285 |\n", "| total_timesteps | 1173504 |\n", "| train/ | |\n", "| approx_kl | 0.342027 |\n", "| clip_fraction | 0.384 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.458 |\n", "| explained_variance | 0.481 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.164 |\n", "| policy_gradient_loss | 0.0387 |\n", "| value_loss | 6.4 |\n", "--------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 730 |\n", "| ep_rew_mean | 66.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 574 |\n", "| time_elapsed | 8299 |\n", "| total_timesteps | 1175552 |\n", "| train/ | |\n", "| approx_kl | 0.1871935 |\n", "| clip_fraction | 0.372 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.558 |\n", "| explained_variance | 0.778 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.217 |\n", "| policy_gradient_loss | -0.0235 |\n", "| value_loss | 1.2 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 736 |\n", "| ep_rew_mean | 65 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 575 |\n", "| time_elapsed | 8313 |\n", "| total_timesteps | 1177600 |\n", "| train/ | |\n", "| approx_kl | 0.11278613 |\n", "| clip_fraction | 0.311 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.527 |\n", "| explained_variance | 0.898 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0965 |\n", "| policy_gradient_loss | -0.0335 |\n", "| value_loss | 0.586 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 733 |\n", "| ep_rew_mean | 63.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 576 |\n", "| time_elapsed | 8327 |\n", "| total_timesteps | 1179648 |\n", "| train/ | |\n", "| approx_kl | 0.11223337 |\n", "| clip_fraction | 0.352 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.588 |\n", "| explained_variance | 0.842 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.833 |\n", "| policy_gradient_loss | -0.0192 |\n", "| value_loss | 1.91 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 736 |\n", "| ep_rew_mean | 62.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 577 |\n", "| time_elapsed | 8341 |\n", "| total_timesteps | 1181696 |\n", "| train/ | |\n", "| approx_kl | 0.1357804 |\n", "| clip_fraction | 0.316 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.512 |\n", "| explained_variance | 0.697 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.888 |\n", "| policy_gradient_loss | 0.0173 |\n", "| value_loss | 6.32 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 739 |\n", "| ep_rew_mean | 62.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 578 |\n", "| time_elapsed | 8355 |\n", "| total_timesteps | 1183744 |\n", "| train/ | |\n", "| approx_kl | 0.16212207 |\n", "| clip_fraction | 0.406 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.715 |\n", "| explained_variance | 0.781 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.474 |\n", "| policy_gradient_loss | -0.00421 |\n", "| value_loss | 4.03 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 739 |\n", "| ep_rew_mean | 65.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 579 |\n", "| time_elapsed | 8368 |\n", "| total_timesteps | 1185792 |\n", "| train/ | |\n", "| approx_kl | 0.14280477 |\n", "| clip_fraction | 0.285 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.411 |\n", "| explained_variance | 0.808 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.908 |\n", "| policy_gradient_loss | -0.0116 |\n", "| value_loss | 5.38 |\n", "----------------------------------------\n", "--------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 733 |\n", "| ep_rew_mean | 67.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 580 |\n", "| time_elapsed | 8382 |\n", "| total_timesteps | 1187840 |\n", "| train/ | |\n", "| approx_kl | 0.395846 |\n", "| clip_fraction | 0.426 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.355 |\n", "| explained_variance | 0.666 |\n", "| learning_rate | 0.0003 |\n", "| loss | 6.06 |\n", "| policy_gradient_loss | 0.0405 |\n", "| value_loss | 67.3 |\n", "--------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 737 |\n", "| ep_rew_mean | 68.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 581 |\n", "| time_elapsed | 8397 |\n", "| total_timesteps | 1189888 |\n", "| train/ | |\n", "| approx_kl | 0.2204346 |\n", "| clip_fraction | 0.306 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.282 |\n", "| explained_variance | 0.786 |\n", "| learning_rate | 0.0003 |\n", "| loss | 26.4 |\n", "| policy_gradient_loss | 0.0233 |\n", "| value_loss | 31.9 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 740 |\n", "| ep_rew_mean | 69.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 582 |\n", "| time_elapsed | 8411 |\n", "| total_timesteps | 1191936 |\n", "| train/ | |\n", "| approx_kl | 0.7208297 |\n", "| clip_fraction | 0.369 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.291 |\n", "| explained_variance | 0.756 |\n", "| learning_rate | 0.0003 |\n", "| loss | 7.04 |\n", "| policy_gradient_loss | 0.0198 |\n", "| value_loss | 16.3 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 742 |\n", "| ep_rew_mean | 70.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 583 |\n", "| time_elapsed | 8425 |\n", "| total_timesteps | 1193984 |\n", "| train/ | |\n", "| approx_kl | 0.1932968 |\n", "| clip_fraction | 0.282 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.383 |\n", "| explained_variance | 0.892 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.658 |\n", "| policy_gradient_loss | -0.00447 |\n", "| value_loss | 5.65 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 750 |\n", "| ep_rew_mean | 70.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 584 |\n", "| time_elapsed | 8439 |\n", "| total_timesteps | 1196032 |\n", "| train/ | |\n", "| approx_kl | 0.2098594 |\n", "| clip_fraction | 0.304 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.356 |\n", "| explained_variance | 0.894 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.878 |\n", "| policy_gradient_loss | 0.000371 |\n", "| value_loss | 3.62 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 754 |\n", "| ep_rew_mean | 72.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 585 |\n", "| time_elapsed | 8453 |\n", "| total_timesteps | 1198080 |\n", "| train/ | |\n", "| approx_kl | 0.23508471 |\n", "| clip_fraction | 0.32 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.401 |\n", "| explained_variance | 0.938 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.289 |\n", "| policy_gradient_loss | -0.0268 |\n", "| value_loss | 1.43 |\n", "----------------------------------------\n", "Eval num_timesteps=1200000, episode_reward=201.20 +/- 93.85\n", "Episode length: 667.60 +/- 60.26\n", "----------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 668 |\n", "| mean_reward | 201 |\n", "| time/ | |\n", "| total_timesteps | 1200000 |\n", "| train/ | |\n", "| approx_kl | 0.13794339 |\n", "| clip_fraction | 0.346 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.504 |\n", "| explained_variance | 0.811 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.297 |\n", "| policy_gradient_loss | 0.00883 |\n", "| value_loss | 4.29 |\n", "----------------------------------------\n", "New best mean reward!\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 757 |\n", "| ep_rew_mean | 73.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 586 |\n", "| time_elapsed | 8484 |\n", "| total_timesteps | 1200128 |\n", "---------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 753 |\n", "| ep_rew_mean | 72.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 587 |\n", "| time_elapsed | 8498 |\n", "| total_timesteps | 1202176 |\n", "| train/ | |\n", "| approx_kl | 0.13450986 |\n", "| clip_fraction | 0.319 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.451 |\n", "| explained_variance | 0.877 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.207 |\n", "| policy_gradient_loss | -0.00311 |\n", "| value_loss | 1.89 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 753 |\n", "| ep_rew_mean | 72.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 588 |\n", "| time_elapsed | 8512 |\n", "| total_timesteps | 1204224 |\n", "| train/ | |\n", "| approx_kl | 0.16736883 |\n", "| clip_fraction | 0.321 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.514 |\n", "| explained_variance | 0.886 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0297 |\n", "| policy_gradient_loss | -0.0354 |\n", "| value_loss | 1.06 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 745 |\n", "| ep_rew_mean | 72.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 589 |\n", "| time_elapsed | 8526 |\n", "| total_timesteps | 1206272 |\n", "| train/ | |\n", "| approx_kl | 0.17459065 |\n", "| clip_fraction | 0.33 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.498 |\n", "| explained_variance | 0.866 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0233 |\n", "| policy_gradient_loss | -0.0297 |\n", "| value_loss | 0.897 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 751 |\n", "| ep_rew_mean | 74.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 590 |\n", "| time_elapsed | 8540 |\n", "| total_timesteps | 1208320 |\n", "| train/ | |\n", "| approx_kl | 0.11062151 |\n", "| clip_fraction | 0.311 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.544 |\n", "| explained_variance | 0.921 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.049 |\n", "| policy_gradient_loss | -0.0323 |\n", "| value_loss | 0.741 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 747 |\n", "| ep_rew_mean | 76.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 591 |\n", "| time_elapsed | 8555 |\n", "| total_timesteps | 1210368 |\n", "| train/ | |\n", "| approx_kl | 0.13910642 |\n", "| clip_fraction | 0.356 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.581 |\n", "| explained_variance | 0.701 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.264 |\n", "| policy_gradient_loss | 0.00578 |\n", "| value_loss | 5.02 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 746 |\n", "| ep_rew_mean | 79.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 592 |\n", "| time_elapsed | 8568 |\n", "| total_timesteps | 1212416 |\n", "| train/ | |\n", "| approx_kl | 0.14703187 |\n", "| clip_fraction | 0.442 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.645 |\n", "| explained_variance | 0.79 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.42 |\n", "| policy_gradient_loss | -0.0017 |\n", "| value_loss | 4.72 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 745 |\n", "| ep_rew_mean | 82.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 593 |\n", "| time_elapsed | 8582 |\n", "| total_timesteps | 1214464 |\n", "| train/ | |\n", "| approx_kl | 0.35693195 |\n", "| clip_fraction | 0.477 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.497 |\n", "| explained_variance | 0.554 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.81 |\n", "| policy_gradient_loss | 0.0457 |\n", "| value_loss | 33.5 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 742 |\n", "| ep_rew_mean | 85.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 594 |\n", "| time_elapsed | 8596 |\n", "| total_timesteps | 1216512 |\n", "| train/ | |\n", "| approx_kl | 0.26003867 |\n", "| clip_fraction | 0.388 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.436 |\n", "| explained_variance | 0.643 |\n", "| learning_rate | 0.0003 |\n", "| loss | 17.6 |\n", "| policy_gradient_loss | 0.04 |\n", "| value_loss | 26.2 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 749 |\n", "| ep_rew_mean | 88.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 595 |\n", "| time_elapsed | 8610 |\n", "| total_timesteps | 1218560 |\n", "| train/ | |\n", "| approx_kl | 0.2368792 |\n", "| clip_fraction | 0.323 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.408 |\n", "| explained_variance | 0.733 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.01 |\n", "| policy_gradient_loss | 0.0241 |\n", "| value_loss | 20.3 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 745 |\n", "| ep_rew_mean | 94.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 596 |\n", "| time_elapsed | 8624 |\n", "| total_timesteps | 1220608 |\n", "| train/ | |\n", "| approx_kl | 0.19553457 |\n", "| clip_fraction | 0.32 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.305 |\n", "| explained_variance | 0.787 |\n", "| learning_rate | 0.0003 |\n", "| loss | 6.74 |\n", "| policy_gradient_loss | 0.0347 |\n", "| value_loss | 17.8 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 96.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 597 |\n", "| time_elapsed | 8638 |\n", "| total_timesteps | 1222656 |\n", "| train/ | |\n", "| approx_kl | 0.14873238 |\n", "| clip_fraction | 0.294 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.366 |\n", "| explained_variance | 0.901 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.15 |\n", "| policy_gradient_loss | 0.00425 |\n", "| value_loss | 11.1 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 733 |\n", "| ep_rew_mean | 104 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 598 |\n", "| time_elapsed | 8652 |\n", "| total_timesteps | 1224704 |\n", "| train/ | |\n", "| approx_kl | 0.1316641 |\n", "| clip_fraction | 0.254 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.341 |\n", "| explained_variance | 0.935 |\n", "| learning_rate | 0.0003 |\n", "| loss | 5.94 |\n", "| policy_gradient_loss | 0.00167 |\n", "| value_loss | 49.5 |\n", "---------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 739 |\n", "| ep_rew_mean | 111 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 599 |\n", "| time_elapsed | 8666 |\n", "| total_timesteps | 1226752 |\n", "| train/ | |\n", "| approx_kl | 0.062129278 |\n", "| clip_fraction | 0.214 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.513 |\n", "| explained_variance | 0.975 |\n", "| learning_rate | 0.0003 |\n", "| loss | 7.19 |\n", "| policy_gradient_loss | 0.0079 |\n", "| value_loss | 13 |\n", "-----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 739 |\n", "| ep_rew_mean | 117 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 600 |\n", "| time_elapsed | 8680 |\n", "| total_timesteps | 1228800 |\n", "| train/ | |\n", "| approx_kl | 0.0924662 |\n", "| clip_fraction | 0.189 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.427 |\n", "| explained_variance | 0.915 |\n", "| learning_rate | 0.0003 |\n", "| loss | 5.25 |\n", "| policy_gradient_loss | 0.000481 |\n", "| value_loss | 45.3 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 737 |\n", "| ep_rew_mean | 122 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 601 |\n", "| time_elapsed | 8694 |\n", "| total_timesteps | 1230848 |\n", "| train/ | |\n", "| approx_kl | 0.13717516 |\n", "| clip_fraction | 0.253 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.447 |\n", "| explained_variance | 0.954 |\n", "| learning_rate | 0.0003 |\n", "| loss | 4.07 |\n", "| policy_gradient_loss | 0.00845 |\n", "| value_loss | 30.7 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 734 |\n", "| ep_rew_mean | 126 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 602 |\n", "| time_elapsed | 8708 |\n", "| total_timesteps | 1232896 |\n", "| train/ | |\n", "| approx_kl | 0.7218456 |\n", "| clip_fraction | 0.304 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.32 |\n", "| explained_variance | 0.935 |\n", "| learning_rate | 0.0003 |\n", "| loss | 6.7 |\n", "| policy_gradient_loss | -0.00618 |\n", "| value_loss | 28.4 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 734 |\n", "| ep_rew_mean | 128 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 603 |\n", "| time_elapsed | 8722 |\n", "| total_timesteps | 1234944 |\n", "| train/ | |\n", "| approx_kl | 0.70367694 |\n", "| clip_fraction | 0.339 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.409 |\n", "| explained_variance | 0.929 |\n", "| learning_rate | 0.0003 |\n", "| loss | 24.3 |\n", "| policy_gradient_loss | 0.0191 |\n", "| value_loss | 100 |\n", "----------------------------------------\n", "--------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 729 |\n", "| ep_rew_mean | 135 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 604 |\n", "| time_elapsed | 8735 |\n", "| total_timesteps | 1236992 |\n", "| train/ | |\n", "| approx_kl | 0.459268 |\n", "| clip_fraction | 0.323 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.347 |\n", "| explained_variance | 0.938 |\n", "| learning_rate | 0.0003 |\n", "| loss | 11.7 |\n", "| policy_gradient_loss | 0.0124 |\n", "| value_loss | 48.1 |\n", "--------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 723 |\n", "| ep_rew_mean | 135 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 605 |\n", "| time_elapsed | 8749 |\n", "| total_timesteps | 1239040 |\n", "| train/ | |\n", "| approx_kl | 0.13203068 |\n", "| clip_fraction | 0.167 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.267 |\n", "| explained_variance | 0.969 |\n", "| learning_rate | 0.0003 |\n", "| loss | 5.59 |\n", "| policy_gradient_loss | -0.00095 |\n", "| value_loss | 14.2 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 728 |\n", "| ep_rew_mean | 135 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 606 |\n", "| time_elapsed | 8763 |\n", "| total_timesteps | 1241088 |\n", "| train/ | |\n", "| approx_kl | 0.39568943 |\n", "| clip_fraction | 0.241 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.253 |\n", "| explained_variance | 0.866 |\n", "| learning_rate | 0.0003 |\n", "| loss | 5.7 |\n", "| policy_gradient_loss | -0.00568 |\n", "| value_loss | 8.89 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 710 |\n", "| ep_rew_mean | 131 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 607 |\n", "| time_elapsed | 8777 |\n", "| total_timesteps | 1243136 |\n", "| train/ | |\n", "| approx_kl | 0.26118135 |\n", "| clip_fraction | 0.342 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.387 |\n", "| explained_variance | 0.912 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.532 |\n", "| policy_gradient_loss | -0.011 |\n", "| value_loss | 2.47 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 688 |\n", "| ep_rew_mean | 130 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 608 |\n", "| time_elapsed | 8791 |\n", "| total_timesteps | 1245184 |\n", "| train/ | |\n", "| approx_kl | 0.25776416 |\n", "| clip_fraction | 0.362 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.428 |\n", "| explained_variance | 0.875 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.305 |\n", "| policy_gradient_loss | 0.00189 |\n", "| value_loss | 5.15 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 677 |\n", "| ep_rew_mean | 128 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 609 |\n", "| time_elapsed | 8805 |\n", "| total_timesteps | 1247232 |\n", "| train/ | |\n", "| approx_kl | 0.20577785 |\n", "| clip_fraction | 0.334 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.473 |\n", "| explained_variance | 0.936 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.154 |\n", "| policy_gradient_loss | -0.0278 |\n", "| value_loss | 1.63 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 670 |\n", "| ep_rew_mean | 124 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 610 |\n", "| time_elapsed | 8819 |\n", "| total_timesteps | 1249280 |\n", "| train/ | |\n", "| approx_kl | 0.12220313 |\n", "| clip_fraction | 0.298 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.468 |\n", "| explained_variance | 0.905 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0156 |\n", "| policy_gradient_loss | -0.0299 |\n", "| value_loss | 1.14 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 662 |\n", "| ep_rew_mean | 122 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 611 |\n", "| time_elapsed | 8833 |\n", "| total_timesteps | 1251328 |\n", "| train/ | |\n", "| approx_kl | 0.12015412 |\n", "| clip_fraction | 0.311 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.489 |\n", "| explained_variance | 0.825 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0076 |\n", "| policy_gradient_loss | -0.00435 |\n", "| value_loss | 2.29 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 654 |\n", "| ep_rew_mean | 114 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 612 |\n", "| time_elapsed | 8847 |\n", "| total_timesteps | 1253376 |\n", "| train/ | |\n", "| approx_kl | 0.1256675 |\n", "| clip_fraction | 0.325 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.531 |\n", "| explained_variance | 0.918 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.247 |\n", "| policy_gradient_loss | -0.0294 |\n", "| value_loss | 1.01 |\n", "---------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 652 |\n", "| ep_rew_mean | 115 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 613 |\n", "| time_elapsed | 8861 |\n", "| total_timesteps | 1255424 |\n", "| train/ | |\n", "| approx_kl | 0.096629456 |\n", "| clip_fraction | 0.29 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.528 |\n", "| explained_variance | 0.893 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0699 |\n", "| policy_gradient_loss | -0.0243 |\n", "| value_loss | 0.866 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 649 |\n", "| ep_rew_mean | 114 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 614 |\n", "| time_elapsed | 8875 |\n", "| total_timesteps | 1257472 |\n", "| train/ | |\n", "| approx_kl | 0.32586366 |\n", "| clip_fraction | 0.493 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.475 |\n", "| explained_variance | 0.379 |\n", "| learning_rate | 0.0003 |\n", "| loss | 7.98 |\n", "| policy_gradient_loss | 0.073 |\n", "| value_loss | 16.8 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 629 |\n", "| ep_rew_mean | 113 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 615 |\n", "| time_elapsed | 8889 |\n", "| total_timesteps | 1259520 |\n", "| train/ | |\n", "| approx_kl | 0.17975052 |\n", "| clip_fraction | 0.375 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.569 |\n", "| explained_variance | 0.721 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0344 |\n", "| policy_gradient_loss | -0.0383 |\n", "| value_loss | 0.685 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 625 |\n", "| ep_rew_mean | 111 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 616 |\n", "| time_elapsed | 8903 |\n", "| total_timesteps | 1261568 |\n", "| train/ | |\n", "| approx_kl | 0.13593775 |\n", "| clip_fraction | 0.344 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.53 |\n", "| explained_variance | 0.863 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00249 |\n", "| policy_gradient_loss | -0.0408 |\n", "| value_loss | 0.402 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 625 |\n", "| ep_rew_mean | 111 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 617 |\n", "| time_elapsed | 8916 |\n", "| total_timesteps | 1263616 |\n", "| train/ | |\n", "| approx_kl | 0.094249144 |\n", "| clip_fraction | 0.335 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.721 |\n", "| explained_variance | 0.776 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.102 |\n", "| policy_gradient_loss | -0.0319 |\n", "| value_loss | 0.535 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 625 |\n", "| ep_rew_mean | 111 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 618 |\n", "| time_elapsed | 8930 |\n", "| total_timesteps | 1265664 |\n", "| train/ | |\n", "| approx_kl | 0.08192378 |\n", "| clip_fraction | 0.179 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.39 |\n", "| explained_variance | -0.578 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0347 |\n", "| policy_gradient_loss | -0.0181 |\n", "| value_loss | 0.0466 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 625 |\n", "| ep_rew_mean | 111 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 619 |\n", "| time_elapsed | 8944 |\n", "| total_timesteps | 1267712 |\n", "| train/ | |\n", "| approx_kl | 0.06120108 |\n", "| clip_fraction | 0.232 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.594 |\n", "| explained_variance | 0.323 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0279 |\n", "| policy_gradient_loss | -0.0171 |\n", "| value_loss | 0.0302 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 692 |\n", "| ep_rew_mean | 110 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 620 |\n", "| time_elapsed | 8958 |\n", "| total_timesteps | 1269760 |\n", "| train/ | |\n", "| approx_kl | 0.051327117 |\n", "| clip_fraction | 0.229 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.692 |\n", "| explained_variance | 0.356 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0276 |\n", "| policy_gradient_loss | -0.014 |\n", "| value_loss | 0.0636 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 692 |\n", "| ep_rew_mean | 110 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 621 |\n", "| time_elapsed | 8972 |\n", "| total_timesteps | 1271808 |\n", "| train/ | |\n", "| approx_kl | 0.14649358 |\n", "| clip_fraction | 0.32 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.506 |\n", "| explained_variance | 0.855 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.00509 |\n", "| policy_gradient_loss | -0.0425 |\n", "| value_loss | 0.25 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 699 |\n", "| ep_rew_mean | 111 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 622 |\n", "| time_elapsed | 8986 |\n", "| total_timesteps | 1273856 |\n", "| train/ | |\n", "| approx_kl | 0.10476942 |\n", "| clip_fraction | 0.284 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.519 |\n", "| explained_variance | 0.912 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0409 |\n", "| policy_gradient_loss | -0.0452 |\n", "| value_loss | 0.359 |\n", "----------------------------------------\n", "Eval num_timesteps=1275000, episode_reward=40.70 +/- 14.23\n", "Episode length: 638.50 +/- 119.18\n", "----------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 638 |\n", "| mean_reward | 40.7 |\n", "| time/ | |\n", "| total_timesteps | 1275000 |\n", "| train/ | |\n", "| approx_kl | 0.16560456 |\n", "| clip_fraction | 0.367 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.627 |\n", "| explained_variance | 0.824 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0256 |\n", "| policy_gradient_loss | -0.0475 |\n", "| value_loss | 0.419 |\n", "----------------------------------------\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 705 |\n", "| ep_rew_mean | 112 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 623 |\n", "| time_elapsed | 9016 |\n", "| total_timesteps | 1275904 |\n", "---------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 703 |\n", "| ep_rew_mean | 112 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 624 |\n", "| time_elapsed | 9030 |\n", "| total_timesteps | 1277952 |\n", "| train/ | |\n", "| approx_kl | 0.18714446 |\n", "| clip_fraction | 0.353 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.53 |\n", "| explained_variance | 0.311 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0233 |\n", "| policy_gradient_loss | -0.0174 |\n", "| value_loss | 2.04 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 705 |\n", "| ep_rew_mean | 110 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 625 |\n", "| time_elapsed | 9044 |\n", "| total_timesteps | 1280000 |\n", "| train/ | |\n", "| approx_kl | 0.15421537 |\n", "| clip_fraction | 0.361 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.638 |\n", "| explained_variance | 0.645 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0201 |\n", "| policy_gradient_loss | -0.0167 |\n", "| value_loss | 1.47 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 704 |\n", "| ep_rew_mean | 106 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 626 |\n", "| time_elapsed | 9058 |\n", "| total_timesteps | 1282048 |\n", "| train/ | |\n", "| approx_kl | 0.15228626 |\n", "| clip_fraction | 0.318 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.564 |\n", "| explained_variance | 0.811 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0385 |\n", "| policy_gradient_loss | -0.0408 |\n", "| value_loss | 0.465 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 707 |\n", "| ep_rew_mean | 103 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 627 |\n", "| time_elapsed | 9072 |\n", "| total_timesteps | 1284096 |\n", "| train/ | |\n", "| approx_kl | 0.09232226 |\n", "| clip_fraction | 0.281 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.507 |\n", "| explained_variance | 0.878 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.184 |\n", "| policy_gradient_loss | -0.0329 |\n", "| value_loss | 0.396 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 712 |\n", "| ep_rew_mean | 102 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 628 |\n", "| time_elapsed | 9085 |\n", "| total_timesteps | 1286144 |\n", "| train/ | |\n", "| approx_kl | 0.13847145 |\n", "| clip_fraction | 0.343 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.565 |\n", "| explained_variance | 0.788 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0164 |\n", "| policy_gradient_loss | -0.0353 |\n", "| value_loss | 0.58 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 714 |\n", "| ep_rew_mean | 100 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 629 |\n", "| time_elapsed | 9099 |\n", "| total_timesteps | 1288192 |\n", "| train/ | |\n", "| approx_kl | 0.17467141 |\n", "| clip_fraction | 0.371 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.498 |\n", "| explained_variance | 0.439 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.747 |\n", "| policy_gradient_loss | 0.00809 |\n", "| value_loss | 4.03 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 711 |\n", "| ep_rew_mean | 95.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 630 |\n", "| time_elapsed | 9113 |\n", "| total_timesteps | 1290240 |\n", "| train/ | |\n", "| approx_kl | 0.13344589 |\n", "| clip_fraction | 0.347 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.601 |\n", "| explained_variance | 0.687 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0478 |\n", "| policy_gradient_loss | -0.0194 |\n", "| value_loss | 1.15 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 713 |\n", "| ep_rew_mean | 92.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 631 |\n", "| time_elapsed | 9127 |\n", "| total_timesteps | 1292288 |\n", "| train/ | |\n", "| approx_kl | 0.29286253 |\n", "| clip_fraction | 0.396 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.594 |\n", "| explained_variance | 0.811 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.122 |\n", "| policy_gradient_loss | -0.0362 |\n", "| value_loss | 0.689 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 719 |\n", "| ep_rew_mean | 89.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 632 |\n", "| time_elapsed | 9141 |\n", "| total_timesteps | 1294336 |\n", "| train/ | |\n", "| approx_kl | 0.08259044 |\n", "| clip_fraction | 0.325 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.607 |\n", "| explained_variance | 0.755 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.214 |\n", "| policy_gradient_loss | -0.0321 |\n", "| value_loss | 1.22 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 720 |\n", "| ep_rew_mean | 84.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 633 |\n", "| time_elapsed | 9155 |\n", "| total_timesteps | 1296384 |\n", "| train/ | |\n", "| approx_kl | 0.19756918 |\n", "| clip_fraction | 0.366 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.604 |\n", "| explained_variance | 0.884 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0436 |\n", "| policy_gradient_loss | -0.0414 |\n", "| value_loss | 0.502 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 718 |\n", "| ep_rew_mean | 75.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 634 |\n", "| time_elapsed | 9169 |\n", "| total_timesteps | 1298432 |\n", "| train/ | |\n", "| approx_kl | 0.10317444 |\n", "| clip_fraction | 0.356 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.645 |\n", "| explained_variance | 0.746 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.00874 |\n", "| policy_gradient_loss | -0.0216 |\n", "| value_loss | 2.3 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 727 |\n", "| ep_rew_mean | 72.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 635 |\n", "| time_elapsed | 9183 |\n", "| total_timesteps | 1300480 |\n", "| train/ | |\n", "| approx_kl | 0.18565516 |\n", "| clip_fraction | 0.396 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.642 |\n", "| explained_variance | 0.616 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.98 |\n", "| policy_gradient_loss | 0.00865 |\n", "| value_loss | 4.06 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 734 |\n", "| ep_rew_mean | 68.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 636 |\n", "| time_elapsed | 9196 |\n", "| total_timesteps | 1302528 |\n", "| train/ | |\n", "| approx_kl | 0.1612351 |\n", "| clip_fraction | 0.365 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.599 |\n", "| explained_variance | 0.961 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0266 |\n", "| policy_gradient_loss | -0.0419 |\n", "| value_loss | 0.196 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 738 |\n", "| ep_rew_mean | 65.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 637 |\n", "| time_elapsed | 9210 |\n", "| total_timesteps | 1304576 |\n", "| train/ | |\n", "| approx_kl | 0.09993966 |\n", "| clip_fraction | 0.283 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.541 |\n", "| explained_variance | 0.884 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0304 |\n", "| policy_gradient_loss | -0.0331 |\n", "| value_loss | 0.347 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 746 |\n", "| ep_rew_mean | 64.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 638 |\n", "| time_elapsed | 9224 |\n", "| total_timesteps | 1306624 |\n", "| train/ | |\n", "| approx_kl | 0.11823492 |\n", "| clip_fraction | 0.316 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.558 |\n", "| explained_variance | 0.955 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0602 |\n", "| policy_gradient_loss | -0.0483 |\n", "| value_loss | 0.233 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 748 |\n", "| ep_rew_mean | 61.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 639 |\n", "| time_elapsed | 9238 |\n", "| total_timesteps | 1308672 |\n", "| train/ | |\n", "| approx_kl | 0.08644012 |\n", "| clip_fraction | 0.338 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.655 |\n", "| explained_variance | 0.918 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0428 |\n", "| policy_gradient_loss | -0.0455 |\n", "| value_loss | 0.413 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 761 |\n", "| ep_rew_mean | 58.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 640 |\n", "| time_elapsed | 9252 |\n", "| total_timesteps | 1310720 |\n", "| train/ | |\n", "| approx_kl | 0.20700097 |\n", "| clip_fraction | 0.344 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.54 |\n", "| explained_variance | 0.956 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0239 |\n", "| policy_gradient_loss | -0.0362 |\n", "| value_loss | 0.205 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 765 |\n", "| ep_rew_mean | 54.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 641 |\n", "| time_elapsed | 9266 |\n", "| total_timesteps | 1312768 |\n", "| train/ | |\n", "| approx_kl | 0.1679677 |\n", "| clip_fraction | 0.375 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.576 |\n", "| explained_variance | 0.934 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0285 |\n", "| policy_gradient_loss | -0.0536 |\n", "| value_loss | 0.279 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 778 |\n", "| ep_rew_mean | 53.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 642 |\n", "| time_elapsed | 9280 |\n", "| total_timesteps | 1314816 |\n", "| train/ | |\n", "| approx_kl | 0.13396999 |\n", "| clip_fraction | 0.323 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.525 |\n", "| explained_variance | 0.949 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0583 |\n", "| policy_gradient_loss | -0.0464 |\n", "| value_loss | 0.255 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 781 |\n", "| ep_rew_mean | 53 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 643 |\n", "| time_elapsed | 9294 |\n", "| total_timesteps | 1316864 |\n", "| train/ | |\n", "| approx_kl | 1.5308919 |\n", "| clip_fraction | 0.616 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.522 |\n", "| explained_variance | 0.238 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.168 |\n", "| policy_gradient_loss | 0.0439 |\n", "| value_loss | 10.3 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 776 |\n", "| ep_rew_mean | 53.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 644 |\n", "| time_elapsed | 9308 |\n", "| total_timesteps | 1318912 |\n", "| train/ | |\n", "| approx_kl | 0.3758734 |\n", "| clip_fraction | 0.438 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.532 |\n", "| explained_variance | 0.199 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.281 |\n", "| policy_gradient_loss | -0.011 |\n", "| value_loss | 2.94 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 780 |\n", "| ep_rew_mean | 53.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 645 |\n", "| time_elapsed | 9321 |\n", "| total_timesteps | 1320960 |\n", "| train/ | |\n", "| approx_kl | 0.2899809 |\n", "| clip_fraction | 0.374 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.549 |\n", "| explained_variance | 0.536 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0995 |\n", "| policy_gradient_loss | -0.0267 |\n", "| value_loss | 0.801 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 784 |\n", "| ep_rew_mean | 53.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 646 |\n", "| time_elapsed | 9335 |\n", "| total_timesteps | 1323008 |\n", "| train/ | |\n", "| approx_kl | 0.20998585 |\n", "| clip_fraction | 0.343 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.553 |\n", "| explained_variance | 0.636 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0818 |\n", "| policy_gradient_loss | -0.0224 |\n", "| value_loss | 0.725 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 786 |\n", "| ep_rew_mean | 53.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 647 |\n", "| time_elapsed | 9349 |\n", "| total_timesteps | 1325056 |\n", "| train/ | |\n", "| approx_kl | 0.13774574 |\n", "| clip_fraction | 0.378 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.613 |\n", "| explained_variance | 0.897 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.076 |\n", "| policy_gradient_loss | -0.0267 |\n", "| value_loss | 0.422 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 790 |\n", "| ep_rew_mean | 54.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 648 |\n", "| time_elapsed | 9363 |\n", "| total_timesteps | 1327104 |\n", "| train/ | |\n", "| approx_kl | 0.16580775 |\n", "| clip_fraction | 0.368 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.598 |\n", "| explained_variance | 0.889 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.015 |\n", "| policy_gradient_loss | -0.0308 |\n", "| value_loss | 0.366 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 792 |\n", "| ep_rew_mean | 54.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 649 |\n", "| time_elapsed | 9377 |\n", "| total_timesteps | 1329152 |\n", "| train/ | |\n", "| approx_kl | 0.31824228 |\n", "| clip_fraction | 0.413 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.562 |\n", "| explained_variance | 0.198 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.22 |\n", "| policy_gradient_loss | 0.026 |\n", "| value_loss | 8.36 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 795 |\n", "| ep_rew_mean | 54.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 650 |\n", "| time_elapsed | 9391 |\n", "| total_timesteps | 1331200 |\n", "| train/ | |\n", "| approx_kl | 0.26401865 |\n", "| clip_fraction | 0.338 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.486 |\n", "| explained_variance | 0.764 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.148 |\n", "| policy_gradient_loss | -0.0357 |\n", "| value_loss | 0.62 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 798 |\n", "| ep_rew_mean | 54.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 651 |\n", "| time_elapsed | 9405 |\n", "| total_timesteps | 1333248 |\n", "| train/ | |\n", "| approx_kl | 0.20250043 |\n", "| clip_fraction | 0.32 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.491 |\n", "| explained_variance | 0.911 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.168 |\n", "| policy_gradient_loss | -0.0336 |\n", "| value_loss | 0.389 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 794 |\n", "| ep_rew_mean | 50.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 652 |\n", "| time_elapsed | 9419 |\n", "| total_timesteps | 1335296 |\n", "| train/ | |\n", "| approx_kl | 0.16222224 |\n", "| clip_fraction | 0.308 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.49 |\n", "| explained_variance | 0.926 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0678 |\n", "| policy_gradient_loss | -0.0305 |\n", "| value_loss | 0.359 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 797 |\n", "| ep_rew_mean | 50.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 653 |\n", "| time_elapsed | 9433 |\n", "| total_timesteps | 1337344 |\n", "| train/ | |\n", "| approx_kl | 0.13629788 |\n", "| clip_fraction | 0.36 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.604 |\n", "| explained_variance | 0.951 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.073 |\n", "| policy_gradient_loss | -0.0368 |\n", "| value_loss | 0.221 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 799 |\n", "| ep_rew_mean | 51.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 654 |\n", "| time_elapsed | 9447 |\n", "| total_timesteps | 1339392 |\n", "| train/ | |\n", "| approx_kl | 0.14614052 |\n", "| clip_fraction | 0.363 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.594 |\n", "| explained_variance | 0.943 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0427 |\n", "| policy_gradient_loss | -0.0438 |\n", "| value_loss | 0.244 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 731 |\n", "| ep_rew_mean | 51.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 655 |\n", "| time_elapsed | 9461 |\n", "| total_timesteps | 1341440 |\n", "| train/ | |\n", "| approx_kl | 0.27990127 |\n", "| clip_fraction | 0.37 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.525 |\n", "| explained_variance | 0.589 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0622 |\n", "| policy_gradient_loss | -0.0218 |\n", "| value_loss | 1.69 |\n", "----------------------------------------\n", "--------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 52 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 656 |\n", "| time_elapsed | 9474 |\n", "| total_timesteps | 1343488 |\n", "| train/ | |\n", "| approx_kl | 0.162527 |\n", "| clip_fraction | 0.346 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.552 |\n", "| explained_variance | 0.937 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0018 |\n", "| policy_gradient_loss | -0.0414 |\n", "| value_loss | 0.254 |\n", "--------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 737 |\n", "| ep_rew_mean | 52.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 657 |\n", "| time_elapsed | 9489 |\n", "| total_timesteps | 1345536 |\n", "| train/ | |\n", "| approx_kl | 0.23944047 |\n", "| clip_fraction | 0.342 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.487 |\n", "| explained_variance | 0.928 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0271 |\n", "| policy_gradient_loss | -0.0455 |\n", "| value_loss | 0.215 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 741 |\n", "| ep_rew_mean | 53.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 658 |\n", "| time_elapsed | 9502 |\n", "| total_timesteps | 1347584 |\n", "| train/ | |\n", "| approx_kl | 0.12772709 |\n", "| clip_fraction | 0.361 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.626 |\n", "| explained_variance | 0.398 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.3 |\n", "| policy_gradient_loss | 0.0129 |\n", "| value_loss | 3.28 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 53.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 659 |\n", "| time_elapsed | 9516 |\n", "| total_timesteps | 1349632 |\n", "| train/ | |\n", "| approx_kl | 0.23181334 |\n", "| clip_fraction | 0.384 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.621 |\n", "| explained_variance | 0.903 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0338 |\n", "| policy_gradient_loss | -0.0459 |\n", "| value_loss | 0.269 |\n", "----------------------------------------\n", "Eval num_timesteps=1350000, episode_reward=62.30 +/- 30.42\n", "Episode length: 699.00 +/- 143.73\n", "----------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 699 |\n", "| mean_reward | 62.3 |\n", "| time/ | |\n", "| total_timesteps | 1350000 |\n", "| train/ | |\n", "| approx_kl | 0.11322656 |\n", "| clip_fraction | 0.314 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.56 |\n", "| explained_variance | 0.517 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0379 |\n", "| policy_gradient_loss | -0.0125 |\n", "| value_loss | 1.42 |\n", "----------------------------------------\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 53.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 660 |\n", "| time_elapsed | 9548 |\n", "| total_timesteps | 1351680 |\n", "---------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 734 |\n", "| ep_rew_mean | 54 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 661 |\n", "| time_elapsed | 9562 |\n", "| total_timesteps | 1353728 |\n", "| train/ | |\n", "| approx_kl | 0.15777576 |\n", "| clip_fraction | 0.358 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.644 |\n", "| explained_variance | 0.621 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.215 |\n", "| policy_gradient_loss | -0.0182 |\n", "| value_loss | 1.01 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 731 |\n", "| ep_rew_mean | 55.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 662 |\n", "| time_elapsed | 9576 |\n", "| total_timesteps | 1355776 |\n", "| train/ | |\n", "| approx_kl | 0.15572481 |\n", "| clip_fraction | 0.379 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.614 |\n", "| explained_variance | 0.752 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0289 |\n", "| policy_gradient_loss | -0.0445 |\n", "| value_loss | 0.491 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 733 |\n", "| ep_rew_mean | 56.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 663 |\n", "| time_elapsed | 9590 |\n", "| total_timesteps | 1357824 |\n", "| train/ | |\n", "| approx_kl | 0.21189508 |\n", "| clip_fraction | 0.418 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.552 |\n", "| explained_variance | 0.264 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0594 |\n", "| policy_gradient_loss | 0.00623 |\n", "| value_loss | 4.97 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 728 |\n", "| ep_rew_mean | 54.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 664 |\n", "| time_elapsed | 9603 |\n", "| total_timesteps | 1359872 |\n", "| train/ | |\n", "| approx_kl | 0.18792015 |\n", "| clip_fraction | 0.401 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.577 |\n", "| explained_variance | 0.775 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0686 |\n", "| policy_gradient_loss | -0.0377 |\n", "| value_loss | 0.866 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 725 |\n", "| ep_rew_mean | 54.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 665 |\n", "| time_elapsed | 9617 |\n", "| total_timesteps | 1361920 |\n", "| train/ | |\n", "| approx_kl | 0.2846138 |\n", "| clip_fraction | 0.413 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.562 |\n", "| explained_variance | 0.84 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.187 |\n", "| policy_gradient_loss | -0.0502 |\n", "| value_loss | 0.527 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 724 |\n", "| ep_rew_mean | 54.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 666 |\n", "| time_elapsed | 9631 |\n", "| total_timesteps | 1363968 |\n", "| train/ | |\n", "| approx_kl | 0.15273312 |\n", "| clip_fraction | 0.382 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.6 |\n", "| explained_variance | 0.895 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0176 |\n", "| policy_gradient_loss | -0.046 |\n", "| value_loss | 0.562 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 720 |\n", "| ep_rew_mean | 55.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 667 |\n", "| time_elapsed | 9645 |\n", "| total_timesteps | 1366016 |\n", "| train/ | |\n", "| approx_kl | 0.14242327 |\n", "| clip_fraction | 0.331 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.524 |\n", "| explained_variance | 0.785 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.105 |\n", "| policy_gradient_loss | -0.0287 |\n", "| value_loss | 1.37 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 719 |\n", "| ep_rew_mean | 55.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 668 |\n", "| time_elapsed | 9659 |\n", "| total_timesteps | 1368064 |\n", "| train/ | |\n", "| approx_kl | 0.22120175 |\n", "| clip_fraction | 0.382 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.589 |\n", "| explained_variance | 0.873 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0551 |\n", "| policy_gradient_loss | -0.0258 |\n", "| value_loss | 1.13 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 719 |\n", "| ep_rew_mean | 55.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 669 |\n", "| time_elapsed | 9673 |\n", "| total_timesteps | 1370112 |\n", "| train/ | |\n", "| approx_kl | 0.18453631 |\n", "| clip_fraction | 0.351 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.527 |\n", "| explained_variance | 0.913 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0944 |\n", "| policy_gradient_loss | -0.0353 |\n", "| value_loss | 0.668 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 711 |\n", "| ep_rew_mean | 55.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 670 |\n", "| time_elapsed | 9687 |\n", "| total_timesteps | 1372160 |\n", "| train/ | |\n", "| approx_kl | 0.12834562 |\n", "| clip_fraction | 0.338 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.53 |\n", "| explained_variance | 0.79 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.22 |\n", "| policy_gradient_loss | -0.0266 |\n", "| value_loss | 1.51 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 703 |\n", "| ep_rew_mean | 57.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 671 |\n", "| time_elapsed | 9701 |\n", "| total_timesteps | 1374208 |\n", "| train/ | |\n", "| approx_kl | 0.11685011 |\n", "| clip_fraction | 0.303 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.553 |\n", "| explained_variance | 0.907 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0811 |\n", "| policy_gradient_loss | -0.0223 |\n", "| value_loss | 1.26 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 694 |\n", "| ep_rew_mean | 58.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 672 |\n", "| time_elapsed | 9715 |\n", "| total_timesteps | 1376256 |\n", "| train/ | |\n", "| approx_kl | 0.16529107 |\n", "| clip_fraction | 0.354 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.554 |\n", "| explained_variance | 0.871 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.04 |\n", "| policy_gradient_loss | -0.00555 |\n", "| value_loss | 3.89 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 694 |\n", "| ep_rew_mean | 58.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 673 |\n", "| time_elapsed | 9728 |\n", "| total_timesteps | 1378304 |\n", "| train/ | |\n", "| approx_kl | 0.1505962 |\n", "| clip_fraction | 0.335 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.612 |\n", "| explained_variance | 0.889 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0446 |\n", "| policy_gradient_loss | -0.0357 |\n", "| value_loss | 0.697 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 694 |\n", "| ep_rew_mean | 58.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 674 |\n", "| time_elapsed | 9742 |\n", "| total_timesteps | 1380352 |\n", "| train/ | |\n", "| approx_kl | 0.0986031 |\n", "| clip_fraction | 0.388 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.874 |\n", "| explained_variance | 0.901 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.088 |\n", "| policy_gradient_loss | -0.0228 |\n", "| value_loss | 0.206 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 694 |\n", "| ep_rew_mean | 58.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 675 |\n", "| time_elapsed | 9756 |\n", "| total_timesteps | 1382400 |\n", "| train/ | |\n", "| approx_kl | 0.10986849 |\n", "| clip_fraction | 0.366 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.905 |\n", "| explained_variance | -0.6 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0426 |\n", "| policy_gradient_loss | -0.0151 |\n", "| value_loss | 0.102 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 766 |\n", "| ep_rew_mean | 58.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 676 |\n", "| time_elapsed | 9769 |\n", "| total_timesteps | 1384448 |\n", "| train/ | |\n", "| approx_kl | 0.0961601 |\n", "| clip_fraction | 0.362 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.829 |\n", "| explained_variance | -0.308 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0464 |\n", "| policy_gradient_loss | -0.039 |\n", "| value_loss | 0.0791 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 755 |\n", "| ep_rew_mean | 62.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 677 |\n", "| time_elapsed | 9783 |\n", "| total_timesteps | 1386496 |\n", "| train/ | |\n", "| approx_kl | 0.38322723 |\n", "| clip_fraction | 0.535 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.67 |\n", "| explained_variance | 0.119 |\n", "| learning_rate | 0.0003 |\n", "| loss | 8.45 |\n", "| policy_gradient_loss | 0.0486 |\n", "| value_loss | 26.4 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 745 |\n", "| ep_rew_mean | 63.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 678 |\n", "| time_elapsed | 9797 |\n", "| total_timesteps | 1388544 |\n", "| train/ | |\n", "| approx_kl | 0.22010417 |\n", "| clip_fraction | 0.36 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.475 |\n", "| explained_variance | 0.468 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.56 |\n", "| policy_gradient_loss | 0.0164 |\n", "| value_loss | 10.8 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 742 |\n", "| ep_rew_mean | 62.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 679 |\n", "| time_elapsed | 9811 |\n", "| total_timesteps | 1390592 |\n", "| train/ | |\n", "| approx_kl | 0.2935366 |\n", "| clip_fraction | 0.405 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.519 |\n", "| explained_variance | 0.693 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.109 |\n", "| policy_gradient_loss | -0.0255 |\n", "| value_loss | 0.782 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 739 |\n", "| ep_rew_mean | 63 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 680 |\n", "| time_elapsed | 9825 |\n", "| total_timesteps | 1392640 |\n", "| train/ | |\n", "| approx_kl | 0.18821175 |\n", "| clip_fraction | 0.35 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.549 |\n", "| explained_variance | 0.852 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0402 |\n", "| policy_gradient_loss | -0.0422 |\n", "| value_loss | 0.64 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 738 |\n", "| ep_rew_mean | 63.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 681 |\n", "| time_elapsed | 9839 |\n", "| total_timesteps | 1394688 |\n", "| train/ | |\n", "| approx_kl | 0.28061566 |\n", "| clip_fraction | 0.286 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.44 |\n", "| explained_variance | 0.644 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.211 |\n", "| policy_gradient_loss | -0.000165 |\n", "| value_loss | 5.09 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 737 |\n", "| ep_rew_mean | 64.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 682 |\n", "| time_elapsed | 9853 |\n", "| total_timesteps | 1396736 |\n", "| train/ | |\n", "| approx_kl | 0.10598057 |\n", "| clip_fraction | 0.3 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.465 |\n", "| explained_variance | 0.783 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.825 |\n", "| policy_gradient_loss | -0.0117 |\n", "| value_loss | 2.23 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 741 |\n", "| ep_rew_mean | 68.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 683 |\n", "| time_elapsed | 9867 |\n", "| total_timesteps | 1398784 |\n", "| train/ | |\n", "| approx_kl | 0.1496094 |\n", "| clip_fraction | 0.343 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.584 |\n", "| explained_variance | 0.842 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.00694 |\n", "| policy_gradient_loss | -0.0346 |\n", "| value_loss | 1.25 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 738 |\n", "| ep_rew_mean | 66.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 684 |\n", "| time_elapsed | 9881 |\n", "| total_timesteps | 1400832 |\n", "| train/ | |\n", "| approx_kl | 0.35269552 |\n", "| clip_fraction | 0.362 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.373 |\n", "| explained_variance | 0.325 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.29 |\n", "| policy_gradient_loss | 0.06 |\n", "| value_loss | 22.6 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 741 |\n", "| ep_rew_mean | 70.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 685 |\n", "| time_elapsed | 9895 |\n", "| total_timesteps | 1402880 |\n", "| train/ | |\n", "| approx_kl | 0.18114881 |\n", "| clip_fraction | 0.282 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.376 |\n", "| explained_variance | 0.741 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.18 |\n", "| policy_gradient_loss | -0.0178 |\n", "| value_loss | 2.63 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 739 |\n", "| ep_rew_mean | 70.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 686 |\n", "| time_elapsed | 9908 |\n", "| total_timesteps | 1404928 |\n", "| train/ | |\n", "| approx_kl | 0.2386382 |\n", "| clip_fraction | 0.395 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.413 |\n", "| explained_variance | 0.65 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.827 |\n", "| policy_gradient_loss | 0.0204 |\n", "| value_loss | 15.2 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 70.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 687 |\n", "| time_elapsed | 9922 |\n", "| total_timesteps | 1406976 |\n", "| train/ | |\n", "| approx_kl | 0.21112111 |\n", "| clip_fraction | 0.302 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.359 |\n", "| explained_variance | 0.802 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.223 |\n", "| policy_gradient_loss | -0.0168 |\n", "| value_loss | 1.14 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 70.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 688 |\n", "| time_elapsed | 9936 |\n", "| total_timesteps | 1409024 |\n", "| train/ | |\n", "| approx_kl | 0.19843048 |\n", "| clip_fraction | 0.326 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.462 |\n", "| explained_variance | 0.877 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0628 |\n", "| policy_gradient_loss | -0.0129 |\n", "| value_loss | 0.173 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 70.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 689 |\n", "| time_elapsed | 9950 |\n", "| total_timesteps | 1411072 |\n", "| train/ | |\n", "| approx_kl | 0.33175007 |\n", "| clip_fraction | 0.38 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.39 |\n", "| explained_variance | 0.588 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0325 |\n", "| policy_gradient_loss | -0.025 |\n", "| value_loss | 0.133 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 70.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 690 |\n", "| time_elapsed | 9964 |\n", "| total_timesteps | 1413120 |\n", "| train/ | |\n", "| approx_kl | 0.12810653 |\n", "| clip_fraction | 0.441 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.66 |\n", "| explained_variance | 0.14 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0383 |\n", "| policy_gradient_loss | -0.0222 |\n", "| value_loss | 0.0581 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 70.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 691 |\n", "| time_elapsed | 9977 |\n", "| total_timesteps | 1415168 |\n", "| train/ | |\n", "| approx_kl | 0.0569193 |\n", "| clip_fraction | 0.326 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.707 |\n", "| explained_variance | 0.739 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0275 |\n", "| policy_gradient_loss | -0.0215 |\n", "| value_loss | 0.0428 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 70.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 692 |\n", "| time_elapsed | 9991 |\n", "| total_timesteps | 1417216 |\n", "| train/ | |\n", "| approx_kl | 0.06454541 |\n", "| clip_fraction | 0.301 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.906 |\n", "| explained_variance | -0.129 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0497 |\n", "| policy_gradient_loss | -0.0286 |\n", "| value_loss | 0.0132 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 70.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 693 |\n", "| time_elapsed | 10005 |\n", "| total_timesteps | 1419264 |\n", "| train/ | |\n", "| approx_kl | 0.07966368 |\n", "| clip_fraction | 0.306 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.919 |\n", "| explained_variance | 0.53 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0336 |\n", "| policy_gradient_loss | -0.0426 |\n", "| value_loss | 0.1 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 70.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 694 |\n", "| time_elapsed | 10019 |\n", "| total_timesteps | 1421312 |\n", "| train/ | |\n", "| approx_kl | 0.09968886 |\n", "| clip_fraction | 0.241 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.567 |\n", "| explained_variance | -2.26 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0486 |\n", "| policy_gradient_loss | -0.0354 |\n", "| value_loss | 0.0258 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 70.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 695 |\n", "| time_elapsed | 10032 |\n", "| total_timesteps | 1423360 |\n", "| train/ | |\n", "| approx_kl | 0.06403296 |\n", "| clip_fraction | 0.248 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.703 |\n", "| explained_variance | 0.0185 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0152 |\n", "| policy_gradient_loss | -0.0317 |\n", "| value_loss | 0.0173 |\n", "----------------------------------------\n", "Eval num_timesteps=1425000, episode_reward=121.30 +/- 111.76\n", "Episode length: 739.40 +/- 140.43\n", "-----------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 739 |\n", "| mean_reward | 121 |\n", "| time/ | |\n", "| total_timesteps | 1425000 |\n", "| train/ | |\n", "| approx_kl | 0.053441763 |\n", "| clip_fraction | 0.285 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.689 |\n", "| explained_variance | -1.16 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0639 |\n", "| policy_gradient_loss | -0.0431 |\n", "| value_loss | 0.0146 |\n", "-----------------------------------------\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 70.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 696 |\n", "| time_elapsed | 10065 |\n", "| total_timesteps | 1425408 |\n", "---------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 70.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 697 |\n", "| time_elapsed | 10078 |\n", "| total_timesteps | 1427456 |\n", "| train/ | |\n", "| approx_kl | 0.10139216 |\n", "| clip_fraction | 0.32 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.711 |\n", "| explained_variance | -0.231 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0796 |\n", "| policy_gradient_loss | -0.0513 |\n", "| value_loss | 0.0204 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 70.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 698 |\n", "| time_elapsed | 10092 |\n", "| total_timesteps | 1429504 |\n", "| train/ | |\n", "| approx_kl | 0.12009242 |\n", "| clip_fraction | 0.382 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.791 |\n", "| explained_variance | 0.226 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0674 |\n", "| policy_gradient_loss | -0.0573 |\n", "| value_loss | 0.0168 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 735 |\n", "| ep_rew_mean | 70.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 699 |\n", "| time_elapsed | 10106 |\n", "| total_timesteps | 1431552 |\n", "| train/ | |\n", "| approx_kl | 0.080891386 |\n", "| clip_fraction | 0.313 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.77 |\n", "| explained_variance | 0.0382 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0608 |\n", "| policy_gradient_loss | -0.0527 |\n", "| value_loss | 0.00483 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1e+03 |\n", "| ep_rew_mean | 71.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 700 |\n", "| time_elapsed | 10120 |\n", "| total_timesteps | 1433600 |\n", "| train/ | |\n", "| approx_kl | 0.08367188 |\n", "| clip_fraction | 0.329 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.688 |\n", "| explained_variance | 0.414 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0682 |\n", "| policy_gradient_loss | -0.0445 |\n", "| value_loss | 0.0112 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1e+03 |\n", "| ep_rew_mean | 71.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 701 |\n", "| time_elapsed | 10134 |\n", "| total_timesteps | 1435648 |\n", "| train/ | |\n", "| approx_kl | 0.3230015 |\n", "| clip_fraction | 0.569 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.733 |\n", "| explained_variance | 0.636 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0468 |\n", "| policy_gradient_loss | -0.04 |\n", "| value_loss | 0.218 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1e+03 |\n", "| ep_rew_mean | 71.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 702 |\n", "| time_elapsed | 10147 |\n", "| total_timesteps | 1437696 |\n", "| train/ | |\n", "| approx_kl | 0.07795051 |\n", "| clip_fraction | 0.41 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.853 |\n", "| explained_variance | -0.588 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0572 |\n", "| policy_gradient_loss | -0.0359 |\n", "| value_loss | 0.148 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.06e+03 |\n", "| ep_rew_mean | 71.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 703 |\n", "| time_elapsed | 10161 |\n", "| total_timesteps | 1439744 |\n", "| train/ | |\n", "| approx_kl | 0.06250182 |\n", "| clip_fraction | 0.323 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.858 |\n", "| explained_variance | -1.32 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0358 |\n", "| policy_gradient_loss | -0.0259 |\n", "| value_loss | 0.0145 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.06e+03 |\n", "| ep_rew_mean | 71.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 704 |\n", "| time_elapsed | 10175 |\n", "| total_timesteps | 1441792 |\n", "| train/ | |\n", "| approx_kl | 0.17703551 |\n", "| clip_fraction | 0.477 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.786 |\n", "| explained_variance | 0.705 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0401 |\n", "| policy_gradient_loss | -0.0607 |\n", "| value_loss | 0.244 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 72.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 705 |\n", "| time_elapsed | 10189 |\n", "| total_timesteps | 1443840 |\n", "| train/ | |\n", "| approx_kl | 0.29780114 |\n", "| clip_fraction | 0.482 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.802 |\n", "| explained_variance | 0.442 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0868 |\n", "| policy_gradient_loss | -0.071 |\n", "| value_loss | 0.197 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 73.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 706 |\n", "| time_elapsed | 10203 |\n", "| total_timesteps | 1445888 |\n", "| train/ | |\n", "| approx_kl | 0.19933268 |\n", "| clip_fraction | 0.477 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.741 |\n", "| explained_variance | 0.614 |\n", "| learning_rate | 0.0003 |\n", "| loss | -0.0732 |\n", "| policy_gradient_loss | -0.0515 |\n", "| value_loss | 0.705 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.08e+03 |\n", "| ep_rew_mean | 72.4 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 707 |\n", "| time_elapsed | 10217 |\n", "| total_timesteps | 1447936 |\n", "| train/ | |\n", "| approx_kl | 0.16085923 |\n", "| clip_fraction | 0.419 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.712 |\n", "| explained_variance | 0.726 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0874 |\n", "| policy_gradient_loss | -0.0524 |\n", "| value_loss | 0.607 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 76 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 708 |\n", "| time_elapsed | 10231 |\n", "| total_timesteps | 1449984 |\n", "| train/ | |\n", "| approx_kl | 0.118496746 |\n", "| clip_fraction | 0.362 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.669 |\n", "| explained_variance | 0.396 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.412 |\n", "| policy_gradient_loss | -0.0233 |\n", "| value_loss | 1.76 |\n", "-----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 76 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 709 |\n", "| time_elapsed | 10245 |\n", "| total_timesteps | 1452032 |\n", "| train/ | |\n", "| approx_kl | 0.2873941 |\n", "| clip_fraction | 0.487 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.543 |\n", "| explained_variance | 0.102 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.569 |\n", "| policy_gradient_loss | 0.0367 |\n", "| value_loss | 22 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 76.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 710 |\n", "| time_elapsed | 10260 |\n", "| total_timesteps | 1454080 |\n", "| train/ | |\n", "| approx_kl | 0.21956778 |\n", "| clip_fraction | 0.353 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.512 |\n", "| explained_variance | 0.722 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0685 |\n", "| policy_gradient_loss | -0.0426 |\n", "| value_loss | 0.811 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 76.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 711 |\n", "| time_elapsed | 10274 |\n", "| total_timesteps | 1456128 |\n", "| train/ | |\n", "| approx_kl | 0.26665372 |\n", "| clip_fraction | 0.415 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.577 |\n", "| explained_variance | 0.843 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0897 |\n", "| policy_gradient_loss | -0.0466 |\n", "| value_loss | 0.672 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 77.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 712 |\n", "| time_elapsed | 10288 |\n", "| total_timesteps | 1458176 |\n", "| train/ | |\n", "| approx_kl | 0.18680984 |\n", "| clip_fraction | 0.394 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.599 |\n", "| explained_variance | 0.641 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.0711 |\n", "| policy_gradient_loss | -0.0195 |\n", "| value_loss | 2.05 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 78.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 713 |\n", "| time_elapsed | 10302 |\n", "| total_timesteps | 1460224 |\n", "| train/ | |\n", "| approx_kl | 0.13580064 |\n", "| clip_fraction | 0.328 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.554 |\n", "| explained_variance | 0.619 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.44 |\n", "| policy_gradient_loss | -0.00511 |\n", "| value_loss | 6.79 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 79.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 714 |\n", "| time_elapsed | 10316 |\n", "| total_timesteps | 1462272 |\n", "| train/ | |\n", "| approx_kl | 0.18467487 |\n", "| clip_fraction | 0.314 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.54 |\n", "| explained_variance | 0.828 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.244 |\n", "| policy_gradient_loss | -0.04 |\n", "| value_loss | 1.49 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 82.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 715 |\n", "| time_elapsed | 10330 |\n", "| total_timesteps | 1464320 |\n", "| train/ | |\n", "| approx_kl | 0.12358463 |\n", "| clip_fraction | 0.367 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.624 |\n", "| explained_variance | 0.856 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.837 |\n", "| policy_gradient_loss | -0.0106 |\n", "| value_loss | 3.32 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 80.5 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 716 |\n", "| time_elapsed | 10344 |\n", "| total_timesteps | 1466368 |\n", "| train/ | |\n", "| approx_kl | 0.10321774 |\n", "| clip_fraction | 0.296 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.479 |\n", "| explained_variance | 0.672 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.941 |\n", "| policy_gradient_loss | 0.00206 |\n", "| value_loss | 21.8 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 82.3 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 717 |\n", "| time_elapsed | 10357 |\n", "| total_timesteps | 1468416 |\n", "| train/ | |\n", "| approx_kl | 0.20363739 |\n", "| clip_fraction | 0.348 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.489 |\n", "| explained_variance | 0.7 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.64 |\n", "| policy_gradient_loss | -0.00332 |\n", "| value_loss | 10.9 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 87.7 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 718 |\n", "| time_elapsed | 10371 |\n", "| total_timesteps | 1470464 |\n", "| train/ | |\n", "| approx_kl | 0.18716374 |\n", "| clip_fraction | 0.379 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.541 |\n", "| explained_variance | 0.855 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.976 |\n", "| policy_gradient_loss | 0.00499 |\n", "| value_loss | 7.01 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 90.8 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 719 |\n", "| time_elapsed | 10385 |\n", "| total_timesteps | 1472512 |\n", "| train/ | |\n", "| approx_kl | 0.66923034 |\n", "| clip_fraction | 0.434 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.386 |\n", "| explained_variance | 0.704 |\n", "| learning_rate | 0.0003 |\n", "| loss | 11.1 |\n", "| policy_gradient_loss | 0.0507 |\n", "| value_loss | 33.7 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 89.6 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 720 |\n", "| time_elapsed | 10399 |\n", "| total_timesteps | 1474560 |\n", "| train/ | |\n", "| approx_kl | 0.3336941 |\n", "| clip_fraction | 0.371 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.444 |\n", "| explained_variance | 0.762 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.339 |\n", "| policy_gradient_loss | -0.0064 |\n", "| value_loss | 2.62 |\n", "---------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 91.1 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 721 |\n", "| time_elapsed | 10413 |\n", "| total_timesteps | 1476608 |\n", "| train/ | |\n", "| approx_kl | 0.3378224 |\n", "| clip_fraction | 0.418 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.549 |\n", "| explained_variance | 0.857 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.129 |\n", "| policy_gradient_loss | -0.0347 |\n", "| value_loss | 1.25 |\n", "---------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 92 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 722 |\n", "| time_elapsed | 10427 |\n", "| total_timesteps | 1478656 |\n", "| train/ | |\n", "| approx_kl | 0.21876013 |\n", "| clip_fraction | 0.443 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.572 |\n", "| explained_variance | 0.846 |\n", "| learning_rate | 0.0003 |\n", "| loss | 1.13 |\n", "| policy_gradient_loss | 0.0194 |\n", "| value_loss | 6.55 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 96.9 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 723 |\n", "| time_elapsed | 10441 |\n", "| total_timesteps | 1480704 |\n", "| train/ | |\n", "| approx_kl | 0.17348605 |\n", "| clip_fraction | 0.364 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.553 |\n", "| explained_variance | 0.874 |\n", "| learning_rate | 0.0003 |\n", "| loss | 4.83 |\n", "| policy_gradient_loss | -0.00326 |\n", "| value_loss | 8.46 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 99.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 724 |\n", "| time_elapsed | 10455 |\n", "| total_timesteps | 1482752 |\n", "| train/ | |\n", "| approx_kl | 0.17905894 |\n", "| clip_fraction | 0.301 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.494 |\n", "| explained_variance | 0.83 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3 |\n", "| policy_gradient_loss | 0.0355 |\n", "| value_loss | 34.1 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 99.2 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 725 |\n", "| time_elapsed | 10469 |\n", "| total_timesteps | 1484800 |\n", "| train/ | |\n", "| approx_kl | 0.16372603 |\n", "| clip_fraction | 0.301 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.492 |\n", "| explained_variance | 0.898 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.59 |\n", "| policy_gradient_loss | -0.0142 |\n", "| value_loss | 8.68 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.03e+03 |\n", "| ep_rew_mean | 103 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 726 |\n", "| time_elapsed | 10483 |\n", "| total_timesteps | 1486848 |\n", "| train/ | |\n", "| approx_kl | 0.12201724 |\n", "| clip_fraction | 0.296 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.477 |\n", "| explained_variance | 0.951 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.74 |\n", "| policy_gradient_loss | -0.012 |\n", "| value_loss | 15.9 |\n", "----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.03e+03 |\n", "| ep_rew_mean | 101 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 727 |\n", "| time_elapsed | 10496 |\n", "| total_timesteps | 1488896 |\n", "| train/ | |\n", "| approx_kl | 0.11655447 |\n", "| clip_fraction | 0.34 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.568 |\n", "| explained_variance | 0.966 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.02 |\n", "| policy_gradient_loss | -0.0185 |\n", "| value_loss | 4.94 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.03e+03 |\n", "| ep_rew_mean | 103 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 728 |\n", "| time_elapsed | 10510 |\n", "| total_timesteps | 1490944 |\n", "| train/ | |\n", "| approx_kl | 0.105847254 |\n", "| clip_fraction | 0.353 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.626 |\n", "| explained_variance | 0.987 |\n", "| learning_rate | 0.0003 |\n", "| loss | 0.659 |\n", "| policy_gradient_loss | -0.0149 |\n", "| value_loss | 2.87 |\n", "-----------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.03e+03 |\n", "| ep_rew_mean | 109 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 729 |\n", "| time_elapsed | 10524 |\n", "| total_timesteps | 1492992 |\n", "| train/ | |\n", "| approx_kl | 0.11300732 |\n", "| clip_fraction | 0.294 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.538 |\n", "| explained_variance | 0.95 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.09 |\n", "| policy_gradient_loss | -0.0146 |\n", "| value_loss | 5.44 |\n", "----------------------------------------\n", "---------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.03e+03 |\n", "| ep_rew_mean | 115 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 730 |\n", "| time_elapsed | 10538 |\n", "| total_timesteps | 1495040 |\n", "| train/ | |\n", "| approx_kl | 0.2546178 |\n", "| clip_fraction | 0.336 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.496 |\n", "| explained_variance | 0.821 |\n", "| learning_rate | 0.0003 |\n", "| loss | 30.9 |\n", "| policy_gradient_loss | 0.0275 |\n", "| value_loss | 49.7 |\n", "---------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.04e+03 |\n", "| ep_rew_mean | 121 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 731 |\n", "| time_elapsed | 10551 |\n", "| total_timesteps | 1497088 |\n", "| train/ | |\n", "| approx_kl | 0.085519984 |\n", "| clip_fraction | 0.288 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.655 |\n", "| explained_variance | 0.891 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.5 |\n", "| policy_gradient_loss | 0.0218 |\n", "| value_loss | 14.1 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.04e+03 |\n", "| ep_rew_mean | 129 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 732 |\n", "| time_elapsed | 10565 |\n", "| total_timesteps | 1499136 |\n", "| train/ | |\n", "| approx_kl | 0.088662475 |\n", "| clip_fraction | 0.28 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.611 |\n", "| explained_variance | 0.941 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.09 |\n", "| policy_gradient_loss | -0.00474 |\n", "| value_loss | 17.8 |\n", "-----------------------------------------\n", "Eval num_timesteps=1500000, episode_reward=256.70 +/- 107.51\n", "Episode length: 720.90 +/- 92.14\n", "----------------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 721 |\n", "| mean_reward | 257 |\n", "| time/ | |\n", "| total_timesteps | 1500000 |\n", "| train/ | |\n", "| approx_kl | 0.10802646 |\n", "| clip_fraction | 0.319 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.715 |\n", "| explained_variance | 0.977 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.14 |\n", "| policy_gradient_loss | 0.000293 |\n", "| value_loss | 7.97 |\n", "----------------------------------------\n", "New best mean reward!\n", "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.05e+03 |\n", "| ep_rew_mean | 133 |\n", "| time/ | |\n", "| fps | 141 |\n", "| iterations | 733 |\n", "| time_elapsed | 10600 |\n", "| total_timesteps | 1501184 |\n", "---------------------------------\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Train the model\n", "model.learn(total_timesteps=NUM_TIMESTEPS, callback=callback_list, tb_log_name=\"./tb/\")" ] }, { "cell_type": "code", "execution_count": 9, "id": "f43b60f1", "metadata": { "execution": { "iopub.execute_input": "2024-05-11T00:41:04.758667Z", "iopub.status.busy": "2024-05-11T00:41:04.758009Z", "iopub.status.idle": "2024-05-11T00:41:05.318420Z", "shell.execute_reply": "2024-05-11T00:41:05.317582Z" }, "papermill": { "duration": 0.64119, "end_time": "2024-05-11T00:41:05.320859", "exception": false, "start_time": "2024-05-11T00:41:04.679669", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Save the model and policy for future loading and training\n", "model.save(MODEL_FILE_NAME)\n", "model.policy.save(POLICY_FILE_NAME)" ] } ], "metadata": { "kaggle": { "accelerator": "nvidiaTeslaT4", "dataSources": [], "dockerImageVersionId": 30698, "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" }, "papermill": { "default_parameters": {}, "duration": 10704.308805, "end_time": "2024-05-11T00:41:08.582907", "environment_variables": {}, "exception": null, "input_path": "__notebook__.ipynb", "output_path": "__notebook__.ipynb", "parameters": {}, "start_time": "2024-05-10T21:42:44.274102", "version": "2.5.0" } }, "nbformat": 4, "nbformat_minor": 5 }