apetrenko committed
Commit 13d12ce
1 Parent(s): a6f69f3

Upload . with huggingface_hub
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ replay.mp4 filter=lfs diff=lfs merge=lfs -text
.summary/0/events.out.tfevents.1673492200.brain2.usc.edu ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5cbc79a9ea0ca3bcdcd8fcd24d550e316c6963c5d569d7f18776b1875138a5b2
+ size 205847
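The `.summary/0/events.out.tfevents.*` file above is a TensorBoard event log stored via Git LFS, so the binary has to be fetched (for example with `git lfs pull`) before it can be read. Below is a minimal Python sketch for peeking at it; it assumes the `tensorboard` package is installed and makes no assumption about which scalar tags Sample-Factory wrote, since those are not listed in this commit.
```
# Sketch: list the scalar series recorded in the uploaded TensorBoard event file.
# Assumes the `tensorboard` package is available and the LFS binary has been fetched.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

ea = EventAccumulator(".summary/0")  # directory that contains the events.out.tfevents file
ea.Reload()                          # parse the event file
scalar_tags = ea.Tags()["scalars"]   # tag names are whatever Sample-Factory logged
print(scalar_tags)
for tag in scalar_tags[:3]:
    points = ea.Scalars(tag)         # each point has wall_time, step, and value
    print(f"{tag}: {len(points)} points, last value {points[-1].value:.3f}")
```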
README.md ADDED
@@ -0,0 +1,56 @@
+ ---
+ library_name: sample-factory
+ tags:
+ - deep-reinforcement-learning
+ - reinforcement-learning
+ - sample-factory
+ model-index:
+ - name: APPO
+   results:
+   - task:
+       type: reinforcement-learning
+       name: reinforcement-learning
+     dataset:
+       name: ant
+       type: ant
+     metrics:
+     - type: mean_reward
+       value: 12233.03 +/- 3798.23
+       name: mean_reward
+       verified: false
+ ---
+
+ An **APPO** model trained on the **ant** environment.
+
+ This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
+ Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
+
+ ## Downloading the model
+
+ After installing Sample-Factory, download the model with:
+ ```
+ python -m sample_factory.huggingface.load_from_hub -r apetrenko/sample_factory_brax_ant
+ ```
+
+ ## Using the model
+
+ To run the model after download, use the `enjoy` script corresponding to this environment:
+ ```
+ python -m sf_examples.brax.enjoy_brax --algo=APPO --env=ant --train_dir=./train_dir --experiment=sample_factory_brax_ant
+ ```
+
+ You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
+ See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.
+
+ ## Training with this model
+
+ To continue training with this model, use the `train` script corresponding to this environment:
+ ```
+ python -m sf_examples.brax.train_brax --algo=APPO --env=ant --train_dir=./train_dir --experiment=sample_factory_brax_ant --restart_behavior=resume --train_for_env_steps=10000000000
+ ```
+
+ Note that you may have to adjust `--train_for_env_steps` to a suitably high number, as the experiment will resume from the number of env steps it had already completed.
+
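The `load_from_hub` command in the model card above is the documented Sample-Factory workflow and already places the files under `./train_dir` in the layout the `enjoy`/`train` scripts expect. If you only need the raw repository files (checkpoints, `config.json`, `replay.mp4`), a lower-level alternative is sketched below using `huggingface_hub` directly; note that this downloads into the local Hugging Face cache rather than into `./train_dir`.
```
# Sketch: fetch the raw files of this repository with huggingface_hub.
# This does NOT recreate the ./train_dir layout that the Sample-Factory
# enjoy/train scripts expect; it only downloads the files into the HF cache.
from huggingface_hub import snapshot_download

local_path = snapshot_download(repo_id="apetrenko/sample_factory_brax_ant")
print("Repository files downloaded to:", local_path)
```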
checkpoint_p0/best_000014838_97320960_reward_12530.994.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bdea51de26ab48b950efd7b8a5916f05c52104b15e694c0610cb373939a1eb2b
+ size 788471
checkpoint_p0/checkpoint_000015058_98762752.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:20c1a00a5eaaa28aeb04b33a1d3f45724f3103c23b5cbe03ed36e2c51e1c2878
+ size 788847
checkpoint_p0/checkpoint_000015258_100073472.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8de2e2737c38ff9e7c846f2432fca3ba08ed00db926a3ac5b32eed1812a86112
+ size 788847
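The three `.pth` entries above are Git LFS pointers to the actual PyTorch checkpoints (the best policy plus the two most recent checkpoints, matching `keep_checkpoints: 2` in the config below). Once the binaries have been fetched, they can be inspected with PyTorch; the internal dictionary layout of a Sample-Factory checkpoint is not documented in this commit, so the sketch below only prints whatever top-level keys it finds.
```
# Sketch: peek inside a downloaded checkpoint without assuming its exact layout.
import torch

ckpt = torch.load("checkpoint_p0/checkpoint_000015258_100073472.pth", map_location="cpu")
print(type(ckpt).__name__)
if isinstance(ckpt, dict):
    for key, value in ckpt.items():
        # tensors get their shape printed, everything else just its type
        shape = getattr(value, "shape", "")
        print(f"{key}: {type(value).__name__} {shape}")
```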
config.json ADDED
@@ -0,0 +1,147 @@
+ {
+ "help": false,
+ "algo": "APPO",
+ "env": "ant",
+ "experiment": "00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5",
+ "train_dir": "./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
+ "restart_behavior": "resume",
+ "device": "gpu",
+ "seed": 2322090,
+ "num_policies": 1,
+ "async_rl": false,
+ "serial_mode": true,
+ "batched_sampling": true,
+ "num_batches_to_accumulate": 2,
+ "worker_num_splits": 1,
+ "policy_workers_per_policy": 1,
+ "max_policy_lag": 1000,
+ "num_workers": 1,
+ "num_envs_per_worker": 1,
+ "batch_size": 32768,
+ "num_batches_per_epoch": 2,
+ "num_epochs": 5,
+ "rollout": 32,
+ "recurrence": 1,
+ "shuffle_minibatches": false,
+ "gamma": 0.99,
+ "reward_scale": 0.01,
+ "reward_clip": 1000.0,
+ "value_bootstrap": true,
+ "normalize_returns": true,
+ "exploration_loss_coeff": 0.0,
+ "value_loss_coeff": 2.0,
+ "kl_loss_coeff": 0.0,
+ "exploration_loss": "entropy",
+ "gae_lambda": 0.95,
+ "ppo_clip_ratio": 0.2,
+ "ppo_clip_value": 1.0,
+ "with_vtrace": false,
+ "vtrace_rho": 1.0,
+ "vtrace_c": 1.0,
+ "optimizer": "adam",
+ "adam_eps": 1e-06,
+ "adam_beta1": 0.9,
+ "adam_beta2": 0.999,
+ "max_grad_norm": 1.0,
+ "learning_rate": 0.0003,
+ "lr_schedule": "kl_adaptive_epoch",
+ "lr_schedule_kl_threshold": 0.008,
+ "lr_adaptive_min": 1e-06,
+ "lr_adaptive_max": 0.002,
+ "obs_subtract_mean": 0.0,
+ "obs_scale": 1.0,
+ "normalize_input": true,
+ "normalize_input_keys": null,
+ "decorrelate_experience_max_seconds": 0,
+ "decorrelate_envs_on_one_worker": true,
+ "actor_worker_gpus": [
+ 0
+ ],
+ "set_workers_cpu_affinity": true,
+ "force_envs_single_thread": false,
+ "default_niceness": 0,
+ "log_to_file": true,
+ "experiment_summaries_interval": 3,
+ "flush_summaries_interval": 30,
+ "stats_avg": 100,
+ "summaries_use_frameskip": true,
+ "heartbeat_interval": 20,
+ "heartbeat_reporting_interval": 180,
+ "train_for_env_steps": 100000000,
+ "train_for_seconds": 10000000000,
+ "save_every_sec": 15,
+ "keep_checkpoints": 2,
+ "load_checkpoint_kind": "latest",
+ "save_milestones_sec": -1,
+ "save_best_every_sec": 5,
+ "save_best_metric": "reward",
+ "save_best_after": 5000000,
+ "benchmark": false,
+ "encoder_mlp_layers": [
+ 256,
+ 128,
+ 64
+ ],
+ "encoder_conv_architecture": "convnet_simple",
+ "encoder_conv_mlp_layers": [
+ 512
+ ],
+ "use_rnn": false,
+ "rnn_size": 512,
+ "rnn_type": "gru",
+ "rnn_num_layers": 1,
+ "decoder_mlp_layers": [],
+ "nonlinearity": "elu",
+ "policy_initialization": "torch_default",
+ "policy_init_gain": 1.0,
+ "actor_critic_share_weights": true,
+ "adaptive_stddev": false,
+ "continuous_tanh_scale": 0.0,
+ "initial_stddev": 1.0,
+ "use_env_info_cache": false,
+ "env_gpu_actions": true,
+ "env_gpu_observations": true,
+ "env_frameskip": 1,
+ "env_framestack": 1,
+ "pixel_format": "CHW",
+ "use_record_episode_statistics": false,
+ "with_wandb": true,
+ "wandb_user": null,
+ "wandb_project": "sample_factory",
+ "wandb_group": null,
+ "wandb_job_type": "SF",
+ "wandb_tags": [],
+ "with_pbt": false,
+ "pbt_mix_policies_in_one_env": true,
+ "pbt_period_env_steps": 5000000,
+ "pbt_start_mutation": 20000000,
+ "pbt_replace_fraction": 0.3,
+ "pbt_mutation_rate": 0.15,
+ "pbt_replace_reward_gap": 0.1,
+ "pbt_replace_reward_gap_absolute": 1e-06,
+ "pbt_optimize_gamma": false,
+ "pbt_target_objective": "true_objective",
+ "pbt_perturb_min": 1.1,
+ "pbt_perturb_max": 1.5,
+ "env_agents": 2048,
+ "clamp_actions": false,
+ "clamp_rew_obs": false,
+ "command_line": "--actor_worker_gpus 0 --wandb_project=sample_factory --with_wandb=True --seed=2322090 --env=ant --use_rnn=False --num_epochs=5 --experiment=00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5 --train_dir=./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
+ "cli_args": {
+ "env": "ant",
+ "experiment": "00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5",
+ "train_dir": "./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
+ "seed": 2322090,
+ "num_epochs": 5,
+ "actor_worker_gpus": [
+ 0
+ ],
+ "use_rnn": false,
+ "with_wandb": true,
+ "wandb_project": "sample_factory"
+ },
+ "git_hash": "6aa87f2d416b9fad874b299d864a522c887c238a",
+ "git_repo_name": "git@github.com:alex-petrenko/sample-factory.git",
+ "train_script": "sf_examples.brax.train_brax",
+ "wandb_unique_id": "00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5_20230111_185633_673782"
+ }
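Since `config.json` stores the complete training configuration as plain JSON, it can be read back programmatically to check how this run was set up (for example `batch_size: 32768`, `num_epochs: 5`, `learning_rate: 0.0003`, `env_agents: 2048`). A small sketch using only the standard library:
```
# Sketch: load the uploaded training configuration and print a few key settings.
import json

with open("config.json") as f:
    cfg = json.load(f)

for key in ("algo", "env", "batch_size", "num_epochs", "learning_rate",
            "env_agents", "train_for_env_steps", "use_rnn"):
    print(f"{key}: {cfg[key]}")
```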
git.diff ADDED
File without changes
replay.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8ac1b8101df4100c1291f8ec18054be94feae73f23047ffc3b2cbd0aaa2dc3ba
+ size 1424460
sf_log.txt ADDED
@@ -0,0 +1,326 @@
1
+ [2023-01-11 18:56:45,735][451905] Saving configuration to ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/config.json...
2
+ [2023-01-11 18:56:45,916][451905] Using GPUs [0] for process 0 (actually maps to GPUs [0])
3
+ [2023-01-11 18:56:45,917][451905] Rollout worker 0 uses device cuda:0
4
+ [2023-01-11 18:56:45,918][451905] In synchronous mode, we only accumulate one batch. Setting num_batches_to_accumulate to 1
5
+ [2023-01-11 18:56:45,958][451905] Using GPUs [0] for process 0 (actually maps to GPUs [0])
6
+ [2023-01-11 18:56:45,959][451905] InferenceWorker_p0-w0: min num requests: 1
7
+ [2023-01-11 18:56:45,960][451905] Using GPUs [0] for process 0 (actually maps to GPUs [0])
8
+ [2023-01-11 18:56:45,961][451905] WARNING! It is generally recommended to enable Fixed KL loss (https://arxiv.org/pdf/1707.06347.pdf) for continuous action tasks to avoid potential numerical issues. I.e. set --kl_loss_coeff=0.1
9
+ [2023-01-11 18:56:45,962][451905] Setting fixed seed 2322090
10
+ [2023-01-11 18:56:45,962][451905] Using GPUs [0] for process 0 (actually maps to GPUs [0])
11
+ [2023-01-11 18:56:45,963][451905] Initializing actor-critic model on device cuda:0
12
+ [2023-01-11 18:56:45,963][451905] RunningMeanStd input shape: (87,)
13
+ [2023-01-11 18:56:45,964][451905] RunningMeanStd input shape: (1,)
14
+ [2023-01-11 18:56:46,032][451905] Created Actor Critic model with architecture:
15
+ [2023-01-11 18:56:46,033][451905] ActorCriticSharedWeights(
16
+ (obs_normalizer): ObservationNormalizer(
17
+ (running_mean_std): RunningMeanStdDictInPlace(
18
+ (running_mean_std): ModuleDict(
19
+ (obs): RunningMeanStdInPlace()
20
+ )
21
+ )
22
+ )
23
+ (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
24
+ (encoder): MultiInputEncoder(
25
+ (encoders): ModuleDict(
26
+ (obs): MlpEncoder(
27
+ (mlp_head): RecursiveScriptModule(
28
+ original_name=Sequential
29
+ (0): RecursiveScriptModule(original_name=Linear)
30
+ (1): RecursiveScriptModule(original_name=ELU)
31
+ (2): RecursiveScriptModule(original_name=Linear)
32
+ (3): RecursiveScriptModule(original_name=ELU)
33
+ (4): RecursiveScriptModule(original_name=Linear)
34
+ (5): RecursiveScriptModule(original_name=ELU)
35
+ )
36
+ )
37
+ )
38
+ )
39
+ (core): ModelCoreIdentity()
40
+ (decoder): MlpDecoder(
41
+ (mlp): Identity()
42
+ )
43
+ (critic_linear): Linear(in_features=64, out_features=1, bias=True)
44
+ (action_parameterization): ActionParameterizationContinuousNonAdaptiveStddev(
45
+ (distribution_linear): Linear(in_features=64, out_features=8, bias=True)
46
+ )
47
+ )
48
+ [2023-01-11 18:56:46,035][451905] Using optimizer <class 'torch.optim.adam.Adam'>
49
+ [2023-01-11 18:56:46,039][451905] No checkpoints found
50
+ [2023-01-11 18:56:46,039][451905] Did not load from checkpoint, starting from scratch!
51
+ [2023-01-11 18:56:46,040][451905] Initialized policy 0 weights for model version 0
52
+ [2023-01-11 18:56:46,040][451905] LearnerWorker_p0 finished initialization!
53
+ [2023-01-11 18:56:46,042][451905] Using GPUs [0] for process 0 (actually maps to GPUs [0])
54
+ [2023-01-11 18:56:46,053][451905] Inference worker 0-0 is ready!
55
+ [2023-01-11 18:56:46,054][451905] All inference workers are ready! Signal rollout workers to start!
56
+ [2023-01-11 18:56:46,054][451905] EnvRunner 0-0 uses policy 0
57
+ [2023-01-11 18:56:47,539][451905] Resetting env <VectorGymWrapper instance> with 2048 parallel agents...
58
+ [2023-01-11 18:56:53,140][451905] reset() done, obs.shape=torch.Size([2048, 87])!
59
+ [2023-01-11 18:56:53,149][451905] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
60
+ [2023-01-11 18:57:02,209][451905] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 226.0. Samples: 2048. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
61
+ [2023-01-11 18:57:10,799][451905] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 232.1. Samples: 4096. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
62
+ [2023-01-11 18:57:10,804][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000000000_0.pth...
63
+ [2023-01-11 18:57:10,813][451905] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 231.9. Samples: 4096. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
64
+ [2023-01-11 18:57:10,821][451905] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 347.7. Samples: 6144. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
65
+ [2023-01-11 18:57:10,826][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000000000_0.pth...
66
+ [2023-01-11 18:57:10,833][451905] Heartbeat connected on Batcher_0
67
+ [2023-01-11 18:57:10,833][451905] Heartbeat connected on LearnerWorker_p0
68
+ [2023-01-11 18:57:10,834][451905] Heartbeat connected on InferenceWorker_p0-w0
69
+ [2023-01-11 18:57:10,834][451905] Heartbeat connected on RolloutWorker_w0
70
+ [2023-01-11 18:57:15,256][451905] Fps is (10 sec: 191771.0, 60 sec: 38537.5, 300 sec: 38537.5). Total num frames: 851968. Throughput: 0: 10838.7. Samples: 239616. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
71
+ [2023-01-11 18:57:15,257][451905] Avg episode reward: [(0, '-548.169')]
72
+ [2023-01-11 18:57:20,255][451905] Fps is (10 sec: 243129.2, 60 sec: 84619.1, 300 sec: 84619.1). Total num frames: 2293760. Throughput: 0: 73437.3. Samples: 1990656. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
73
+ [2023-01-11 18:57:20,256][451905] Avg episode reward: [(0, '-1154.421')]
74
+ [2023-01-11 18:57:25,255][451905] Fps is (10 sec: 294955.1, 60 sec: 118391.7, 300 sec: 118391.7). Total num frames: 3801088. Throughput: 0: 116924.6. Samples: 3753984. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
75
+ [2023-01-11 18:57:25,256][451905] Avg episode reward: [(0, '44.684')]
76
+ [2023-01-11 18:57:25,266][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000000580_3801088.pth...
77
+ [2023-01-11 18:57:30,257][451905] Fps is (10 sec: 294876.7, 60 sec: 141286.8, 300 sec: 141286.8). Total num frames: 5242880. Throughput: 0: 124895.3. Samples: 4634624. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
78
+ [2023-01-11 18:57:30,258][451905] Avg episode reward: [(0, '737.538')]
79
+ [2023-01-11 18:57:30,260][451905] Saving new best policy, reward=737.538!
80
+ [2023-01-11 18:57:35,255][451905] Fps is (10 sec: 288356.1, 60 sec: 158757.8, 300 sec: 158757.8). Total num frames: 6684672. Throughput: 0: 151559.2. Samples: 6381568. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
81
+ [2023-01-11 18:57:35,255][451905] Avg episode reward: [(0, '1378.020')]
82
+ [2023-01-11 18:57:35,261][451905] Saving new best policy, reward=1378.020!
83
+ [2023-01-11 18:57:40,255][451905] Fps is (10 sec: 288413.4, 60 sec: 172513.8, 300 sec: 172513.8). Total num frames: 8126464. Throughput: 0: 213380.6. Samples: 8120320. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
84
+ [2023-01-11 18:57:40,256][451905] Avg episode reward: [(0, '2120.758')]
85
+ [2023-01-11 18:57:40,266][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000001240_8126464.pth...
86
+ [2023-01-11 18:57:40,284][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000000000_0.pth
87
+ [2023-01-11 18:57:40,286][451905] Saving new best policy, reward=2120.758!
88
+ [2023-01-11 18:57:45,256][451905] Fps is (10 sec: 288325.2, 60 sec: 183626.2, 300 sec: 183626.2). Total num frames: 9568256. Throughput: 0: 260450.5. Samples: 8978432. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
89
+ [2023-01-11 18:57:45,257][451905] Avg episode reward: [(0, '2764.891')]
90
+ [2023-01-11 18:57:45,261][451905] Saving new best policy, reward=2764.891!
91
+ [2023-01-11 18:57:50,256][451905] Fps is (10 sec: 288317.5, 60 sec: 192794.8, 300 sec: 192794.8). Total num frames: 11010048. Throughput: 0: 271299.6. Samples: 10704896. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
92
+ [2023-01-11 18:57:50,257][451905] Avg episode reward: [(0, '3270.653')]
93
+ [2023-01-11 18:57:50,259][451905] Saving new best policy, reward=3270.653!
94
+ [2023-01-11 18:57:55,273][451905] Fps is (10 sec: 294393.9, 60 sec: 235889.2, 300 sec: 201487.4). Total num frames: 12517376. Throughput: 0: 279978.7. Samples: 12451840. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
95
+ [2023-01-11 18:57:55,274][451905] Avg episode reward: [(0, '3854.397')]
96
+ [2023-01-11 18:57:55,281][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000001910_12517376.pth...
97
+ [2023-01-11 18:57:55,297][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000000580_3801088.pth
98
+ [2023-01-11 18:57:55,299][451905] Saving new best policy, reward=3854.397!
99
+ [2023-01-11 18:58:00,311][451905] Fps is (10 sec: 293300.4, 60 sec: 281933.6, 300 sec: 207841.6). Total num frames: 13959168. Throughput: 0: 289960.7. Samples: 13303808. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
100
+ [2023-01-11 18:58:00,312][451905] Avg episode reward: [(0, '4070.903')]
101
+ [2023-01-11 18:58:00,314][451905] Saving new best policy, reward=4070.903!
102
+ [2023-01-11 18:58:05,256][451905] Fps is (10 sec: 269160.0, 60 sec: 279271.9, 300 sec: 210856.2). Total num frames: 15204352. Throughput: 0: 286578.4. Samples: 14886912. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
103
+ [2023-01-11 18:58:05,257][451905] Avg episode reward: [(0, '4564.800')]
104
+ [2023-01-11 18:58:05,259][451905] Saving new best policy, reward=4564.800!
105
+ [2023-01-11 18:58:10,257][451905] Fps is (10 sec: 270162.6, 60 sec: 280069.5, 300 sec: 215880.0). Total num frames: 16646144. Throughput: 0: 286432.4. Samples: 16644096. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
106
+ [2023-01-11 18:58:10,258][451905] Avg episode reward: [(0, '5311.048')]
107
+ [2023-01-11 18:58:10,268][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000002540_16646144.pth...
108
+ [2023-01-11 18:58:10,292][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000001240_8126464.pth
109
+ [2023-01-11 18:58:10,294][451905] Saving new best policy, reward=5311.048!
110
+ [2023-01-11 18:58:15,298][451905] Fps is (10 sec: 293691.4, 60 sec: 288157.9, 300 sec: 220981.6). Total num frames: 18153472. Throughput: 0: 285594.1. Samples: 17498112. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
111
+ [2023-01-11 18:58:15,299][451905] Avg episode reward: [(0, '5501.038')]
112
+ [2023-01-11 18:58:15,301][451905] Saving new best policy, reward=5501.038!
113
+ [2023-01-11 18:58:20,257][451905] Fps is (10 sec: 294904.3, 60 sec: 288350.3, 300 sec: 224952.2). Total num frames: 19595264. Throughput: 0: 286340.2. Samples: 19267584. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
114
+ [2023-01-11 18:58:20,258][451905] Avg episode reward: [(0, '6154.250')]
115
+ [2023-01-11 18:58:20,260][451905] Saving new best policy, reward=6154.250!
116
+ [2023-01-11 18:58:25,256][451905] Fps is (10 sec: 289554.9, 60 sec: 287257.1, 300 sec: 228395.8). Total num frames: 21037056. Throughput: 0: 286253.8. Samples: 21002240. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
117
+ [2023-01-11 18:58:25,257][451905] Avg episode reward: [(0, '6654.922')]
118
+ [2023-01-11 18:58:25,264][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000003210_21037056.pth...
119
+ [2023-01-11 18:58:25,281][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000001910_12517376.pth
120
+ [2023-01-11 18:58:25,283][451905] Saving new best policy, reward=6654.922!
121
+ [2023-01-11 18:58:30,257][451905] Fps is (10 sec: 288365.8, 60 sec: 287265.0, 300 sec: 231482.2). Total num frames: 22478848. Throughput: 0: 286394.7. Samples: 21866496. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
122
+ [2023-01-11 18:58:30,257][451905] Avg episode reward: [(0, '6893.634')]
123
+ [2023-01-11 18:58:30,263][451905] Saving new best policy, reward=6893.634!
124
+ [2023-01-11 18:58:35,257][451905] Fps is (10 sec: 288352.7, 60 sec: 287256.5, 300 sec: 234267.8). Total num frames: 23920640. Throughput: 0: 286716.7. Samples: 23607296. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
125
+ [2023-01-11 18:58:35,257][451905] Avg episode reward: [(0, '7570.934')]
126
+ [2023-01-11 18:58:35,259][451905] Saving new best policy, reward=7570.934!
127
+ [2023-01-11 18:58:40,305][451905] Fps is (10 sec: 293488.6, 60 sec: 288115.2, 300 sec: 237296.8). Total num frames: 25427968. Throughput: 0: 286698.4. Samples: 25362432. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
128
+ [2023-01-11 18:58:40,306][451905] Avg episode reward: [(0, '7604.016')]
129
+ [2023-01-11 18:58:40,313][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000003880_25427968.pth...
130
+ [2023-01-11 18:58:40,335][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000002540_16646144.pth
131
+ [2023-01-11 18:58:40,337][451905] Saving new best policy, reward=7604.016!
132
+ [2023-01-11 18:58:45,304][451905] Fps is (10 sec: 293524.2, 60 sec: 288127.3, 300 sec: 239576.2). Total num frames: 26869760. Throughput: 0: 286947.6. Samples: 26214400. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
133
+ [2023-01-11 18:58:45,305][451905] Avg episode reward: [(0, '7992.428')]
134
+ [2023-01-11 18:58:45,307][451905] Saving new best policy, reward=7992.428!
135
+ [2023-01-11 18:58:50,311][451905] Fps is (10 sec: 288192.1, 60 sec: 288094.3, 300 sec: 241643.3). Total num frames: 28311552. Throughput: 0: 290188.9. Samples: 27961344. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
136
+ [2023-01-11 18:58:50,312][451905] Avg episode reward: [(0, '8068.034')]
137
+ [2023-01-11 18:58:50,314][451905] Saving new best policy, reward=8068.034!
138
+ [2023-01-11 18:58:55,254][451905] Fps is (10 sec: 289807.7, 60 sec: 287359.4, 300 sec: 243669.4). Total num frames: 29753344. Throughput: 0: 290106.7. Samples: 29698048. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
139
+ [2023-01-11 18:58:55,254][451905] Avg episode reward: [(0, '8315.286')]
140
+ [2023-01-11 18:58:55,265][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000004540_29753344.pth...
141
+ [2023-01-11 18:58:55,288][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000003210_21037056.pth
142
+ [2023-01-11 18:58:55,290][451905] Saving new best policy, reward=8315.286!
143
+ [2023-01-11 18:59:00,277][451905] Fps is (10 sec: 289339.8, 60 sec: 287428.4, 300 sec: 245382.4). Total num frames: 31195136. Throughput: 0: 290311.6. Samples: 30556160. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
144
+ [2023-01-11 18:59:00,278][451905] Avg episode reward: [(0, '8176.998')]
145
+ [2023-01-11 18:59:05,255][451905] Fps is (10 sec: 288321.9, 60 sec: 290547.9, 300 sec: 247049.9). Total num frames: 32636928. Throughput: 0: 289827.1. Samples: 32309248. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
146
+ [2023-01-11 18:59:05,256][451905] Avg episode reward: [(0, '8691.556')]
147
+ [2023-01-11 18:59:05,261][451905] Saving new best policy, reward=8691.556!
148
+ [2023-01-11 18:59:10,255][451905] Fps is (10 sec: 288987.4, 60 sec: 290549.8, 300 sec: 248555.8). Total num frames: 34078720. Throughput: 0: 289957.8. Samples: 34050048. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
149
+ [2023-01-11 18:59:10,256][451905] Avg episode reward: [(0, '9060.014')]
150
+ [2023-01-11 18:59:10,266][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000005200_34078720.pth...
151
+ [2023-01-11 18:59:10,289][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000003880_25427968.pth
152
+ [2023-01-11 18:59:10,291][451905] Saving new best policy, reward=9060.014!
153
+ [2023-01-11 18:59:15,256][451905] Fps is (10 sec: 288344.1, 60 sec: 289653.9, 300 sec: 249955.9). Total num frames: 35520512. Throughput: 0: 289822.3. Samples: 34908160. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
154
+ [2023-01-11 18:59:15,256][451905] Avg episode reward: [(0, '9002.059')]
155
+ [2023-01-11 18:59:16,089][451905] Early stopping after 2 epochs (4 sgd steps), loss delta 0.0000003
156
+ [2023-01-11 18:59:20,257][451905] Fps is (10 sec: 288305.1, 60 sec: 289449.8, 300 sec: 251258.4). Total num frames: 36962304. Throughput: 0: 289901.7. Samples: 36653056. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
157
+ [2023-01-11 18:59:20,258][451905] Avg episode reward: [(0, '9044.166')]
158
+ [2023-01-11 18:59:25,255][451905] Fps is (10 sec: 294934.7, 60 sec: 290550.4, 300 sec: 252912.7). Total num frames: 38469632. Throughput: 0: 290595.6. Samples: 38424576. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
159
+ [2023-01-11 18:59:25,255][451905] Avg episode reward: [(0, '9184.393')]
160
+ [2023-01-11 18:59:25,267][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000005864_38469632.pth...
161
+ [2023-01-11 18:59:25,283][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000004540_29753344.pth
162
+ [2023-01-11 18:59:25,286][451905] Saving new best policy, reward=9184.393!
163
+ [2023-01-11 18:59:30,254][451905] Fps is (10 sec: 294993.9, 60 sec: 290554.3, 300 sec: 254041.4). Total num frames: 39911424. Throughput: 0: 290907.9. Samples: 39290880. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
164
+ [2023-01-11 18:59:30,255][451905] Avg episode reward: [(0, '9521.790')]
165
+ [2023-01-11 18:59:30,260][451905] Saving new best policy, reward=9521.790!
166
+ [2023-01-11 18:59:35,256][451905] Fps is (10 sec: 288339.3, 60 sec: 290548.1, 300 sec: 255098.2). Total num frames: 41353216. Throughput: 0: 291540.0. Samples: 41064448. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
167
+ [2023-01-11 18:59:35,256][451905] Avg episode reward: [(0, '9662.545')]
168
+ [2023-01-11 18:59:35,260][451905] Saving new best policy, reward=9662.545!
169
+ [2023-01-11 18:59:36,895][451905] Early stopping after 2 epochs (4 sgd steps), loss delta 0.0000000
170
+ [2023-01-11 18:59:40,254][451905] Fps is (10 sec: 294911.6, 60 sec: 290789.3, 300 sec: 256487.2). Total num frames: 42860544. Throughput: 0: 291631.3. Samples: 42821632. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
171
+ [2023-01-11 18:59:40,255][451905] Avg episode reward: [(0, '10113.781')]
172
+ [2023-01-11 18:59:40,265][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000006528_42860544.pth...
173
+ [2023-01-11 18:59:40,626][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000005200_34078720.pth
174
+ [2023-01-11 18:59:40,725][451905] Saving new best policy, reward=10113.781!
175
+ [2023-01-11 18:59:45,302][451905] Fps is (10 sec: 273976.5, 60 sec: 287274.9, 300 sec: 256199.9). Total num frames: 44105728. Throughput: 0: 286379.5. Samples: 43450368. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
176
+ [2023-01-11 18:59:45,303][451905] Avg episode reward: [(0, '9794.349')]
177
+ [2023-01-11 18:59:50,256][451905] Fps is (10 sec: 268652.8, 60 sec: 287529.4, 300 sec: 257174.2). Total num frames: 45547520. Throughput: 0: 286895.6. Samples: 45219840. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
178
+ [2023-01-11 18:59:50,257][451905] Avg episode reward: [(0, '10065.353')]
179
+ [2023-01-11 18:59:55,299][451905] Fps is (10 sec: 295009.9, 60 sec: 288142.9, 300 sec: 258329.9). Total num frames: 47054848. Throughput: 0: 287262.5. Samples: 46989312. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
180
+ [2023-01-11 18:59:55,300][451905] Avg episode reward: [(0, '9859.390')]
181
+ [2023-01-11 18:59:55,307][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000007168_47054848.pth...
182
+ [2023-01-11 18:59:55,604][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000005864_38469632.pth
183
+ [2023-01-11 19:00:00,257][451905] Fps is (10 sec: 281782.9, 60 sec: 286270.4, 300 sec: 258489.5). Total num frames: 48365568. Throughput: 0: 285255.5. Samples: 47745024. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
184
+ [2023-01-11 19:00:00,258][451905] Avg episode reward: [(0, '10446.865')]
185
+ [2023-01-11 19:00:00,265][451905] Saving new best policy, reward=10441.701!
186
+ [2023-01-11 19:00:05,257][451905] Fps is (10 sec: 269818.3, 60 sec: 285071.8, 300 sec: 258925.4). Total num frames: 49741824. Throughput: 0: 283580.0. Samples: 49414144. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
187
+ [2023-01-11 19:00:05,258][451905] Avg episode reward: [(0, '10311.739')]
188
+ [2023-01-11 19:00:10,288][451905] Fps is (10 sec: 287452.8, 60 sec: 286016.4, 300 sec: 259963.3). Total num frames: 51249152. Throughput: 0: 283322.9. Samples: 51183616. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
189
+ [2023-01-11 19:00:10,289][451905] Avg episode reward: [(0, '10161.780')]
190
+ [2023-01-11 19:00:10,297][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000007808_51249152.pth...
191
+ [2023-01-11 19:00:10,609][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000006528_42860544.pth
192
+ [2023-01-11 19:00:15,254][451905] Fps is (10 sec: 281893.6, 60 sec: 283996.8, 300 sec: 260061.5). Total num frames: 52559872. Throughput: 0: 281033.7. Samples: 51937280. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
193
+ [2023-01-11 19:00:15,255][451905] Avg episode reward: [(0, '10633.747')]
194
+ [2023-01-11 19:00:15,260][451905] Saving new best policy, reward=10633.747!
195
+ [2023-01-11 19:00:20,254][451905] Fps is (10 sec: 269625.8, 60 sec: 282912.3, 300 sec: 260428.3). Total num frames: 53936128. Throughput: 0: 278947.0. Samples: 53616640. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
196
+ [2023-01-11 19:00:20,255][451905] Avg episode reward: [(0, '10862.651')]
197
+ [2023-01-11 19:00:20,260][451905] Saving new best policy, reward=10862.651!
198
+ [2023-01-11 19:00:25,258][451905] Fps is (10 sec: 275143.2, 60 sec: 280698.0, 300 sec: 260772.8). Total num frames: 55312384. Throughput: 0: 276913.6. Samples: 55283712. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
199
+ [2023-01-11 19:00:25,259][451905] Avg episode reward: [(0, '10839.951')]
200
+ [2023-01-11 19:00:25,271][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000008428_55312384.pth...
201
+ [2023-01-11 19:00:25,297][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000007168_47054848.pth
202
+ [2023-01-11 19:00:30,257][451905] Fps is (10 sec: 281733.7, 60 sec: 280702.9, 300 sec: 261409.8). Total num frames: 56754176. Throughput: 0: 282500.2. Samples: 56150016. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
203
+ [2023-01-11 19:00:30,257][451905] Avg episode reward: [(0, '11148.982')]
204
+ [2023-01-11 19:00:30,263][451905] Saving new best policy, reward=11148.982!
205
+ [2023-01-11 19:00:35,256][451905] Fps is (10 sec: 288406.0, 60 sec: 280708.8, 300 sec: 262016.7). Total num frames: 58195968. Throughput: 0: 281075.5. Samples: 57868288. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
206
+ [2023-01-11 19:00:35,257][451905] Avg episode reward: [(0, '10900.463')]
207
+ [2023-01-11 19:00:40,256][451905] Fps is (10 sec: 288368.4, 60 sec: 279612.4, 300 sec: 262596.8). Total num frames: 59637760. Throughput: 0: 280796.1. Samples: 59613184. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
208
+ [2023-01-11 19:00:40,257][451905] Avg episode reward: [(0, '10984.909')]
209
+ [2023-01-11 19:00:40,268][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000009088_59637760.pth...
210
+ [2023-01-11 19:00:40,292][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000007808_51249152.pth
211
+ [2023-01-11 19:00:45,256][451905] Fps is (10 sec: 288366.8, 60 sec: 283114.2, 300 sec: 263152.0). Total num frames: 61079552. Throughput: 0: 283175.7. Samples: 60487680. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
212
+ [2023-01-11 19:00:45,257][451905] Avg episode reward: [(0, '10910.111')]
213
+ [2023-01-11 19:00:50,257][451905] Fps is (10 sec: 288340.4, 60 sec: 282894.0, 300 sec: 263682.6). Total num frames: 62521344. Throughput: 0: 284082.9. Samples: 62197760. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
214
+ [2023-01-11 19:00:50,258][451905] Avg episode reward: [(0, '11054.682')]
215
+ [2023-01-11 19:00:55,254][451905] Fps is (10 sec: 288421.9, 60 sec: 282015.9, 300 sec: 264195.5). Total num frames: 63963136. Throughput: 0: 283706.8. Samples: 63940608. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
216
+ [2023-01-11 19:00:55,255][451905] Avg episode reward: [(0, '11399.716')]
217
+ [2023-01-11 19:00:55,264][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000009748_63963136.pth...
218
+ [2023-01-11 19:00:55,281][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000008428_55312384.pth
219
+ [2023-01-11 19:00:55,283][451905] Saving new best policy, reward=11399.716!
220
+ [2023-01-11 19:01:00,283][451905] Fps is (10 sec: 294139.5, 60 sec: 284957.5, 300 sec: 264918.3). Total num frames: 65470464. Throughput: 0: 286125.9. Samples: 64821248. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
221
+ [2023-01-11 19:01:00,284][451905] Avg episode reward: [(0, '11540.237')]
222
+ [2023-01-11 19:01:00,286][451905] Saving new best policy, reward=11540.237!
223
+ [2023-01-11 19:01:05,257][451905] Fps is (10 sec: 294816.1, 60 sec: 286174.4, 300 sec: 265410.5). Total num frames: 66912256. Throughput: 0: 288020.1. Samples: 66578432. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
224
+ [2023-01-11 19:01:05,258][451905] Avg episode reward: [(0, '11359.174')]
225
+ [2023-01-11 19:01:10,256][451905] Fps is (10 sec: 289131.9, 60 sec: 285234.3, 300 sec: 265857.5). Total num frames: 68354048. Throughput: 0: 289734.5. Samples: 68321280. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
226
+ [2023-01-11 19:01:10,257][451905] Avg episode reward: [(0, '11490.578')]
227
+ [2023-01-11 19:01:10,268][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000010418_68354048.pth...
228
+ [2023-01-11 19:01:10,285][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000009088_59637760.pth
229
+ [2023-01-11 19:01:15,256][451905] Fps is (10 sec: 288397.2, 60 sec: 287258.0, 300 sec: 266287.3). Total num frames: 69795840. Throughput: 0: 289956.3. Samples: 69197824. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
230
+ [2023-01-11 19:01:15,256][451905] Avg episode reward: [(0, '11371.602')]
231
+ [2023-01-11 19:01:20,286][451905] Fps is (10 sec: 294033.6, 60 sec: 289295.5, 300 sec: 266915.4). Total num frames: 71303168. Throughput: 0: 290714.1. Samples: 70959104. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
232
+ [2023-01-11 19:01:20,287][451905] Avg episode reward: [(0, '11555.144')]
233
+ [2023-01-11 19:01:20,289][451905] Saving new best policy, reward=11555.144!
234
+ [2023-01-11 19:01:25,284][451905] Fps is (10 sec: 294087.5, 60 sec: 290418.0, 300 sec: 267311.8). Total num frames: 72744960. Throughput: 0: 290182.9. Samples: 72679424. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
235
+ [2023-01-11 19:01:25,284][451905] Avg episode reward: [(0, '11422.452')]
236
+ [2023-01-11 19:01:25,292][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000011088_72744960.pth...
237
+ [2023-01-11 19:01:25,308][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000009748_63963136.pth
238
+ [2023-01-11 19:01:30,307][451905] Fps is (10 sec: 287771.5, 60 sec: 290300.8, 300 sec: 267669.5). Total num frames: 74186752. Throughput: 0: 289626.0. Samples: 73535488. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
239
+ [2023-01-11 19:01:30,307][451905] Avg episode reward: [(0, '11773.974')]
240
+ [2023-01-11 19:01:30,310][451905] Saving new best policy, reward=11773.974!
241
+ [2023-01-11 19:01:35,256][451905] Fps is (10 sec: 282588.7, 60 sec: 289452.0, 300 sec: 267851.8). Total num frames: 75563008. Throughput: 0: 290365.8. Samples: 75264000. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
242
+ [2023-01-11 19:01:35,257][451905] Avg episode reward: [(0, '11651.489')]
243
+ [2023-01-11 19:01:40,257][451905] Fps is (10 sec: 283208.3, 60 sec: 289446.6, 300 sec: 268208.0). Total num frames: 77004800. Throughput: 0: 290294.8. Samples: 77004800. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
244
+ [2023-01-11 19:01:40,258][451905] Avg episode reward: [(0, '11539.934')]
245
+ [2023-01-11 19:01:40,266][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000011738_77004800.pth...
246
+ [2023-01-11 19:01:40,284][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000010418_68354048.pth
247
+ [2023-01-11 19:01:45,256][451905] Fps is (10 sec: 288367.2, 60 sec: 289452.1, 300 sec: 268554.1). Total num frames: 78446592. Throughput: 0: 289854.2. Samples: 77856768. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
248
+ [2023-01-11 19:01:45,256][451905] Avg episode reward: [(0, '11773.561')]
249
+ [2023-01-11 19:01:50,310][451905] Fps is (10 sec: 293362.0, 60 sec: 290286.2, 300 sec: 277520.3). Total num frames: 79953920. Throughput: 0: 288884.3. Samples: 79593472. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
250
+ [2023-01-11 19:01:50,311][451905] Avg episode reward: [(0, '11745.324')]
251
+ [2023-01-11 19:01:55,256][451905] Fps is (10 sec: 288354.0, 60 sec: 289440.8, 300 sec: 285913.7). Total num frames: 81330176. Throughput: 0: 289089.3. Samples: 81330176. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
252
+ [2023-01-11 19:01:55,257][451905] Avg episode reward: [(0, '11664.784')]
253
+ [2023-01-11 19:01:55,263][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000012398_81330176.pth...
254
+ [2023-01-11 19:01:55,281][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000011088_72744960.pth
255
+ [2023-01-11 19:02:00,257][451905] Fps is (10 sec: 289899.3, 60 sec: 289577.9, 300 sec: 286195.9). Total num frames: 82837504. Throughput: 0: 288761.8. Samples: 82192384. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
256
+ [2023-01-11 19:02:00,257][451905] Avg episode reward: [(0, '11688.261')]
257
+ [2023-01-11 19:02:05,257][451905] Fps is (10 sec: 301442.7, 60 sec: 290545.1, 300 sec: 286462.8). Total num frames: 84344832. Throughput: 0: 290369.5. Samples: 84017152. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
258
+ [2023-01-11 19:02:05,257][451905] Avg episode reward: [(0, '12067.370')]
259
+ [2023-01-11 19:02:05,263][451905] Saving new best policy, reward=12067.370!
260
+ [2023-01-11 19:02:10,291][451905] Fps is (10 sec: 293899.0, 60 sec: 290374.2, 300 sec: 287879.7). Total num frames: 85786624. Throughput: 0: 289767.1. Samples: 85721088. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
261
+ [2023-01-11 19:02:10,292][451905] Avg episode reward: [(0, '12220.248')]
262
+ [2023-01-11 19:02:10,298][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000013078_85786624.pth...
263
+ [2023-01-11 19:02:10,315][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000011738_77004800.pth
264
+ [2023-01-11 19:02:10,317][451905] Saving new best policy, reward=12220.248!
265
+ [2023-01-11 19:02:15,254][451905] Fps is (10 sec: 281876.8, 60 sec: 289458.6, 300 sec: 287693.2). Total num frames: 87162880. Throughput: 0: 290062.1. Samples: 86573056. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
266
+ [2023-01-11 19:02:15,255][451905] Avg episode reward: [(0, '11914.534')]
267
+ [2023-01-11 19:02:20,257][451905] Fps is (10 sec: 289339.0, 60 sec: 289590.2, 300 sec: 287689.2). Total num frames: 88670208. Throughput: 0: 290580.4. Samples: 88340480. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
268
+ [2023-01-11 19:02:20,258][451905] Avg episode reward: [(0, '11682.962')]
269
+ [2023-01-11 19:02:25,257][451905] Fps is (10 sec: 294827.6, 60 sec: 289580.1, 300 sec: 287691.6). Total num frames: 90112000. Throughput: 0: 290270.3. Samples: 90066944. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
270
+ [2023-01-11 19:02:25,258][451905] Avg episode reward: [(0, '12117.807')]
271
+ [2023-01-11 19:02:25,268][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000013738_90112000.pth...
272
+ [2023-01-11 19:02:25,285][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000012398_81330176.pth
273
+ [2023-01-11 19:02:30,254][451905] Fps is (10 sec: 288451.7, 60 sec: 289704.3, 300 sec: 287692.5). Total num frames: 91553792. Throughput: 0: 290508.3. Samples: 90929152. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
274
+ [2023-01-11 19:02:30,255][451905] Avg episode reward: [(0, '12096.269')]
275
+ [2023-01-11 19:02:35,313][451905] Fps is (10 sec: 293260.2, 60 sec: 291357.3, 300 sec: 287856.9). Total num frames: 93061120. Throughput: 0: 291476.5. Samples: 92710912. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
276
+ [2023-01-11 19:02:35,314][451905] Avg episode reward: [(0, '12384.883')]
277
+ [2023-01-11 19:02:35,316][451905] Saving new best policy, reward=12384.883!
278
+ [2023-01-11 19:02:40,308][451905] Fps is (10 sec: 293345.9, 60 sec: 291390.4, 300 sec: 287863.7). Total num frames: 94502912. Throughput: 0: 290937.8. Samples: 94437376. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
279
+ [2023-01-11 19:02:40,308][451905] Avg episode reward: [(0, '12423.622')]
280
+ [2023-01-11 19:02:40,315][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000014408_94502912.pth...
281
+ [2023-01-11 19:02:40,338][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000013078_85786624.pth
282
+ [2023-01-11 19:02:40,340][451905] Saving new best policy, reward=12423.622!
283
+ [2023-01-11 19:02:45,257][451905] Fps is (10 sec: 283393.6, 60 sec: 290535.8, 300 sec: 287690.8). Total num frames: 95879168. Throughput: 0: 291040.3. Samples: 95289344. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
284
+ [2023-01-11 19:02:45,258][451905] Avg episode reward: [(0, '12489.164')]
285
+ [2023-01-11 19:02:45,264][451905] Saving new best policy, reward=12489.164!
286
+ [2023-01-11 19:02:50,257][451905] Fps is (10 sec: 283244.4, 60 sec: 289707.8, 300 sec: 287486.1). Total num frames: 97320960. Throughput: 0: 288358.6. Samples: 96993280. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
287
+ [2023-01-11 19:02:50,257][451905] Avg episode reward: [(0, '12530.994')]
288
+ [2023-01-11 19:02:50,259][451905] Saving new best policy, reward=12530.994!
289
+ [2023-01-11 19:02:55,256][451905] Fps is (10 sec: 288402.6, 60 sec: 290543.9, 300 sec: 287523.7). Total num frames: 98762752. Throughput: 0: 289269.2. Samples: 98727936. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
290
+ [2023-01-11 19:02:55,256][451905] Avg episode reward: [(0, '11972.568')]
291
+ [2023-01-11 19:02:55,266][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000015058_98762752.pth...
292
+ [2023-01-11 19:02:55,292][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000013738_90112000.pth
293
+ [2023-01-11 19:02:59,748][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000015258_100073472.pth...
294
+ [2023-01-11 19:02:59,764][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000014408_94502912.pth
295
+ [2023-01-11 19:02:59,766][451905] Stopping InferenceWorker_p0-w0...
296
+ [2023-01-11 19:02:59,767][451905] Stopping RolloutWorker_w0...
297
+ [2023-01-11 19:02:59,767][451905] Stopping Batcher_0...
298
+ [2023-01-11 19:02:59,767][451905] Component InferenceWorker_p0-w0 stopped!
299
+ [2023-01-11 19:02:59,768][451905] Component RolloutWorker_w0 stopped!
300
+ [2023-01-11 19:02:59,768][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000015258_100073472.pth...
301
+ [2023-01-11 19:02:59,783][451905] Stopping LearnerWorker_p0...
302
+ [2023-01-11 19:02:59,784][451905] Component Batcher_0 stopped!
303
+ [2023-01-11 19:02:59,784][451905] Component LearnerWorker_p0 stopped!
304
+ [2023-01-11 19:02:59,784][451905] Batcher 0 profile tree view:
305
+ batching: 0.3719, releasing_batches: 0.0723
306
+ [2023-01-11 19:02:59,785][451905] InferenceWorker_p0-w0 profile tree view:
307
+ update_model: 0.4822
308
+ one_step: 0.0012
309
+ handle_policy_step: 62.2539
310
+ deserialize: 0.5489, stack: 0.0690, obs_to_device_normalize: 11.2582, forward: 39.1646, prepare_outputs: 6.9404, send_messages: 0.8335
311
+ [2023-01-11 19:02:59,785][451905] Learner 0 profile tree view:
312
+ misc: 0.0067, prepare_batch: 5.9127
313
+ train: 90.4802
314
+ epoch_init: 0.0649, minibatch_init: 1.0155, losses_postprocess: 3.0830, kl_divergence: 5.8746, after_optimizer: 0.3524
315
+ calculate_losses: 18.6587
316
+ losses_init: 0.0395, forward_head: 3.0733, bptt_initial: 0.1318, bptt: 0.1409, tail: 9.1108, advantages_returns: 1.1916, losses: 3.6217
317
+ update: 59.4683
318
+ clip: 9.0914
319
+ [2023-01-11 19:02:59,785][451905] RolloutWorker_w0 profile tree view:
320
+ wait_for_trajectories: 0.0886, enqueue_policy_requests: 5.8259, process_policy_outputs: 3.7482, env_step: 157.1659, finalize_trajectories: 0.1642, complete_rollouts: 0.0683
321
+ post_env_step: 20.3344
322
+ process_env_step: 8.2187
323
+ [2023-01-11 19:02:59,785][451905] Loop Runner_EvtLoop terminating...
324
+ [2023-01-11 19:02:59,786][451905] Runner profile tree view:
325
+ main_loop: 373.8249
326
+ [2023-01-11 19:02:59,786][451905] Collected {0: 100073472}, FPS: 267701.5
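The training log above prints an `Avg episode reward` line every few seconds, with the best saved policy reaching about 12530 reward and the run finishing at roughly 267.7k FPS over 100M environment steps. The reward curve can be recovered from `sf_log.txt` with the standard library; the regular expression below simply mirrors the log line format shown above (ignoring the leading `+` diff markers of this view):
```
# Sketch: extract the average-episode-reward curve from sf_log.txt.
# Matches lines like:
# [2023-01-11 18:57:15,257][451905] Avg episode reward: [(0, '-548.169')]
import re

pattern = re.compile(
    r"\[(?P<ts>[\d\- :,]+)\]\[\d+\] Avg episode reward: \[\(0, '(?P<reward>-?[\d.]+)'\)\]"
)

rewards = []
with open("sf_log.txt") as f:
    for line in f:
        m = pattern.search(line)
        if m:
            rewards.append((m.group("ts"), float(m.group("reward"))))

print(f"{len(rewards)} reward samples, last value {rewards[-1][1]:.1f}")
```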