apetrenko committed on
Commit
13d12ce
1 Parent(s): a6f69f3

Upload . with huggingface_hub

Browse files
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ replay.mp4 filter=lfs diff=lfs merge=lfs -text
.summary/0/events.out.tfevents.1673492200.brain2.usc.edu ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cbc79a9ea0ca3bcdcd8fcd24d550e316c6963c5d569d7f18776b1875138a5b2
3
+ size 205847
README.md ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: sample-factory
3
+ tags:
4
+ - deep-reinforcement-learning
5
+ - reinforcement-learning
6
+ - sample-factory
7
+ model-index:
8
+ - name: APPO
9
+ results:
10
+ - task:
11
+ type: reinforcement-learning
12
+ name: reinforcement-learning
13
+ dataset:
14
+ name: ant
15
+ type: ant
16
+ metrics:
17
+ - type: mean_reward
18
+ value: 12233.03 +/- 3798.23
19
+ name: mean_reward
20
+ verified: false
21
+ ---
22
+
23
+ An **APPO** model trained on the **ant** environment.
24
+
25
+ This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
26
+ Documentation on how to use Sample-Factory can be found at https://www.samplefactory.dev/.
27
+
28
+
29
+ ## Downloading the model
30
+
31
+ After installing Sample-Factory, download the model with:
32
+ ```
33
+ python -m sample_factory.huggingface.load_from_hub -r apetrenko/sample_factory_brax_ant
34
+ ```
35
+
36
+
37
+ ## Using the model
38
+
39
+ To run the model after download, use the `enjoy` script corresponding to this environment:
40
+ ```
41
+ python -m sf_examples.brax.enjoy_brax --algo=APPO --env=ant --train_dir=./train_dir --experiment=sample_factory_brax_ant
42
+ ```
43
+
44
+
45
+ You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
46
+ See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.
47
+
48
+ ## Training with this model
49
+
50
+ To continue training with this model, use the `train` script corresponding to this environment:
51
+ ```
52
+ python -m sf_examples.brax.train_brax --algo=APPO --env=ant --train_dir=./train_dir --experiment=sample_factory_brax_ant --restart_behavior=resume --train_for_env_steps=10000000000
53
+ ```
54
+
55
+ Note: you may have to set `--train_for_env_steps` to a suitably high number, because the experiment resumes from the step count at which it previously concluded.
56
+
checkpoint_p0/best_000014838_97320960_reward_12530.994.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdea51de26ab48b950efd7b8a5916f05c52104b15e694c0610cb373939a1eb2b
3
+ size 788471
checkpoint_p0/checkpoint_000015058_98762752.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20c1a00a5eaaa28aeb04b33a1d3f45724f3103c23b5cbe03ed36e2c51e1c2878
3
+ size 788847
checkpoint_p0/checkpoint_000015258_100073472.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8de2e2737c38ff9e7c846f2432fca3ba08ed00db926a3ac5b32eed1812a86112
3
+ size 788847
config.json ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "help": false,
3
+ "algo": "APPO",
4
+ "env": "ant",
5
+ "experiment": "00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5",
6
+ "train_dir": "./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
7
+ "restart_behavior": "resume",
8
+ "device": "gpu",
9
+ "seed": 2322090,
10
+ "num_policies": 1,
11
+ "async_rl": false,
12
+ "serial_mode": true,
13
+ "batched_sampling": true,
14
+ "num_batches_to_accumulate": 2,
15
+ "worker_num_splits": 1,
16
+ "policy_workers_per_policy": 1,
17
+ "max_policy_lag": 1000,
18
+ "num_workers": 1,
19
+ "num_envs_per_worker": 1,
20
+ "batch_size": 32768,
21
+ "num_batches_per_epoch": 2,
22
+ "num_epochs": 5,
23
+ "rollout": 32,
24
+ "recurrence": 1,
25
+ "shuffle_minibatches": false,
26
+ "gamma": 0.99,
27
+ "reward_scale": 0.01,
28
+ "reward_clip": 1000.0,
29
+ "value_bootstrap": true,
30
+ "normalize_returns": true,
31
+ "exploration_loss_coeff": 0.0,
32
+ "value_loss_coeff": 2.0,
33
+ "kl_loss_coeff": 0.0,
34
+ "exploration_loss": "entropy",
35
+ "gae_lambda": 0.95,
36
+ "ppo_clip_ratio": 0.2,
37
+ "ppo_clip_value": 1.0,
38
+ "with_vtrace": false,
39
+ "vtrace_rho": 1.0,
40
+ "vtrace_c": 1.0,
41
+ "optimizer": "adam",
42
+ "adam_eps": 1e-06,
43
+ "adam_beta1": 0.9,
44
+ "adam_beta2": 0.999,
45
+ "max_grad_norm": 1.0,
46
+ "learning_rate": 0.0003,
47
+ "lr_schedule": "kl_adaptive_epoch",
48
+ "lr_schedule_kl_threshold": 0.008,
49
+ "lr_adaptive_min": 1e-06,
50
+ "lr_adaptive_max": 0.002,
51
+ "obs_subtract_mean": 0.0,
52
+ "obs_scale": 1.0,
53
+ "normalize_input": true,
54
+ "normalize_input_keys": null,
55
+ "decorrelate_experience_max_seconds": 0,
56
+ "decorrelate_envs_on_one_worker": true,
57
+ "actor_worker_gpus": [
58
+ 0
59
+ ],
60
+ "set_workers_cpu_affinity": true,
61
+ "force_envs_single_thread": false,
62
+ "default_niceness": 0,
63
+ "log_to_file": true,
64
+ "experiment_summaries_interval": 3,
65
+ "flush_summaries_interval": 30,
66
+ "stats_avg": 100,
67
+ "summaries_use_frameskip": true,
68
+ "heartbeat_interval": 20,
69
+ "heartbeat_reporting_interval": 180,
70
+ "train_for_env_steps": 100000000,
71
+ "train_for_seconds": 10000000000,
72
+ "save_every_sec": 15,
73
+ "keep_checkpoints": 2,
74
+ "load_checkpoint_kind": "latest",
75
+ "save_milestones_sec": -1,
76
+ "save_best_every_sec": 5,
77
+ "save_best_metric": "reward",
78
+ "save_best_after": 5000000,
79
+ "benchmark": false,
80
+ "encoder_mlp_layers": [
81
+ 256,
82
+ 128,
83
+ 64
84
+ ],
85
+ "encoder_conv_architecture": "convnet_simple",
86
+ "encoder_conv_mlp_layers": [
87
+ 512
88
+ ],
89
+ "use_rnn": false,
90
+ "rnn_size": 512,
91
+ "rnn_type": "gru",
92
+ "rnn_num_layers": 1,
93
+ "decoder_mlp_layers": [],
94
+ "nonlinearity": "elu",
95
+ "policy_initialization": "torch_default",
96
+ "policy_init_gain": 1.0,
97
+ "actor_critic_share_weights": true,
98
+ "adaptive_stddev": false,
99
+ "continuous_tanh_scale": 0.0,
100
+ "initial_stddev": 1.0,
101
+ "use_env_info_cache": false,
102
+ "env_gpu_actions": true,
103
+ "env_gpu_observations": true,
104
+ "env_frameskip": 1,
105
+ "env_framestack": 1,
106
+ "pixel_format": "CHW",
107
+ "use_record_episode_statistics": false,
108
+ "with_wandb": true,
109
+ "wandb_user": null,
110
+ "wandb_project": "sample_factory",
111
+ "wandb_group": null,
112
+ "wandb_job_type": "SF",
113
+ "wandb_tags": [],
114
+ "with_pbt": false,
115
+ "pbt_mix_policies_in_one_env": true,
116
+ "pbt_period_env_steps": 5000000,
117
+ "pbt_start_mutation": 20000000,
118
+ "pbt_replace_fraction": 0.3,
119
+ "pbt_mutation_rate": 0.15,
120
+ "pbt_replace_reward_gap": 0.1,
121
+ "pbt_replace_reward_gap_absolute": 1e-06,
122
+ "pbt_optimize_gamma": false,
123
+ "pbt_target_objective": "true_objective",
124
+ "pbt_perturb_min": 1.1,
125
+ "pbt_perturb_max": 1.5,
126
+ "env_agents": 2048,
127
+ "clamp_actions": false,
128
+ "clamp_rew_obs": false,
129
+ "command_line": "--actor_worker_gpus 0 --wandb_project=sample_factory --with_wandb=True --seed=2322090 --env=ant --use_rnn=False --num_epochs=5 --experiment=00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5 --train_dir=./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
130
+ "cli_args": {
131
+ "env": "ant",
132
+ "experiment": "00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5",
133
+ "train_dir": "./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
134
+ "seed": 2322090,
135
+ "num_epochs": 5,
136
+ "actor_worker_gpus": [
137
+ 0
138
+ ],
139
+ "use_rnn": false,
140
+ "with_wandb": true,
141
+ "wandb_project": "sample_factory"
142
+ },
143
+ "git_hash": "6aa87f2d416b9fad874b299d864a522c887c238a",
144
+ "git_repo_name": "git@github.com:alex-petrenko/sample-factory.git",
145
+ "train_script": "sf_examples.brax.train_brax",
146
+ "wandb_unique_id": "00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5_20230111_185633_673782"
147
+ }
git.diff ADDED
File without changes
replay.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ac1b8101df4100c1291f8ec18054be94feae73f23047ffc3b2cbd0aaa2dc3ba
3
+ size 1424460
sf_log.txt ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2023-01-11 18:56:45,735][451905] Saving configuration to ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/config.json...
2
+ [2023-01-11 18:56:45,916][451905] Using GPUs [0] for process 0 (actually maps to GPUs [0])
3
+ [2023-01-11 18:56:45,917][451905] Rollout worker 0 uses device cuda:0
4
+ [2023-01-11 18:56:45,918][451905] In synchronous mode, we only accumulate one batch. Setting num_batches_to_accumulate to 1
5
+ [2023-01-11 18:56:45,958][451905] Using GPUs [0] for process 0 (actually maps to GPUs [0])
6
+ [2023-01-11 18:56:45,959][451905] InferenceWorker_p0-w0: min num requests: 1
7
+ [2023-01-11 18:56:45,960][451905] Using GPUs [0] for process 0 (actually maps to GPUs [0])
8
+ [2023-01-11 18:56:45,961][451905] WARNING! It is generally recommended to enable Fixed KL loss (https://arxiv.org/pdf/1707.06347.pdf) for continuous action tasks to avoid potential numerical issues. I.e. set --kl_loss_coeff=0.1
9
+ [2023-01-11 18:56:45,962][451905] Setting fixed seed 2322090
10
+ [2023-01-11 18:56:45,962][451905] Using GPUs [0] for process 0 (actually maps to GPUs [0])
11
+ [2023-01-11 18:56:45,963][451905] Initializing actor-critic model on device cuda:0
12
+ [2023-01-11 18:56:45,963][451905] RunningMeanStd input shape: (87,)
13
+ [2023-01-11 18:56:45,964][451905] RunningMeanStd input shape: (1,)
14
+ [2023-01-11 18:56:46,032][451905] Created Actor Critic model with architecture:
15
+ [2023-01-11 18:56:46,033][451905] ActorCriticSharedWeights(
16
+ (obs_normalizer): ObservationNormalizer(
17
+ (running_mean_std): RunningMeanStdDictInPlace(
18
+ (running_mean_std): ModuleDict(
19
+ (obs): RunningMeanStdInPlace()
20
+ )
21
+ )
22
+ )
23
+ (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
24
+ (encoder): MultiInputEncoder(
25
+ (encoders): ModuleDict(
26
+ (obs): MlpEncoder(
27
+ (mlp_head): RecursiveScriptModule(
28
+ original_name=Sequential
29
+ (0): RecursiveScriptModule(original_name=Linear)
30
+ (1): RecursiveScriptModule(original_name=ELU)
31
+ (2): RecursiveScriptModule(original_name=Linear)
32
+ (3): RecursiveScriptModule(original_name=ELU)
33
+ (4): RecursiveScriptModule(original_name=Linear)
34
+ (5): RecursiveScriptModule(original_name=ELU)
35
+ )
36
+ )
37
+ )
38
+ )
39
+ (core): ModelCoreIdentity()
40
+ (decoder): MlpDecoder(
41
+ (mlp): Identity()
42
+ )
43
+ (critic_linear): Linear(in_features=64, out_features=1, bias=True)
44
+ (action_parameterization): ActionParameterizationContinuousNonAdaptiveStddev(
45
+ (distribution_linear): Linear(in_features=64, out_features=8, bias=True)
46
+ )
47
+ )
48
+ [2023-01-11 18:56:46,035][451905] Using optimizer <class 'torch.optim.adam.Adam'>
49
+ [2023-01-11 18:56:46,039][451905] No checkpoints found
50
+ [2023-01-11 18:56:46,039][451905] Did not load from checkpoint, starting from scratch!
51
+ [2023-01-11 18:56:46,040][451905] Initialized policy 0 weights for model version 0
52
+ [2023-01-11 18:56:46,040][451905] LearnerWorker_p0 finished initialization!
53
+ [2023-01-11 18:56:46,042][451905] Using GPUs [0] for process 0 (actually maps to GPUs [0])
54
+ [2023-01-11 18:56:46,053][451905] Inference worker 0-0 is ready!
55
+ [2023-01-11 18:56:46,054][451905] All inference workers are ready! Signal rollout workers to start!
56
+ [2023-01-11 18:56:46,054][451905] EnvRunner 0-0 uses policy 0
57
+ [2023-01-11 18:56:47,539][451905] Resetting env <VectorGymWrapper instance> with 2048 parallel agents...
58
+ [2023-01-11 18:56:53,140][451905] reset() done, obs.shape=torch.Size([2048, 87])!
59
+ [2023-01-11 18:56:53,149][451905] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
60
+ [2023-01-11 18:57:02,209][451905] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 226.0. Samples: 2048. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
61
+ [2023-01-11 18:57:10,799][451905] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 232.1. Samples: 4096. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
62
+ [2023-01-11 18:57:10,804][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000000000_0.pth...
63
+ [2023-01-11 18:57:10,813][451905] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 231.9. Samples: 4096. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
64
+ [2023-01-11 18:57:10,821][451905] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 347.7. Samples: 6144. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
65
+ [2023-01-11 18:57:10,826][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000000000_0.pth...
66
+ [2023-01-11 18:57:10,833][451905] Heartbeat connected on Batcher_0
67
+ [2023-01-11 18:57:10,833][451905] Heartbeat connected on LearnerWorker_p0
68
+ [2023-01-11 18:57:10,834][451905] Heartbeat connected on InferenceWorker_p0-w0
69
+ [2023-01-11 18:57:10,834][451905] Heartbeat connected on RolloutWorker_w0
70
+ [2023-01-11 18:57:15,256][451905] Fps is (10 sec: 191771.0, 60 sec: 38537.5, 300 sec: 38537.5). Total num frames: 851968. Throughput: 0: 10838.7. Samples: 239616. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
71
+ [2023-01-11 18:57:15,257][451905] Avg episode reward: [(0, '-548.169')]
72
+ [2023-01-11 18:57:20,255][451905] Fps is (10 sec: 243129.2, 60 sec: 84619.1, 300 sec: 84619.1). Total num frames: 2293760. Throughput: 0: 73437.3. Samples: 1990656. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
73
+ [2023-01-11 18:57:20,256][451905] Avg episode reward: [(0, '-1154.421')]
74
+ [2023-01-11 18:57:25,255][451905] Fps is (10 sec: 294955.1, 60 sec: 118391.7, 300 sec: 118391.7). Total num frames: 3801088. Throughput: 0: 116924.6. Samples: 3753984. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
75
+ [2023-01-11 18:57:25,256][451905] Avg episode reward: [(0, '44.684')]
76
+ [2023-01-11 18:57:25,266][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000000580_3801088.pth...
77
+ [2023-01-11 18:57:30,257][451905] Fps is (10 sec: 294876.7, 60 sec: 141286.8, 300 sec: 141286.8). Total num frames: 5242880. Throughput: 0: 124895.3. Samples: 4634624. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
78
+ [2023-01-11 18:57:30,258][451905] Avg episode reward: [(0, '737.538')]
79
+ [2023-01-11 18:57:30,260][451905] Saving new best policy, reward=737.538!
80
+ [2023-01-11 18:57:35,255][451905] Fps is (10 sec: 288356.1, 60 sec: 158757.8, 300 sec: 158757.8). Total num frames: 6684672. Throughput: 0: 151559.2. Samples: 6381568. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
81
+ [2023-01-11 18:57:35,255][451905] Avg episode reward: [(0, '1378.020')]
82
+ [2023-01-11 18:57:35,261][451905] Saving new best policy, reward=1378.020!
83
+ [2023-01-11 18:57:40,255][451905] Fps is (10 sec: 288413.4, 60 sec: 172513.8, 300 sec: 172513.8). Total num frames: 8126464. Throughput: 0: 213380.6. Samples: 8120320. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
84
+ [2023-01-11 18:57:40,256][451905] Avg episode reward: [(0, '2120.758')]
85
+ [2023-01-11 18:57:40,266][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000001240_8126464.pth...
86
+ [2023-01-11 18:57:40,284][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000000000_0.pth
87
+ [2023-01-11 18:57:40,286][451905] Saving new best policy, reward=2120.758!
88
+ [2023-01-11 18:57:45,256][451905] Fps is (10 sec: 288325.2, 60 sec: 183626.2, 300 sec: 183626.2). Total num frames: 9568256. Throughput: 0: 260450.5. Samples: 8978432. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
89
+ [2023-01-11 18:57:45,257][451905] Avg episode reward: [(0, '2764.891')]
90
+ [2023-01-11 18:57:45,261][451905] Saving new best policy, reward=2764.891!
91
+ [2023-01-11 18:57:50,256][451905] Fps is (10 sec: 288317.5, 60 sec: 192794.8, 300 sec: 192794.8). Total num frames: 11010048. Throughput: 0: 271299.6. Samples: 10704896. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
92
+ [2023-01-11 18:57:50,257][451905] Avg episode reward: [(0, '3270.653')]
93
+ [2023-01-11 18:57:50,259][451905] Saving new best policy, reward=3270.653!
94
+ [2023-01-11 18:57:55,273][451905] Fps is (10 sec: 294393.9, 60 sec: 235889.2, 300 sec: 201487.4). Total num frames: 12517376. Throughput: 0: 279978.7. Samples: 12451840. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
95
+ [2023-01-11 18:57:55,274][451905] Avg episode reward: [(0, '3854.397')]
96
+ [2023-01-11 18:57:55,281][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000001910_12517376.pth...
97
+ [2023-01-11 18:57:55,297][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000000580_3801088.pth
98
+ [2023-01-11 18:57:55,299][451905] Saving new best policy, reward=3854.397!
99
+ [2023-01-11 18:58:00,311][451905] Fps is (10 sec: 293300.4, 60 sec: 281933.6, 300 sec: 207841.6). Total num frames: 13959168. Throughput: 0: 289960.7. Samples: 13303808. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
100
+ [2023-01-11 18:58:00,312][451905] Avg episode reward: [(0, '4070.903')]
101
+ [2023-01-11 18:58:00,314][451905] Saving new best policy, reward=4070.903!
102
+ [2023-01-11 18:58:05,256][451905] Fps is (10 sec: 269160.0, 60 sec: 279271.9, 300 sec: 210856.2). Total num frames: 15204352. Throughput: 0: 286578.4. Samples: 14886912. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
103
+ [2023-01-11 18:58:05,257][451905] Avg episode reward: [(0, '4564.800')]
104
+ [2023-01-11 18:58:05,259][451905] Saving new best policy, reward=4564.800!
105
+ [2023-01-11 18:58:10,257][451905] Fps is (10 sec: 270162.6, 60 sec: 280069.5, 300 sec: 215880.0). Total num frames: 16646144. Throughput: 0: 286432.4. Samples: 16644096. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
106
+ [2023-01-11 18:58:10,258][451905] Avg episode reward: [(0, '5311.048')]
107
+ [2023-01-11 18:58:10,268][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000002540_16646144.pth...
108
+ [2023-01-11 18:58:10,292][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000001240_8126464.pth
109
+ [2023-01-11 18:58:10,294][451905] Saving new best policy, reward=5311.048!
110
+ [2023-01-11 18:58:15,298][451905] Fps is (10 sec: 293691.4, 60 sec: 288157.9, 300 sec: 220981.6). Total num frames: 18153472. Throughput: 0: 285594.1. Samples: 17498112. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
111
+ [2023-01-11 18:58:15,299][451905] Avg episode reward: [(0, '5501.038')]
112
+ [2023-01-11 18:58:15,301][451905] Saving new best policy, reward=5501.038!
113
+ [2023-01-11 18:58:20,257][451905] Fps is (10 sec: 294904.3, 60 sec: 288350.3, 300 sec: 224952.2). Total num frames: 19595264. Throughput: 0: 286340.2. Samples: 19267584. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
114
+ [2023-01-11 18:58:20,258][451905] Avg episode reward: [(0, '6154.250')]
115
+ [2023-01-11 18:58:20,260][451905] Saving new best policy, reward=6154.250!
116
+ [2023-01-11 18:58:25,256][451905] Fps is (10 sec: 289554.9, 60 sec: 287257.1, 300 sec: 228395.8). Total num frames: 21037056. Throughput: 0: 286253.8. Samples: 21002240. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
117
+ [2023-01-11 18:58:25,257][451905] Avg episode reward: [(0, '6654.922')]
118
+ [2023-01-11 18:58:25,264][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000003210_21037056.pth...
119
+ [2023-01-11 18:58:25,281][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000001910_12517376.pth
120
+ [2023-01-11 18:58:25,283][451905] Saving new best policy, reward=6654.922!
121
+ [2023-01-11 18:58:30,257][451905] Fps is (10 sec: 288365.8, 60 sec: 287265.0, 300 sec: 231482.2). Total num frames: 22478848. Throughput: 0: 286394.7. Samples: 21866496. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
122
+ [2023-01-11 18:58:30,257][451905] Avg episode reward: [(0, '6893.634')]
123
+ [2023-01-11 18:58:30,263][451905] Saving new best policy, reward=6893.634!
124
+ [2023-01-11 18:58:35,257][451905] Fps is (10 sec: 288352.7, 60 sec: 287256.5, 300 sec: 234267.8). Total num frames: 23920640. Throughput: 0: 286716.7. Samples: 23607296. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
125
+ [2023-01-11 18:58:35,257][451905] Avg episode reward: [(0, '7570.934')]
126
+ [2023-01-11 18:58:35,259][451905] Saving new best policy, reward=7570.934!
127
+ [2023-01-11 18:58:40,305][451905] Fps is (10 sec: 293488.6, 60 sec: 288115.2, 300 sec: 237296.8). Total num frames: 25427968. Throughput: 0: 286698.4. Samples: 25362432. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
128
+ [2023-01-11 18:58:40,306][451905] Avg episode reward: [(0, '7604.016')]
129
+ [2023-01-11 18:58:40,313][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000003880_25427968.pth...
130
+ [2023-01-11 18:58:40,335][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000002540_16646144.pth
131
+ [2023-01-11 18:58:40,337][451905] Saving new best policy, reward=7604.016!
132
+ [2023-01-11 18:58:45,304][451905] Fps is (10 sec: 293524.2, 60 sec: 288127.3, 300 sec: 239576.2). Total num frames: 26869760. Throughput: 0: 286947.6. Samples: 26214400. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
133
+ [2023-01-11 18:58:45,305][451905] Avg episode reward: [(0, '7992.428')]
134
+ [2023-01-11 18:58:45,307][451905] Saving new best policy, reward=7992.428!
135
+ [2023-01-11 18:58:50,311][451905] Fps is (10 sec: 288192.1, 60 sec: 288094.3, 300 sec: 241643.3). Total num frames: 28311552. Throughput: 0: 290188.9. Samples: 27961344. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
136
+ [2023-01-11 18:58:50,312][451905] Avg episode reward: [(0, '8068.034')]
137
+ [2023-01-11 18:58:50,314][451905] Saving new best policy, reward=8068.034!
138
+ [2023-01-11 18:58:55,254][451905] Fps is (10 sec: 289807.7, 60 sec: 287359.4, 300 sec: 243669.4). Total num frames: 29753344. Throughput: 0: 290106.7. Samples: 29698048. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
139
+ [2023-01-11 18:58:55,254][451905] Avg episode reward: [(0, '8315.286')]
140
+ [2023-01-11 18:58:55,265][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000004540_29753344.pth...
141
+ [2023-01-11 18:58:55,288][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000003210_21037056.pth
142
+ [2023-01-11 18:58:55,290][451905] Saving new best policy, reward=8315.286!
143
+ [2023-01-11 18:59:00,277][451905] Fps is (10 sec: 289339.8, 60 sec: 287428.4, 300 sec: 245382.4). Total num frames: 31195136. Throughput: 0: 290311.6. Samples: 30556160. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
144
+ [2023-01-11 18:59:00,278][451905] Avg episode reward: [(0, '8176.998')]
145
+ [2023-01-11 18:59:05,255][451905] Fps is (10 sec: 288321.9, 60 sec: 290547.9, 300 sec: 247049.9). Total num frames: 32636928. Throughput: 0: 289827.1. Samples: 32309248. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
146
+ [2023-01-11 18:59:05,256][451905] Avg episode reward: [(0, '8691.556')]
147
+ [2023-01-11 18:59:05,261][451905] Saving new best policy, reward=8691.556!
148
+ [2023-01-11 18:59:10,255][451905] Fps is (10 sec: 288987.4, 60 sec: 290549.8, 300 sec: 248555.8). Total num frames: 34078720. Throughput: 0: 289957.8. Samples: 34050048. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
149
+ [2023-01-11 18:59:10,256][451905] Avg episode reward: [(0, '9060.014')]
150
+ [2023-01-11 18:59:10,266][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000005200_34078720.pth...
151
+ [2023-01-11 18:59:10,289][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000003880_25427968.pth
152
+ [2023-01-11 18:59:10,291][451905] Saving new best policy, reward=9060.014!
153
+ [2023-01-11 18:59:15,256][451905] Fps is (10 sec: 288344.1, 60 sec: 289653.9, 300 sec: 249955.9). Total num frames: 35520512. Throughput: 0: 289822.3. Samples: 34908160. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
154
+ [2023-01-11 18:59:15,256][451905] Avg episode reward: [(0, '9002.059')]
155
+ [2023-01-11 18:59:16,089][451905] Early stopping after 2 epochs (4 sgd steps), loss delta 0.0000003
156
+ [2023-01-11 18:59:20,257][451905] Fps is (10 sec: 288305.1, 60 sec: 289449.8, 300 sec: 251258.4). Total num frames: 36962304. Throughput: 0: 289901.7. Samples: 36653056. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
157
+ [2023-01-11 18:59:20,258][451905] Avg episode reward: [(0, '9044.166')]
158
+ [2023-01-11 18:59:25,255][451905] Fps is (10 sec: 294934.7, 60 sec: 290550.4, 300 sec: 252912.7). Total num frames: 38469632. Throughput: 0: 290595.6. Samples: 38424576. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
159
+ [2023-01-11 18:59:25,255][451905] Avg episode reward: [(0, '9184.393')]
160
+ [2023-01-11 18:59:25,267][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000005864_38469632.pth...
161
+ [2023-01-11 18:59:25,283][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000004540_29753344.pth
162
+ [2023-01-11 18:59:25,286][451905] Saving new best policy, reward=9184.393!
163
+ [2023-01-11 18:59:30,254][451905] Fps is (10 sec: 294993.9, 60 sec: 290554.3, 300 sec: 254041.4). Total num frames: 39911424. Throughput: 0: 290907.9. Samples: 39290880. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
164
+ [2023-01-11 18:59:30,255][451905] Avg episode reward: [(0, '9521.790')]
165
+ [2023-01-11 18:59:30,260][451905] Saving new best policy, reward=9521.790!
166
+ [2023-01-11 18:59:35,256][451905] Fps is (10 sec: 288339.3, 60 sec: 290548.1, 300 sec: 255098.2). Total num frames: 41353216. Throughput: 0: 291540.0. Samples: 41064448. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
167
+ [2023-01-11 18:59:35,256][451905] Avg episode reward: [(0, '9662.545')]
168
+ [2023-01-11 18:59:35,260][451905] Saving new best policy, reward=9662.545!
169
+ [2023-01-11 18:59:36,895][451905] Early stopping after 2 epochs (4 sgd steps), loss delta 0.0000000
170
+ [2023-01-11 18:59:40,254][451905] Fps is (10 sec: 294911.6, 60 sec: 290789.3, 300 sec: 256487.2). Total num frames: 42860544. Throughput: 0: 291631.3. Samples: 42821632. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
171
+ [2023-01-11 18:59:40,255][451905] Avg episode reward: [(0, '10113.781')]
172
+ [2023-01-11 18:59:40,265][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000006528_42860544.pth...
173
+ [2023-01-11 18:59:40,626][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000005200_34078720.pth
174
+ [2023-01-11 18:59:40,725][451905] Saving new best policy, reward=10113.781!
175
+ [2023-01-11 18:59:45,302][451905] Fps is (10 sec: 273976.5, 60 sec: 287274.9, 300 sec: 256199.9). Total num frames: 44105728. Throughput: 0: 286379.5. Samples: 43450368. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
176
+ [2023-01-11 18:59:45,303][451905] Avg episode reward: [(0, '9794.349')]
177
+ [2023-01-11 18:59:50,256][451905] Fps is (10 sec: 268652.8, 60 sec: 287529.4, 300 sec: 257174.2). Total num frames: 45547520. Throughput: 0: 286895.6. Samples: 45219840. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
178
+ [2023-01-11 18:59:50,257][451905] Avg episode reward: [(0, '10065.353')]
179
+ [2023-01-11 18:59:55,299][451905] Fps is (10 sec: 295009.9, 60 sec: 288142.9, 300 sec: 258329.9). Total num frames: 47054848. Throughput: 0: 287262.5. Samples: 46989312. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
180
+ [2023-01-11 18:59:55,300][451905] Avg episode reward: [(0, '9859.390')]
181
+ [2023-01-11 18:59:55,307][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000007168_47054848.pth...
182
+ [2023-01-11 18:59:55,604][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000005864_38469632.pth
183
+ [2023-01-11 19:00:00,257][451905] Fps is (10 sec: 281782.9, 60 sec: 286270.4, 300 sec: 258489.5). Total num frames: 48365568. Throughput: 0: 285255.5. Samples: 47745024. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
184
+ [2023-01-11 19:00:00,258][451905] Avg episode reward: [(0, '10446.865')]
185
+ [2023-01-11 19:00:00,265][451905] Saving new best policy, reward=10441.701!
186
+ [2023-01-11 19:00:05,257][451905] Fps is (10 sec: 269818.3, 60 sec: 285071.8, 300 sec: 258925.4). Total num frames: 49741824. Throughput: 0: 283580.0. Samples: 49414144. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
187
+ [2023-01-11 19:00:05,258][451905] Avg episode reward: [(0, '10311.739')]
188
+ [2023-01-11 19:00:10,288][451905] Fps is (10 sec: 287452.8, 60 sec: 286016.4, 300 sec: 259963.3). Total num frames: 51249152. Throughput: 0: 283322.9. Samples: 51183616. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
189
+ [2023-01-11 19:00:10,289][451905] Avg episode reward: [(0, '10161.780')]
190
+ [2023-01-11 19:00:10,297][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000007808_51249152.pth...
191
+ [2023-01-11 19:00:10,609][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000006528_42860544.pth
192
+ [2023-01-11 19:00:15,254][451905] Fps is (10 sec: 281893.6, 60 sec: 283996.8, 300 sec: 260061.5). Total num frames: 52559872. Throughput: 0: 281033.7. Samples: 51937280. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
193
+ [2023-01-11 19:00:15,255][451905] Avg episode reward: [(0, '10633.747')]
194
+ [2023-01-11 19:00:15,260][451905] Saving new best policy, reward=10633.747!
195
+ [2023-01-11 19:00:20,254][451905] Fps is (10 sec: 269625.8, 60 sec: 282912.3, 300 sec: 260428.3). Total num frames: 53936128. Throughput: 0: 278947.0. Samples: 53616640. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
196
+ [2023-01-11 19:00:20,255][451905] Avg episode reward: [(0, '10862.651')]
197
+ [2023-01-11 19:00:20,260][451905] Saving new best policy, reward=10862.651!
198
+ [2023-01-11 19:00:25,258][451905] Fps is (10 sec: 275143.2, 60 sec: 280698.0, 300 sec: 260772.8). Total num frames: 55312384. Throughput: 0: 276913.6. Samples: 55283712. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
199
+ [2023-01-11 19:00:25,259][451905] Avg episode reward: [(0, '10839.951')]
200
+ [2023-01-11 19:00:25,271][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000008428_55312384.pth...
201
+ [2023-01-11 19:00:25,297][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000007168_47054848.pth
202
+ [2023-01-11 19:00:30,257][451905] Fps is (10 sec: 281733.7, 60 sec: 280702.9, 300 sec: 261409.8). Total num frames: 56754176. Throughput: 0: 282500.2. Samples: 56150016. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
203
+ [2023-01-11 19:00:30,257][451905] Avg episode reward: [(0, '11148.982')]
204
+ [2023-01-11 19:00:30,263][451905] Saving new best policy, reward=11148.982!
205
+ [2023-01-11 19:00:35,256][451905] Fps is (10 sec: 288406.0, 60 sec: 280708.8, 300 sec: 262016.7). Total num frames: 58195968. Throughput: 0: 281075.5. Samples: 57868288. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
206
+ [2023-01-11 19:00:35,257][451905] Avg episode reward: [(0, '10900.463')]
207
+ [2023-01-11 19:00:40,256][451905] Fps is (10 sec: 288368.4, 60 sec: 279612.4, 300 sec: 262596.8). Total num frames: 59637760. Throughput: 0: 280796.1. Samples: 59613184. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
208
+ [2023-01-11 19:00:40,257][451905] Avg episode reward: [(0, '10984.909')]
209
+ [2023-01-11 19:00:40,268][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000009088_59637760.pth...
210
+ [2023-01-11 19:00:40,292][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000007808_51249152.pth
211
+ [2023-01-11 19:00:45,256][451905] Fps is (10 sec: 288366.8, 60 sec: 283114.2, 300 sec: 263152.0). Total num frames: 61079552. Throughput: 0: 283175.7. Samples: 60487680. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
212
+ [2023-01-11 19:00:45,257][451905] Avg episode reward: [(0, '10910.111')]
213
+ [2023-01-11 19:00:50,257][451905] Fps is (10 sec: 288340.4, 60 sec: 282894.0, 300 sec: 263682.6). Total num frames: 62521344. Throughput: 0: 284082.9. Samples: 62197760. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
214
+ [2023-01-11 19:00:50,258][451905] Avg episode reward: [(0, '11054.682')]
215
+ [2023-01-11 19:00:55,254][451905] Fps is (10 sec: 288421.9, 60 sec: 282015.9, 300 sec: 264195.5). Total num frames: 63963136. Throughput: 0: 283706.8. Samples: 63940608. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
216
+ [2023-01-11 19:00:55,255][451905] Avg episode reward: [(0, '11399.716')]
217
+ [2023-01-11 19:00:55,264][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000009748_63963136.pth...
218
+ [2023-01-11 19:00:55,281][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000008428_55312384.pth
219
+ [2023-01-11 19:00:55,283][451905] Saving new best policy, reward=11399.716!
220
+ [2023-01-11 19:01:00,283][451905] Fps is (10 sec: 294139.5, 60 sec: 284957.5, 300 sec: 264918.3). Total num frames: 65470464. Throughput: 0: 286125.9. Samples: 64821248. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
221
+ [2023-01-11 19:01:00,284][451905] Avg episode reward: [(0, '11540.237')]
222
+ [2023-01-11 19:01:00,286][451905] Saving new best policy, reward=11540.237!
223
+ [2023-01-11 19:01:05,257][451905] Fps is (10 sec: 294816.1, 60 sec: 286174.4, 300 sec: 265410.5). Total num frames: 66912256. Throughput: 0: 288020.1. Samples: 66578432. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
224
+ [2023-01-11 19:01:05,258][451905] Avg episode reward: [(0, '11359.174')]
225
+ [2023-01-11 19:01:10,256][451905] Fps is (10 sec: 289131.9, 60 sec: 285234.3, 300 sec: 265857.5). Total num frames: 68354048. Throughput: 0: 289734.5. Samples: 68321280. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
226
+ [2023-01-11 19:01:10,257][451905] Avg episode reward: [(0, '11490.578')]
227
+ [2023-01-11 19:01:10,268][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000010418_68354048.pth...
228
+ [2023-01-11 19:01:10,285][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000009088_59637760.pth
229
+ [2023-01-11 19:01:15,256][451905] Fps is (10 sec: 288397.2, 60 sec: 287258.0, 300 sec: 266287.3). Total num frames: 69795840. Throughput: 0: 289956.3. Samples: 69197824. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
230
+ [2023-01-11 19:01:15,256][451905] Avg episode reward: [(0, '11371.602')]
231
+ [2023-01-11 19:01:20,286][451905] Fps is (10 sec: 294033.6, 60 sec: 289295.5, 300 sec: 266915.4). Total num frames: 71303168. Throughput: 0: 290714.1. Samples: 70959104. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
232
+ [2023-01-11 19:01:20,287][451905] Avg episode reward: [(0, '11555.144')]
233
+ [2023-01-11 19:01:20,289][451905] Saving new best policy, reward=11555.144!
234
+ [2023-01-11 19:01:25,284][451905] Fps is (10 sec: 294087.5, 60 sec: 290418.0, 300 sec: 267311.8). Total num frames: 72744960. Throughput: 0: 290182.9. Samples: 72679424. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
235
+ [2023-01-11 19:01:25,284][451905] Avg episode reward: [(0, '11422.452')]
236
+ [2023-01-11 19:01:25,292][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000011088_72744960.pth...
237
+ [2023-01-11 19:01:25,308][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000009748_63963136.pth
238
+ [2023-01-11 19:01:30,307][451905] Fps is (10 sec: 287771.5, 60 sec: 290300.8, 300 sec: 267669.5). Total num frames: 74186752. Throughput: 0: 289626.0. Samples: 73535488. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
239
+ [2023-01-11 19:01:30,307][451905] Avg episode reward: [(0, '11773.974')]
240
+ [2023-01-11 19:01:30,310][451905] Saving new best policy, reward=11773.974!
241
+ [2023-01-11 19:01:35,256][451905] Fps is (10 sec: 282588.7, 60 sec: 289452.0, 300 sec: 267851.8). Total num frames: 75563008. Throughput: 0: 290365.8. Samples: 75264000. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
242
+ [2023-01-11 19:01:35,257][451905] Avg episode reward: [(0, '11651.489')]
243
+ [2023-01-11 19:01:40,257][451905] Fps is (10 sec: 283208.3, 60 sec: 289446.6, 300 sec: 268208.0). Total num frames: 77004800. Throughput: 0: 290294.8. Samples: 77004800. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
244
+ [2023-01-11 19:01:40,258][451905] Avg episode reward: [(0, '11539.934')]
245
+ [2023-01-11 19:01:40,266][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000011738_77004800.pth...
246
+ [2023-01-11 19:01:40,284][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000010418_68354048.pth
247
+ [2023-01-11 19:01:45,256][451905] Fps is (10 sec: 288367.2, 60 sec: 289452.1, 300 sec: 268554.1). Total num frames: 78446592. Throughput: 0: 289854.2. Samples: 77856768. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
248
+ [2023-01-11 19:01:45,256][451905] Avg episode reward: [(0, '11773.561')]
249
+ [2023-01-11 19:01:50,310][451905] Fps is (10 sec: 293362.0, 60 sec: 290286.2, 300 sec: 277520.3). Total num frames: 79953920. Throughput: 0: 288884.3. Samples: 79593472. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
250
+ [2023-01-11 19:01:50,311][451905] Avg episode reward: [(0, '11745.324')]
251
+ [2023-01-11 19:01:55,256][451905] Fps is (10 sec: 288354.0, 60 sec: 289440.8, 300 sec: 285913.7). Total num frames: 81330176. Throughput: 0: 289089.3. Samples: 81330176. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
252
+ [2023-01-11 19:01:55,257][451905] Avg episode reward: [(0, '11664.784')]
253
+ [2023-01-11 19:01:55,263][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000012398_81330176.pth...
254
+ [2023-01-11 19:01:55,281][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000011088_72744960.pth
255
+ [2023-01-11 19:02:00,257][451905] Fps is (10 sec: 289899.3, 60 sec: 289577.9, 300 sec: 286195.9). Total num frames: 82837504. Throughput: 0: 288761.8. Samples: 82192384. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
256
+ [2023-01-11 19:02:00,257][451905] Avg episode reward: [(0, '11688.261')]
257
+ [2023-01-11 19:02:05,257][451905] Fps is (10 sec: 301442.7, 60 sec: 290545.1, 300 sec: 286462.8). Total num frames: 84344832. Throughput: 0: 290369.5. Samples: 84017152. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
258
+ [2023-01-11 19:02:05,257][451905] Avg episode reward: [(0, '12067.370')]
259
+ [2023-01-11 19:02:05,263][451905] Saving new best policy, reward=12067.370!
260
+ [2023-01-11 19:02:10,291][451905] Fps is (10 sec: 293899.0, 60 sec: 290374.2, 300 sec: 287879.7). Total num frames: 85786624. Throughput: 0: 289767.1. Samples: 85721088. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
261
+ [2023-01-11 19:02:10,292][451905] Avg episode reward: [(0, '12220.248')]
262
+ [2023-01-11 19:02:10,298][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000013078_85786624.pth...
263
+ [2023-01-11 19:02:10,315][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000011738_77004800.pth
264
+ [2023-01-11 19:02:10,317][451905] Saving new best policy, reward=12220.248!
265
+ [2023-01-11 19:02:15,254][451905] Fps is (10 sec: 281876.8, 60 sec: 289458.6, 300 sec: 287693.2). Total num frames: 87162880. Throughput: 0: 290062.1. Samples: 86573056. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
266
+ [2023-01-11 19:02:15,255][451905] Avg episode reward: [(0, '11914.534')]
267
+ [2023-01-11 19:02:20,257][451905] Fps is (10 sec: 289339.0, 60 sec: 289590.2, 300 sec: 287689.2). Total num frames: 88670208. Throughput: 0: 290580.4. Samples: 88340480. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
268
+ [2023-01-11 19:02:20,258][451905] Avg episode reward: [(0, '11682.962')]
269
+ [2023-01-11 19:02:25,257][451905] Fps is (10 sec: 294827.6, 60 sec: 289580.1, 300 sec: 287691.6). Total num frames: 90112000. Throughput: 0: 290270.3. Samples: 90066944. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
270
+ [2023-01-11 19:02:25,258][451905] Avg episode reward: [(0, '12117.807')]
271
+ [2023-01-11 19:02:25,268][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000013738_90112000.pth...
272
+ [2023-01-11 19:02:25,285][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000012398_81330176.pth
273
+ [2023-01-11 19:02:30,254][451905] Fps is (10 sec: 288451.7, 60 sec: 289704.3, 300 sec: 287692.5). Total num frames: 91553792. Throughput: 0: 290508.3. Samples: 90929152. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
274
+ [2023-01-11 19:02:30,255][451905] Avg episode reward: [(0, '12096.269')]
275
+ [2023-01-11 19:02:35,313][451905] Fps is (10 sec: 293260.2, 60 sec: 291357.3, 300 sec: 287856.9). Total num frames: 93061120. Throughput: 0: 291476.5. Samples: 92710912. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
276
+ [2023-01-11 19:02:35,314][451905] Avg episode reward: [(0, '12384.883')]
277
+ [2023-01-11 19:02:35,316][451905] Saving new best policy, reward=12384.883!
278
+ [2023-01-11 19:02:40,308][451905] Fps is (10 sec: 293345.9, 60 sec: 291390.4, 300 sec: 287863.7). Total num frames: 94502912. Throughput: 0: 290937.8. Samples: 94437376. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
279
+ [2023-01-11 19:02:40,308][451905] Avg episode reward: [(0, '12423.622')]
280
+ [2023-01-11 19:02:40,315][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000014408_94502912.pth...
281
+ [2023-01-11 19:02:40,338][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000013078_85786624.pth
282
+ [2023-01-11 19:02:40,340][451905] Saving new best policy, reward=12423.622!
283
+ [2023-01-11 19:02:45,257][451905] Fps is (10 sec: 283393.6, 60 sec: 290535.8, 300 sec: 287690.8). Total num frames: 95879168. Throughput: 0: 291040.3. Samples: 95289344. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
284
+ [2023-01-11 19:02:45,258][451905] Avg episode reward: [(0, '12489.164')]
285
+ [2023-01-11 19:02:45,264][451905] Saving new best policy, reward=12489.164!
286
+ [2023-01-11 19:02:50,257][451905] Fps is (10 sec: 283244.4, 60 sec: 289707.8, 300 sec: 287486.1). Total num frames: 97320960. Throughput: 0: 288358.6. Samples: 96993280. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
287
+ [2023-01-11 19:02:50,257][451905] Avg episode reward: [(0, '12530.994')]
288
+ [2023-01-11 19:02:50,259][451905] Saving new best policy, reward=12530.994!
289
+ [2023-01-11 19:02:55,256][451905] Fps is (10 sec: 288402.6, 60 sec: 290543.9, 300 sec: 287523.7). Total num frames: 98762752. Throughput: 0: 289269.2. Samples: 98727936. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
290
+ [2023-01-11 19:02:55,256][451905] Avg episode reward: [(0, '11972.568')]
291
+ [2023-01-11 19:02:55,266][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000015058_98762752.pth...
292
+ [2023-01-11 19:02:55,292][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000013738_90112000.pth
293
+ [2023-01-11 19:02:59,748][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000015258_100073472.pth...
294
+ [2023-01-11 19:02:59,764][451905] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000014408_94502912.pth
295
+ [2023-01-11 19:02:59,766][451905] Stopping InferenceWorker_p0-w0...
296
+ [2023-01-11 19:02:59,767][451905] Stopping RolloutWorker_w0...
297
+ [2023-01-11 19:02:59,767][451905] Stopping Batcher_0...
298
+ [2023-01-11 19:02:59,767][451905] Component InferenceWorker_p0-w0 stopped!
299
+ [2023-01-11 19:02:59,768][451905] Component RolloutWorker_w0 stopped!
300
+ [2023-01-11 19:02:59,768][451905] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/00_v083_brax_basic_benchmark_see_2322090_env_ant_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000015258_100073472.pth...
301
+ [2023-01-11 19:02:59,783][451905] Stopping LearnerWorker_p0...
302
+ [2023-01-11 19:02:59,784][451905] Component Batcher_0 stopped!
303
+ [2023-01-11 19:02:59,784][451905] Component LearnerWorker_p0 stopped!
304
+ [2023-01-11 19:02:59,784][451905] Batcher 0 profile tree view:
305
+ batching: 0.3719, releasing_batches: 0.0723
306
+ [2023-01-11 19:02:59,785][451905] InferenceWorker_p0-w0 profile tree view:
307
+ update_model: 0.4822
308
+ one_step: 0.0012
309
+ handle_policy_step: 62.2539
310
+ deserialize: 0.5489, stack: 0.0690, obs_to_device_normalize: 11.2582, forward: 39.1646, prepare_outputs: 6.9404, send_messages: 0.8335
311
+ [2023-01-11 19:02:59,785][451905] Learner 0 profile tree view:
312
+ misc: 0.0067, prepare_batch: 5.9127
313
+ train: 90.4802
314
+ epoch_init: 0.0649, minibatch_init: 1.0155, losses_postprocess: 3.0830, kl_divergence: 5.8746, after_optimizer: 0.3524
315
+ calculate_losses: 18.6587
316
+ losses_init: 0.0395, forward_head: 3.0733, bptt_initial: 0.1318, bptt: 0.1409, tail: 9.1108, advantages_returns: 1.1916, losses: 3.6217
317
+ update: 59.4683
318
+ clip: 9.0914
319
+ [2023-01-11 19:02:59,785][451905] RolloutWorker_w0 profile tree view:
320
+ wait_for_trajectories: 0.0886, enqueue_policy_requests: 5.8259, process_policy_outputs: 3.7482, env_step: 157.1659, finalize_trajectories: 0.1642, complete_rollouts: 0.0683
321
+ post_env_step: 20.3344
322
+ process_env_step: 8.2187
323
+ [2023-01-11 19:02:59,785][451905] Loop Runner_EvtLoop terminating...
324
+ [2023-01-11 19:02:59,786][451905] Runner profile tree view:
325
+ main_loop: 373.8249
326
+ [2023-01-11 19:02:59,786][451905] Collected {0: 100073472}, FPS: 267701.5