apetrenko committed
Commit 9068c97
Parent: 5fa4b73

Upload . with huggingface_hub

.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ replay.mp4 filter=lfs diff=lfs merge=lfs -text
.summary/0/events.out.tfevents.1673492602.brain2.usc.edu ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5fb2bb6e00f09ec189faa9cfd8f9793d154723ce6b6c3cae0c6de99b18f37d9e
+ size 137478
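
The three lines above are a Git LFS pointer, not the file itself: the actual blob lives in LFS storage and is addressed by its SHA-256 digest. A minimal parsing sketch, with field names taken from the pointer above (`parse_lfs_pointer` is a hypothetical helper, not part of any library; `str.removeprefix` needs Python 3.9+):

```
# Sketch: parse a Git LFS pointer file into its three fields
def parse_lfs_pointer(text):
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "version": fields["version"],
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:5fb2bb6e00f09ec189faa9cfd8f9793d154723ce6b6c3cae0c6de99b18f37d9e
size 137478"""
assert parse_lfs_pointer(pointer)["size"] == 137478
```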
README.md ADDED
@@ -0,0 +1,56 @@
+ ---
+ library_name: sample-factory
+ tags:
+ - deep-reinforcement-learning
+ - reinforcement-learning
+ - sample-factory
+ model-index:
+ - name: APPO
+   results:
+   - task:
+       type: reinforcement-learning
+       name: reinforcement-learning
+     dataset:
+       name: humanoid
+       type: humanoid
+     metrics:
+     - type: mean_reward
+       value: 33847.53 +/- 6327.36
+       name: mean_reward
+       verified: false
+ ---
+
+ An **APPO** model trained on the **humanoid** environment.
+
+ This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
+ Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
+
+
+ ## Downloading the model
+
+ After installing Sample-Factory, download the model with:
+ ```
+ python -m sample_factory.huggingface.load_from_hub -r apetrenko/sample_factory_brax_humanoid
+ ```
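+
+ If you only need the raw files (say, to inspect `config.json` or `replay.mp4` without a full Sample-Factory install), a sketch using the `huggingface_hub` API should also work; the `local_dir` below is an assumed target, pick whatever suits your setup:
+ ```
+ # Sketch: direct download with huggingface_hub instead of the Sample-Factory helper
+ from huggingface_hub import snapshot_download
+
+ path = snapshot_download(
+     repo_id="apetrenko/sample_factory_brax_humanoid",
+     local_dir="./train_dir/sample_factory_brax_humanoid",  # assumed location
+ )
+ print(path)
+ ```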
+
+
+ ## Using the model
+
+ To run the model after download, use the `enjoy` script corresponding to this environment:
+ ```
+ python -m sf_examples.brax.enjoy_brax --algo=APPO --env=humanoid --train_dir=./train_dir --experiment=sample_factory_brax_humanoid
+ ```
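+
+ The `enjoy` script locates checkpoints via `--train_dir` and `--experiment`; here is a quick sketch to sanity-check that the downloaded files match the layout the command above expects (the directory path is an assumption based on the download step):
+ ```
+ # Sketch: verify the expected experiment layout before running enjoy
+ import os
+
+ ckpt_dir = "./train_dir/sample_factory_brax_humanoid/checkpoint_p0"  # assumed layout
+ print(sorted(os.listdir(ckpt_dir)))  # should list the .pth checkpoints from this repo
+ ```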
+
+
+ You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
+ See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.
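+
+ If you prefer a plain file upload over the Sample-Factory script, a sketch with the `huggingface_hub` API is below; note that it skips Sample-Factory's model-card generation, and the repo id is a placeholder:
+ ```
+ # Sketch: raw upload of an experiment directory via huggingface_hub
+ from huggingface_hub import upload_folder
+
+ upload_folder(
+     repo_id="<your-username>/sample_factory_brax_humanoid",  # placeholder
+     folder_path="./train_dir/sample_factory_brax_humanoid",  # assumed experiment dir
+ )
+ ```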
+
+ ## Training with this model
+
+ To continue training with this model, use the `train` script corresponding to this environment:
+ ```
+ python -m sf_examples.brax.train_brax --algo=APPO --env=humanoid --train_dir=./train_dir --experiment=sample_factory_brax_humanoid --restart_behavior=resume --train_for_env_steps=10000000000
+ ```
+
+ Note: you may need to set `--train_for_env_steps` to a suitably high number, as the experiment will resume from the env-step count at which it previously concluded.
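+
+ For example, the newest checkpoint in this repository is `checkpoint_000015264_100073472.pth`, meaning the run concluded at 100073472 env steps, so a continuation needs `--train_for_env_steps` above that value. A small sketch, assuming the `checkpoint_{version}_{env_steps}.pth` naming convention visible in this repo:
+ ```
+ # Sketch: read the resume point from a checkpoint filename (naming convention assumed)
+ name = "checkpoint_000015264_100073472.pth"
+ env_steps = int(name.rsplit(".", 1)[0].split("_")[-1])
+ print(f"resumes at {env_steps} env steps; set --train_for_env_steps well above this")
+ ```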
+
checkpoint_p0/best_000014914_97779712_reward_35312.879.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d83ea810de8cb1fe3fa77530ef544e2f2603d8c0bbf1145904d0fc132c60ed38
+ size 3499383
checkpoint_p0/checkpoint_000011080_72613888.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d5ef0093b0501c8500b078298cd42119ac5a7c0c13fdebaf10957d4a58cbd2a4
+ size 3499695
checkpoint_p0/checkpoint_000015264_100073472.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f9f562dde39eac5d324a2b12b28b6df1d310aa86483746ccc00a7c7b9b98b1d6
+ size 3499695
config.json ADDED
@@ -0,0 +1,147 @@
+ {
+   "help": false,
+   "algo": "APPO",
+   "env": "humanoid",
+   "experiment": "02_v083_brax_basic_benchmark_see_2322090_env_humanoid_u.rnn_False_n.epo_5",
+   "train_dir": "./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
+   "restart_behavior": "resume",
+   "device": "gpu",
+   "seed": 2322090,
+   "num_policies": 1,
+   "async_rl": false,
+   "serial_mode": true,
+   "batched_sampling": true,
+   "num_batches_to_accumulate": 2,
+   "worker_num_splits": 1,
+   "policy_workers_per_policy": 1,
+   "max_policy_lag": 1000,
+   "num_workers": 1,
+   "num_envs_per_worker": 1,
+   "batch_size": 32768,
+   "num_batches_per_epoch": 2,
+   "num_epochs": 5,
+   "rollout": 32,
+   "recurrence": 1,
+   "shuffle_minibatches": false,
+   "gamma": 0.99,
+   "reward_scale": 0.01,
+   "reward_clip": 1000.0,
+   "value_bootstrap": true,
+   "normalize_returns": true,
+   "exploration_loss_coeff": 0.0,
+   "value_loss_coeff": 2.0,
+   "kl_loss_coeff": 0.0,
+   "exploration_loss": "entropy",
+   "gae_lambda": 0.95,
+   "ppo_clip_ratio": 0.2,
+   "ppo_clip_value": 1.0,
+   "with_vtrace": false,
+   "vtrace_rho": 1.0,
+   "vtrace_c": 1.0,
+   "optimizer": "adam",
+   "adam_eps": 1e-06,
+   "adam_beta1": 0.9,
+   "adam_beta2": 0.999,
+   "max_grad_norm": 1.0,
+   "learning_rate": 0.0003,
+   "lr_schedule": "kl_adaptive_epoch",
+   "lr_schedule_kl_threshold": 0.008,
+   "lr_adaptive_min": 1e-06,
+   "lr_adaptive_max": 0.002,
+   "obs_subtract_mean": 0.0,
+   "obs_scale": 1.0,
+   "normalize_input": true,
+   "normalize_input_keys": null,
+   "decorrelate_experience_max_seconds": 0,
+   "decorrelate_envs_on_one_worker": true,
+   "actor_worker_gpus": [
+     0
+   ],
+   "set_workers_cpu_affinity": true,
+   "force_envs_single_thread": false,
+   "default_niceness": 0,
+   "log_to_file": true,
+   "experiment_summaries_interval": 10,
+   "flush_summaries_interval": 30,
+   "stats_avg": 100,
+   "summaries_use_frameskip": true,
+   "heartbeat_interval": 20,
+   "heartbeat_reporting_interval": 180,
+   "train_for_env_steps": 100000000,
+   "train_for_seconds": 10000000000,
+   "save_every_sec": 120,
+   "keep_checkpoints": 2,
+   "load_checkpoint_kind": "latest",
+   "save_milestones_sec": -1,
+   "save_best_every_sec": 5,
+   "save_best_metric": "reward",
+   "save_best_after": 5000000,
+   "benchmark": false,
+   "encoder_mlp_layers": [
+     512,
+     256,
+     128
+   ],
+   "encoder_conv_architecture": "convnet_simple",
+   "encoder_conv_mlp_layers": [
+     512
+   ],
+   "use_rnn": false,
+   "rnn_size": 512,
+   "rnn_type": "gru",
+   "rnn_num_layers": 1,
+   "decoder_mlp_layers": [],
+   "nonlinearity": "elu",
+   "policy_initialization": "torch_default",
+   "policy_init_gain": 1.0,
+   "actor_critic_share_weights": true,
+   "adaptive_stddev": false,
+   "continuous_tanh_scale": 0.0,
+   "initial_stddev": 1.0,
+   "use_env_info_cache": false,
+   "env_gpu_actions": true,
+   "env_gpu_observations": true,
+   "env_frameskip": 1,
+   "env_framestack": 1,
+   "pixel_format": "CHW",
+   "use_record_episode_statistics": false,
+   "with_wandb": true,
+   "wandb_user": null,
+   "wandb_project": "sample_factory",
+   "wandb_group": null,
+   "wandb_job_type": "SF",
+   "wandb_tags": [],
+   "with_pbt": false,
+   "pbt_mix_policies_in_one_env": true,
+   "pbt_period_env_steps": 5000000,
+   "pbt_start_mutation": 20000000,
+   "pbt_replace_fraction": 0.3,
+   "pbt_mutation_rate": 0.15,
+   "pbt_replace_reward_gap": 0.1,
+   "pbt_replace_reward_gap_absolute": 1e-06,
+   "pbt_optimize_gamma": false,
+   "pbt_target_objective": "true_objective",
+   "pbt_perturb_min": 1.1,
+   "pbt_perturb_max": 1.5,
+   "env_agents": 2048,
+   "clamp_actions": false,
+   "clamp_rew_obs": false,
+   "command_line": "--actor_worker_gpus 0 --wandb_project=sample_factory --with_wandb=True --seed=2322090 --env=humanoid --use_rnn=False --num_epochs=5 --experiment=02_v083_brax_basic_benchmark_see_2322090_env_humanoid_u.rnn_False_n.epo_5 --train_dir=./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
+   "cli_args": {
+     "env": "humanoid",
+     "experiment": "02_v083_brax_basic_benchmark_see_2322090_env_humanoid_u.rnn_False_n.epo_5",
+     "train_dir": "./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
+     "seed": 2322090,
+     "num_epochs": 5,
+     "actor_worker_gpus": [
+       0
+     ],
+     "use_rnn": false,
+     "with_wandb": true,
+     "wandb_project": "sample_factory"
+   },
+   "git_hash": "6aa87f2d416b9fad874b299d864a522c887c238a",
+   "git_repo_name": "git@github.com:alex-petrenko/sample-factory.git",
+   "train_script": "sf_examples.brax.train_brax",
+   "wandb_unique_id": "02_v083_brax_basic_benchmark_see_2322090_env_humanoid_u.rnn_False_n.epo_5_20230111_190315_359969"
+ }
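
A short sketch for inspecting this config from Python (the local path assumes the download step shown in the README above):

```
# Load the recorded training config and print a few key hyperparameters
import json

with open("./train_dir/sample_factory_brax_humanoid/config.json") as f:  # assumed path
    cfg = json.load(f)

print(cfg["algo"], cfg["env"])               # APPO humanoid
print(cfg["batch_size"], cfg["num_epochs"])  # 32768 5
print(cfg["learning_rate"], cfg["gamma"])    # 0.0003 0.99
```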
git.diff ADDED
File without changes
replay.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3ed43c64af48c394d505d5c3631c6c859e773f1b34162d7e6e3158ba1b3256ac
+ size 2260300
sf_log.txt ADDED
@@ -0,0 +1,318 @@
+ [2023-01-11 19:03:27,380][454600] Saving configuration to ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/02_v083_brax_basic_benchmark_see_2322090_env_humanoid_u.rnn_False_n.epo_5/config.json...
+ [2023-01-11 19:03:27,559][454600] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+ [2023-01-11 19:03:27,561][454600] Rollout worker 0 uses device cuda:0
+ [2023-01-11 19:03:27,562][454600] In synchronous mode, we only accumulate one batch. Setting num_batches_to_accumulate to 1
+ [2023-01-11 19:03:27,679][454600] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+ [2023-01-11 19:03:27,680][454600] InferenceWorker_p0-w0: min num requests: 1
+ [2023-01-11 19:03:27,681][454600] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+ [2023-01-11 19:03:27,682][454600] WARNING! It is generally recommended to enable Fixed KL loss (https://arxiv.org/pdf/1707.06347.pdf) for continuous action tasks to avoid potential numerical issues. I.e. set --kl_loss_coeff=0.1
+ [2023-01-11 19:03:27,682][454600] Setting fixed seed 2322090
+ [2023-01-11 19:03:27,682][454600] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+ [2023-01-11 19:03:27,683][454600] Initializing actor-critic model on device cuda:0
+ [2023-01-11 19:03:27,683][454600] RunningMeanStd input shape: (240,)
+ [2023-01-11 19:03:27,684][454600] RunningMeanStd input shape: (1,)
+ [2023-01-11 19:03:27,768][454600] Created Actor Critic model with architecture:
+ [2023-01-11 19:03:27,769][454600] ActorCriticSharedWeights(
+   (obs_normalizer): ObservationNormalizer(
+     (running_mean_std): RunningMeanStdDictInPlace(
+       (running_mean_std): ModuleDict(
+         (obs): RunningMeanStdInPlace()
+       )
+     )
+   )
+   (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+   (encoder): MultiInputEncoder(
+     (encoders): ModuleDict(
+       (obs): MlpEncoder(
+         (mlp_head): RecursiveScriptModule(
+           original_name=Sequential
+           (0): RecursiveScriptModule(original_name=Linear)
+           (1): RecursiveScriptModule(original_name=ELU)
+           (2): RecursiveScriptModule(original_name=Linear)
+           (3): RecursiveScriptModule(original_name=ELU)
+           (4): RecursiveScriptModule(original_name=Linear)
+           (5): RecursiveScriptModule(original_name=ELU)
+         )
+       )
+     )
+   )
+   (core): ModelCoreIdentity()
+   (decoder): MlpDecoder(
+     (mlp): Identity()
+   )
+   (critic_linear): Linear(in_features=128, out_features=1, bias=True)
+   (action_parameterization): ActionParameterizationContinuousNonAdaptiveStddev(
+     (distribution_linear): Linear(in_features=128, out_features=17, bias=True)
+   )
+ )
+ [2023-01-11 19:03:27,771][454600] Using optimizer <class 'torch.optim.adam.Adam'>
+ [2023-01-11 19:03:27,775][454600] No checkpoints found
+ [2023-01-11 19:03:27,775][454600] Did not load from checkpoint, starting from scratch!
+ [2023-01-11 19:03:27,775][454600] Initialized policy 0 weights for model version 0
+ [2023-01-11 19:03:27,776][454600] LearnerWorker_p0 finished initialization!
+ [2023-01-11 19:03:27,776][454600] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+ [2023-01-11 19:03:27,811][454600] Inference worker 0-0 is ready!
+ [2023-01-11 19:03:27,812][454600] All inference workers are ready! Signal rollout workers to start!
+ [2023-01-11 19:03:27,812][454600] EnvRunner 0-0 uses policy 0
+ [2023-01-11 19:03:29,365][454600] Resetting env <VectorGymWrapper instance> with 2048 parallel agents...
+ [2023-01-11 19:03:36,383][454600] reset() done, obs.shape=torch.Size([2048, 240])!
+ [2023-01-11 19:03:36,392][454600] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+ [2023-01-11 19:03:49,705][454600] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 153.8. Samples: 2048. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+ [2023-01-11 19:04:02,759][454600] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 155.3. Samples: 4096. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+ [2023-01-11 19:04:02,763][454600] Heartbeat connected on Batcher_0
+ [2023-01-11 19:04:02,763][454600] Heartbeat connected on LearnerWorker_p0
+ [2023-01-11 19:04:02,764][454600] Heartbeat connected on InferenceWorker_p0-w0
+ [2023-01-11 19:04:02,764][454600] Heartbeat connected on RolloutWorker_w0
+ [2023-01-11 19:04:02,764][454600] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 155.3. Samples: 4096. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+ [2023-01-11 19:04:02,772][454600] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 232.9. Samples: 6144. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+ [2023-01-11 19:04:02,776][454600] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 232.9. Samples: 6144. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+ [2023-01-11 19:04:02,782][454600] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 310.4. Samples: 8192. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+ [2023-01-11 19:04:07,026][454600] Fps is (10 sec: 154179.0, 60 sec: 21392.9, 300 sec: 21392.9). Total num frames: 655360. Throughput: 0: 20791.2. Samples: 636928. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+ [2023-01-11 19:04:07,027][454600] Avg episode reward: [(0, '253.034')]
+ [2023-01-11 19:04:12,022][454600] Fps is (10 sec: 198596.7, 60 sec: 51502.3, 300 sec: 51502.3). Total num frames: 1835008. Throughput: 0: 37247.2. Samples: 1327104. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+ [2023-01-11 19:04:12,022][454600] Avg episode reward: [(0, '599.874')]
+ [2023-01-11 19:04:17,023][454600] Fps is (10 sec: 236000.6, 60 sec: 74195.2, 300 sec: 74195.2). Total num frames: 3014656. Throughput: 0: 67743.4. Samples: 2752512. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:04:17,024][454600] Avg episode reward: [(0, '1565.579')]
+ [2023-01-11 19:04:22,024][454600] Fps is (10 sec: 235864.8, 60 sec: 91915.0, 300 sec: 91915.0). Total num frames: 4194304. Throughput: 0: 128953.6. Samples: 4169728. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+ [2023-01-11 19:04:22,025][454600] Avg episode reward: [(0, '2174.293')]
+ [2023-01-11 19:04:27,022][454600] Fps is (10 sec: 235962.2, 60 sec: 106141.6, 300 sec: 106141.6). Total num frames: 5373952. Throughput: 0: 199881.7. Samples: 4853760. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+ [2023-01-11 19:04:27,023][454600] Avg episode reward: [(0, '3782.828')]
+ [2023-01-11 19:04:27,029][454600] Saving new best policy, reward=3782.828!
+ [2023-01-11 19:04:32,067][454600] Fps is (10 sec: 234929.6, 60 sec: 117711.8, 300 sec: 117711.8). Total num frames: 6553600. Throughput: 0: 213516.5. Samples: 6260736. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+ [2023-01-11 19:04:32,068][454600] Avg episode reward: [(0, '4961.963')]
+ [2023-01-11 19:04:32,078][454600] Saving new best policy, reward=4961.963!
+ [2023-01-11 19:04:37,022][454600] Fps is (10 sec: 229381.5, 60 sec: 162051.1, 300 sec: 126467.7). Total num frames: 7667712. Throughput: 0: 223515.5. Samples: 7661568. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:04:37,022][454600] Avg episode reward: [(0, '6290.788')]
+ [2023-01-11 19:04:37,029][454600] Saving new best policy, reward=6290.788!
+ [2023-01-11 19:04:42,023][454600] Fps is (10 sec: 230389.8, 60 sec: 225332.4, 300 sec: 134804.7). Total num frames: 8847360. Throughput: 0: 212329.0. Samples: 8339456. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+ [2023-01-11 19:04:42,023][454600] Avg episode reward: [(0, '10632.291')]
+ [2023-01-11 19:04:42,034][454600] Saving new best policy, reward=10632.291!
+ [2023-01-11 19:04:47,077][454600] Fps is (10 sec: 234644.0, 60 sec: 226279.6, 300 sec: 141855.6). Total num frames: 10027008. Throughput: 0: 219619.4. Samples: 9736192. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:04:47,077][454600] Avg episode reward: [(0, '12825.875')]
+ [2023-01-11 19:04:47,080][454600] Saving new best policy, reward=12825.875!
+ [2023-01-11 19:04:52,023][454600] Fps is (10 sec: 229370.2, 60 sec: 226208.8, 300 sec: 147308.5). Total num frames: 11141120. Throughput: 0: 233124.6. Samples: 11126784. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:04:52,024][454600] Avg episode reward: [(0, '15167.174')]
+ [2023-01-11 19:04:52,036][454600] Saving new best policy, reward=15167.174!
+ [2023-01-11 19:04:57,025][454600] Fps is (10 sec: 230575.5, 60 sec: 227116.0, 300 sec: 152801.4). Total num frames: 12320768. Throughput: 0: 232637.7. Samples: 11796480. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:04:57,026][454600] Avg episode reward: [(0, '16256.997')]
+ [2023-01-11 19:04:57,032][454600] Saving new best policy, reward=16256.997!
+ [2023-01-11 19:05:02,097][454600] Fps is (10 sec: 234190.3, 60 sec: 227602.7, 300 sec: 157521.0). Total num frames: 13500416. Throughput: 0: 231907.0. Samples: 13205504. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:05:02,098][454600] Avg episode reward: [(0, '19859.633')]
+ [2023-01-11 19:05:02,105][454600] Saving new best policy, reward=19859.633!
+ [2023-01-11 19:05:07,023][454600] Fps is (10 sec: 229423.3, 60 sec: 232668.0, 300 sec: 161254.0). Total num frames: 14614528. Throughput: 0: 231888.8. Samples: 14604288. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:05:07,023][454600] Avg episode reward: [(0, '20474.635')]
+ [2023-01-11 19:05:07,029][454600] Saving new best policy, reward=20474.635!
+ [2023-01-11 19:05:12,023][454600] Fps is (10 sec: 231103.2, 60 sec: 232648.6, 300 sec: 165157.9). Total num frames: 15794176. Throughput: 0: 231465.9. Samples: 15269888. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:05:12,023][454600] Avg episode reward: [(0, '22723.672')]
+ [2023-01-11 19:05:12,030][454600] Saving new best policy, reward=22723.672!
+ [2023-01-11 19:05:17,021][454600] Fps is (10 sec: 229405.8, 60 sec: 231569.1, 300 sec: 168025.6). Total num frames: 16908288. Throughput: 0: 231431.7. Samples: 16664576. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:05:17,022][454600] Avg episode reward: [(0, '23173.908')]
+ [2023-01-11 19:05:17,028][454600] Saving new best policy, reward=23173.908!
+ [2023-01-11 19:05:22,022][454600] Fps is (10 sec: 229391.6, 60 sec: 231569.5, 300 sec: 171238.5). Total num frames: 18087936. Throughput: 0: 230922.0. Samples: 18053120. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:05:22,023][454600] Avg episode reward: [(0, '23684.512')]
+ [2023-01-11 19:05:22,034][454600] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/02_v083_brax_basic_benchmark_see_2322090_env_humanoid_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000002760_18087936.pth...
+ [2023-01-11 19:05:22,076][454600] Saving new best policy, reward=23684.512!
+ [2023-01-11 19:05:27,022][454600] Fps is (10 sec: 229354.9, 60 sec: 230467.9, 300 sec: 173569.8). Total num frames: 19202048. Throughput: 0: 231064.2. Samples: 18737152. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:05:27,023][454600] Avg episode reward: [(0, '24190.295')]
+ [2023-01-11 19:05:27,029][454600] Saving new best policy, reward=24190.295!
+ [2023-01-11 19:05:32,025][454600] Fps is (10 sec: 229313.7, 60 sec: 230630.4, 300 sec: 176262.3). Total num frames: 20381696. Throughput: 0: 231007.3. Samples: 20119552. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:05:32,025][454600] Avg episode reward: [(0, '24498.295')]
+ [2023-01-11 19:05:32,038][454600] Saving new best policy, reward=24498.295!
+ [2023-01-11 19:05:37,067][454600] Fps is (10 sec: 234886.9, 60 sec: 231388.1, 300 sec: 178673.5). Total num frames: 21561344. Throughput: 0: 230201.2. Samples: 21495808. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+ [2023-01-11 19:05:37,067][454600] Avg episode reward: [(0, '25790.773')]
+ [2023-01-11 19:05:37,069][454600] Saving new best policy, reward=25790.773!
+ [2023-01-11 19:05:42,024][454600] Fps is (10 sec: 229405.2, 60 sec: 230466.2, 300 sec: 180491.8). Total num frames: 22675456. Throughput: 0: 231566.0. Samples: 22216704. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+ [2023-01-11 19:05:42,024][454600] Avg episode reward: [(0, '25062.875')]
+ [2023-01-11 19:05:47,024][454600] Fps is (10 sec: 230367.0, 60 sec: 230672.4, 300 sec: 182613.7). Total num frames: 23855104. Throughput: 0: 231212.4. Samples: 23592960. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:05:47,024][454600] Avg episode reward: [(0, '25982.887')]
+ [2023-01-11 19:05:47,030][454600] Saving new best policy, reward=25982.887!
+ [2023-01-11 19:05:52,021][454600] Fps is (10 sec: 229444.6, 60 sec: 230478.6, 300 sec: 184100.0). Total num frames: 24969216. Throughput: 0: 230342.0. Samples: 24969216. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:05:52,021][454600] Avg episode reward: [(0, '25676.555')]
+ [2023-01-11 19:05:57,021][454600] Fps is (10 sec: 229438.2, 60 sec: 230482.8, 300 sec: 185942.5). Total num frames: 26148864. Throughput: 0: 230887.8. Samples: 25659392. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:05:57,022][454600] Avg episode reward: [(0, '24945.438')]
+ [2023-01-11 19:06:02,021][454600] Fps is (10 sec: 235920.5, 60 sec: 230762.8, 300 sec: 187658.6). Total num frames: 27328512. Throughput: 0: 231152.5. Samples: 27066368. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:06:02,022][454600] Avg episode reward: [(0, '25933.258')]
+ [2023-01-11 19:06:07,021][454600] Fps is (10 sec: 229375.9, 60 sec: 230474.8, 300 sec: 188825.9). Total num frames: 28442624. Throughput: 0: 230884.3. Samples: 28442624. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:06:07,021][454600] Avg episode reward: [(0, '26500.414')]
+ [2023-01-11 19:06:07,028][454600] Saving new best policy, reward=26500.414!
+ [2023-01-11 19:06:12,054][454600] Fps is (10 sec: 228615.9, 60 sec: 230347.7, 300 sec: 190298.5). Total num frames: 29622272. Throughput: 0: 230213.4. Samples: 29104128. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:06:12,055][454600] Avg episode reward: [(0, '27414.232')]
+ [2023-01-11 19:06:12,063][454600] Saving new best policy, reward=27414.232!
+ [2023-01-11 19:06:17,022][454600] Fps is (10 sec: 229354.4, 60 sec: 230466.2, 300 sec: 191349.3). Total num frames: 30736384. Throughput: 0: 230119.7. Samples: 30474240. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:06:17,022][454600] Avg episode reward: [(0, '28162.598')]
+ [2023-01-11 19:06:17,028][454600] Saving new best policy, reward=28162.598!
+ [2023-01-11 19:06:22,022][454600] Fps is (10 sec: 223541.1, 60 sec: 229376.2, 300 sec: 192299.1). Total num frames: 31850496. Throughput: 0: 230332.1. Samples: 31850496. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:06:22,023][454600] Avg episode reward: [(0, '28589.760')]
+ [2023-01-11 19:06:22,034][454600] Saving new best policy, reward=28589.760!
+ [2023-01-11 19:06:27,032][454600] Fps is (10 sec: 222594.8, 60 sec: 229338.4, 300 sec: 193182.2). Total num frames: 32964608. Throughput: 0: 227239.8. Samples: 32444416. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+ [2023-01-11 19:06:27,033][454600] Avg episode reward: [(0, '28386.152')]
+ [2023-01-11 19:06:32,024][454600] Fps is (10 sec: 222781.7, 60 sec: 228287.3, 300 sec: 194035.0). Total num frames: 34078720. Throughput: 0: 227690.4. Samples: 33839104. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+ [2023-01-11 19:06:32,024][454600] Avg episode reward: [(0, '29366.443')]
+ [2023-01-11 19:06:32,029][454600] Saving new best policy, reward=29366.443!
+ [2023-01-11 19:06:37,021][454600] Fps is (10 sec: 229635.4, 60 sec: 228458.3, 300 sec: 195198.1). Total num frames: 35258368. Throughput: 0: 227782.3. Samples: 35219456. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:06:37,021][454600] Avg episode reward: [(0, '29529.770')]
+ [2023-01-11 19:06:37,028][454600] Saving new best policy, reward=29529.770!
+ [2023-01-11 19:06:42,025][454600] Fps is (10 sec: 229348.6, 60 sec: 228278.0, 300 sec: 195937.5). Total num frames: 36372480. Throughput: 0: 227579.7. Samples: 35901440. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:06:42,026][454600] Avg episode reward: [(0, '28905.598')]
+ [2023-01-11 19:06:47,083][454600] Fps is (10 sec: 227954.9, 60 sec: 228057.6, 300 sec: 196926.6). Total num frames: 37552128. Throughput: 0: 226423.9. Samples: 37269504. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:06:47,084][454600] Avg episode reward: [(0, '30232.330')]
+ [2023-01-11 19:06:47,086][454600] Saving new best policy, reward=30232.330!
+ [2023-01-11 19:06:52,023][454600] Fps is (10 sec: 222876.0, 60 sec: 227183.5, 300 sec: 197314.2). Total num frames: 38600704. Throughput: 0: 225726.0. Samples: 38600704. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+ [2023-01-11 19:06:52,023][454600] Avg episode reward: [(0, '30448.795')]
+ [2023-01-11 19:06:52,123][454600] Saving new best policy, reward=30448.795!
+ [2023-01-11 19:06:57,075][454600] Fps is (10 sec: 223000.7, 60 sec: 226986.4, 300 sec: 198224.8). Total num frames: 39780352. Throughput: 0: 225585.0. Samples: 39260160. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+ [2023-01-11 19:06:57,076][454600] Avg episode reward: [(0, '31469.381')]
+ [2023-01-11 19:06:57,078][454600] Saving new best policy, reward=31469.381!
+ [2023-01-11 19:07:02,025][454600] Fps is (10 sec: 229324.8, 60 sec: 226084.3, 300 sec: 198871.3). Total num frames: 40894464. Throughput: 0: 225719.5. Samples: 40632320. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:07:02,025][454600] Avg episode reward: [(0, '30854.137')]
+ [2023-01-11 19:07:07,024][454600] Fps is (10 sec: 230546.7, 60 sec: 227178.5, 300 sec: 199751.5). Total num frames: 42074112. Throughput: 0: 226315.5. Samples: 42035200. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:07:07,025][454600] Avg episode reward: [(0, '29817.902')]
+ [2023-01-11 19:07:12,104][454600] Fps is (10 sec: 234082.2, 60 sec: 227003.7, 300 sec: 200516.5). Total num frames: 43253760. Throughput: 0: 228192.7. Samples: 42729472. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:07:12,104][454600] Avg episode reward: [(0, '31047.578')]
+ [2023-01-11 19:07:17,024][454600] Fps is (10 sec: 229380.1, 60 sec: 227182.7, 300 sec: 201094.4). Total num frames: 44367872. Throughput: 0: 228146.1. Samples: 44105728. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+ [2023-01-11 19:07:17,025][454600] Avg episode reward: [(0, '31452.555')]
+ [2023-01-11 19:07:22,021][454600] Fps is (10 sec: 224671.6, 60 sec: 227193.5, 300 sec: 201578.2). Total num frames: 45481984. Throughput: 0: 227961.1. Samples: 45477888. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+ [2023-01-11 19:07:22,022][454600] Avg episode reward: [(0, '31025.244')]
+ [2023-01-11 19:07:22,033][454600] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/02_v083_brax_basic_benchmark_see_2322090_env_humanoid_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000006940_45481984.pth...
+ [2023-01-11 19:07:27,030][454600] Fps is (10 sec: 222694.8, 60 sec: 227199.7, 300 sec: 202031.5). Total num frames: 46596096. Throughput: 0: 226166.3. Samples: 46080000. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+ [2023-01-11 19:07:27,030][454600] Avg episode reward: [(0, '28707.133')]
+ [2023-01-11 19:07:32,024][454600] Fps is (10 sec: 222775.5, 60 sec: 227192.4, 300 sec: 202478.0). Total num frames: 47710208. Throughput: 0: 226671.7. Samples: 47456256. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:07:32,024][454600] Avg episode reward: [(0, '30637.068')]
+ [2023-01-11 19:07:37,025][454600] Fps is (10 sec: 229488.7, 60 sec: 227175.5, 300 sec: 203172.0). Total num frames: 48889856. Throughput: 0: 227953.7. Samples: 48859136. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:07:37,026][454600] Avg episode reward: [(0, '31947.164')]
+ [2023-01-11 19:07:37,028][454600] Saving new best policy, reward=31947.164!
+ [2023-01-11 19:07:42,024][454600] Fps is (10 sec: 222807.4, 60 sec: 226102.1, 300 sec: 203305.7). Total num frames: 49938432. Throughput: 0: 226400.1. Samples: 49436672. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:07:42,025][454600] Avg episode reward: [(0, '30685.115')]
+ [2023-01-11 19:07:47,024][454600] Fps is (10 sec: 216277.6, 60 sec: 225226.7, 300 sec: 203694.9). Total num frames: 51052544. Throughput: 0: 226465.2. Samples: 50823168. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:07:47,025][454600] Avg episode reward: [(0, '32077.797')]
+ [2023-01-11 19:07:47,030][454600] Saving new best policy, reward=32077.797!
+ [2023-01-11 19:07:52,048][454600] Fps is (10 sec: 222283.4, 60 sec: 226001.8, 300 sec: 204049.8). Total num frames: 52166656. Throughput: 0: 223703.1. Samples: 52107264. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:07:52,049][454600] Avg episode reward: [(0, '31880.949')]
+ [2023-01-11 19:07:57,023][454600] Fps is (10 sec: 222862.4, 60 sec: 225203.4, 300 sec: 204430.1). Total num frames: 53280768. Throughput: 0: 224501.3. Samples: 52813824. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+ [2023-01-11 19:07:57,023][454600] Avg episode reward: [(0, '32334.512')]
+ [2023-01-11 19:07:57,029][454600] Saving new best policy, reward=32334.512!
+ [2023-01-11 19:08:02,020][454600] Fps is (10 sec: 216877.2, 60 sec: 223931.1, 300 sec: 204531.4). Total num frames: 54329344. Throughput: 0: 222112.1. Samples: 54099968. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+ [2023-01-11 19:08:02,021][454600] Avg episode reward: [(0, '31227.248')]
+ [2023-01-11 19:08:07,021][454600] Fps is (10 sec: 222869.4, 60 sec: 223928.3, 300 sec: 205111.4). Total num frames: 55508992. Throughput: 0: 222189.7. Samples: 55476224. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+ [2023-01-11 19:08:07,021][454600] Avg episode reward: [(0, '32576.342')]
+ [2023-01-11 19:08:07,028][454600] Saving new best policy, reward=32576.342!
+ [2023-01-11 19:08:12,023][454600] Fps is (10 sec: 229315.5, 60 sec: 223122.5, 300 sec: 205430.8). Total num frames: 56623104. Throughput: 0: 223948.1. Samples: 56156160. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+ [2023-01-11 19:08:12,024][454600] Avg episode reward: [(0, '32309.672')]
+ [2023-01-11 19:08:17,070][454600] Fps is (10 sec: 228250.9, 60 sec: 223743.9, 300 sec: 205939.8). Total num frames: 57802752. Throughput: 0: 223457.3. Samples: 57522176. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+ [2023-01-11 19:08:17,070][454600] Avg episode reward: [(0, '32164.748')]
+ [2023-01-11 19:08:22,022][454600] Fps is (10 sec: 229412.0, 60 sec: 223914.5, 300 sec: 206270.2). Total num frames: 58916864. Throughput: 0: 223294.2. Samples: 58906624. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+ [2023-01-11 19:08:22,022][454600] Avg episode reward: [(0, '30573.240')]
+ [2023-01-11 19:08:27,087][454600] Fps is (10 sec: 228991.7, 60 sec: 224793.9, 300 sec: 206734.2). Total num frames: 60096512. Throughput: 0: 224922.6. Samples: 59572224. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+ [2023-01-11 19:08:27,087][454600] Avg episode reward: [(0, '31910.117')]
+ [2023-01-11 19:08:32,023][454600] Fps is (10 sec: 229352.0, 60 sec: 225010.7, 300 sec: 216814.9). Total num frames: 61210624. Throughput: 0: 225016.5. Samples: 60948480. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+ [2023-01-11 19:08:32,023][454600] Avg episode reward: [(0, '33219.871')]
+ [2023-01-11 19:08:32,034][454600] Saving new best policy, reward=33219.871!
+ [2023-01-11 19:08:37,100][454600] Fps is (10 sec: 229064.5, 60 sec: 224724.7, 300 sec: 227418.8). Total num frames: 62390272. Throughput: 0: 226794.1. Samples: 62324736. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+ [2023-01-11 19:08:37,101][454600] Avg episode reward: [(0, '32330.926')]
+ [2023-01-11 19:08:42,021][454600] Fps is (10 sec: 229421.0, 60 sec: 226112.9, 300 sec: 227405.2). Total num frames: 63504384. Throughput: 0: 226792.4. Samples: 63019008. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+ [2023-01-11 19:08:42,021][454600] Avg episode reward: [(0, '32682.035')]
+ [2023-01-11 19:08:47,072][454600] Fps is (10 sec: 230036.3, 60 sec: 227013.4, 300 sec: 227520.5). Total num frames: 64684032. Throughput: 0: 228706.7. Samples: 64403456. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+ [2023-01-11 19:08:47,072][454600] Avg episode reward: [(0, '33019.711')]
+ [2023-01-11 19:08:52,024][454600] Fps is (10 sec: 229291.9, 60 sec: 227283.2, 300 sec: 227479.7). Total num frames: 65798144. Throughput: 0: 229311.7. Samples: 65796096. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+ [2023-01-11 19:08:52,025][454600] Avg episode reward: [(0, '33007.113')]
+ [2023-01-11 19:08:57,103][454600] Fps is (10 sec: 228646.2, 60 sec: 227976.8, 300 sec: 227566.6). Total num frames: 66977792. Throughput: 0: 228421.9. Samples: 66453504. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+ [2023-01-11 19:08:57,104][454600] Avg episode reward: [(0, '33615.703')]
+ [2023-01-11 19:08:57,107][454600] Saving new best policy, reward=33615.703!
+ [2023-01-11 19:09:02,026][454600] Fps is (10 sec: 229327.9, 60 sec: 229353.3, 300 sec: 228598.5). Total num frames: 68091904. Throughput: 0: 229279.0. Samples: 67829760. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+ [2023-01-11 19:09:02,027][454600] Avg episode reward: [(0, '33588.613')]
+ [2023-01-11 19:09:07,024][454600] Fps is (10 sec: 224609.2, 60 sec: 228271.1, 300 sec: 228374.6). Total num frames: 69206016. Throughput: 0: 228863.2. Samples: 69206016. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+ [2023-01-11 19:09:07,024][454600] Avg episode reward: [(0, '33701.699')]
+ [2023-01-11 19:09:07,031][454600] Saving new best policy, reward=33701.699!
+ [2023-01-11 19:09:12,079][454600] Fps is (10 sec: 228179.7, 60 sec: 229163.2, 300 sec: 228333.4). Total num frames: 70385664. Throughput: 0: 228778.8. Samples: 69865472. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+ [2023-01-11 19:09:12,079][454600] Avg episode reward: [(0, '34000.129')]
+ [2023-01-11 19:09:12,086][454600] Saving new best policy, reward=34000.129!
+ [2023-01-11 19:09:17,023][454600] Fps is (10 sec: 229400.4, 60 sec: 228462.8, 300 sec: 228155.4). Total num frames: 71499776. Throughput: 0: 228646.4. Samples: 71237632. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+ [2023-01-11 19:09:17,023][454600] Avg episode reward: [(0, '34372.684')]
+ [2023-01-11 19:09:17,030][454600] Saving new best policy, reward=34372.684!
+ [2023-01-11 19:09:22,024][454600] Fps is (10 sec: 224056.5, 60 sec: 228275.4, 300 sec: 227930.7). Total num frames: 72613888. Throughput: 0: 228855.0. Samples: 72605696. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:09:22,024][454600] Avg episode reward: [(0, '33211.176')]
+ [2023-01-11 19:09:22,032][454600] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/02_v083_brax_basic_benchmark_see_2322090_env_humanoid_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000011080_72613888.pth...
+ [2023-01-11 19:09:22,100][454600] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/02_v083_brax_basic_benchmark_see_2322090_env_humanoid_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000002760_18087936.pth
+ [2023-01-11 19:09:27,024][454600] Fps is (10 sec: 222802.5, 60 sec: 227429.9, 300 sec: 227743.2). Total num frames: 73728000. Throughput: 0: 227767.3. Samples: 73269248. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:09:27,024][454600] Avg episode reward: [(0, '34268.336')]
+ [2023-01-11 19:09:32,022][454600] Fps is (10 sec: 229424.9, 60 sec: 228287.4, 300 sec: 227932.2). Total num frames: 74907648. Throughput: 0: 227854.0. Samples: 74645504. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:09:32,022][454600] Avg episode reward: [(0, '33945.703')]
+ [2023-01-11 19:09:37,023][454600] Fps is (10 sec: 229387.8, 60 sec: 227483.5, 300 sec: 227709.6). Total num frames: 76021760. Throughput: 0: 227242.3. Samples: 76021760. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:09:37,024][454600] Avg episode reward: [(0, '32851.797')]
+ [2023-01-11 19:09:42,042][454600] Fps is (10 sec: 228909.9, 60 sec: 228202.5, 300 sec: 227736.6). Total num frames: 77201408. Throughput: 0: 227775.9. Samples: 76689408. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:09:42,043][454600] Avg episode reward: [(0, '33755.750')]
+ [2023-01-11 19:09:47,023][454600] Fps is (10 sec: 229373.2, 60 sec: 227374.1, 300 sec: 227709.7). Total num frames: 78315520. Throughput: 0: 227434.3. Samples: 78063616. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:09:47,024][454600] Avg episode reward: [(0, '34199.566')]
+ [2023-01-11 19:09:52,048][454600] Fps is (10 sec: 229235.7, 60 sec: 228193.2, 300 sec: 227691.7). Total num frames: 79495168. Throughput: 0: 227296.9. Samples: 79439872. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:09:52,049][454600] Avg episode reward: [(0, '34146.453')]
+ [2023-01-11 19:09:57,024][454600] Fps is (10 sec: 229366.0, 60 sec: 227493.6, 300 sec: 227544.5). Total num frames: 80609280. Throughput: 0: 228289.8. Samples: 80125952. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:09:57,024][454600] Avg episode reward: [(0, '33985.309')]
+ [2023-01-11 19:10:02,023][454600] Fps is (10 sec: 223382.2, 60 sec: 227204.2, 300 sec: 227487.3). Total num frames: 81723392. Throughput: 0: 227645.7. Samples: 81481728. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:10:02,024][454600] Avg episode reward: [(0, '34004.363')]
+ [2023-01-11 19:10:07,025][454600] Fps is (10 sec: 229351.9, 60 sec: 228280.2, 300 sec: 227486.1). Total num frames: 82903040. Throughput: 0: 227914.0. Samples: 82862080. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:10:07,026][454600] Avg episode reward: [(0, '34364.102')]
+ [2023-01-11 19:10:12,025][454600] Fps is (10 sec: 229339.5, 60 sec: 227396.9, 300 sec: 227485.1). Total num frames: 84017152. Throughput: 0: 228643.4. Samples: 83558400. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:10:12,025][454600] Avg episode reward: [(0, '34041.414')]
+ [2023-01-11 19:10:17,024][454600] Fps is (10 sec: 229386.7, 60 sec: 228277.9, 300 sec: 227485.9). Total num frames: 85196800. Throughput: 0: 228633.7. Samples: 84934656. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:10:17,025][454600] Avg episode reward: [(0, '33549.828')]
+ [2023-01-11 19:10:18,676][454600] Early stopping after 2 epochs (4 sgd steps), loss delta 0.0000007
+ [2023-01-11 19:10:22,109][454600] Fps is (10 sec: 233959.5, 60 sec: 229051.2, 300 sec: 227642.9). Total num frames: 86376448. Throughput: 0: 228213.7. Samples: 86310912. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:10:22,109][454600] Avg episode reward: [(0, '35236.949')]
+ [2023-01-11 19:10:22,116][454600] Saving new best policy, reward=35236.949!
+ [2023-01-11 19:10:27,025][454600] Fps is (10 sec: 229361.7, 60 sec: 229371.2, 300 sec: 227487.5). Total num frames: 87490560. Throughput: 0: 228734.1. Samples: 86978560. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:10:27,026][454600] Avg episode reward: [(0, '32445.162')]
+ [2023-01-11 19:10:32,023][454600] Fps is (10 sec: 224758.3, 60 sec: 228279.6, 300 sec: 227299.3). Total num frames: 88604672. Throughput: 0: 228787.7. Samples: 88358912. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+ [2023-01-11 19:10:32,023][454600] Avg episode reward: [(0, '34647.660')]
+ [2023-01-11 19:10:37,031][454600] Fps is (10 sec: 229240.0, 60 sec: 229346.6, 300 sec: 227482.0). Total num frames: 89784320. Throughput: 0: 228735.0. Samples: 89729024. Policy #0 lag: (min: 8.0, avg: 8.0, max: 8.0)
+ [2023-01-11 19:10:37,032][454600] Avg episode reward: [(0, '33732.051')]
+ [2023-01-11 19:10:42,022][454600] Fps is (10 sec: 229395.1, 60 sec: 228360.3, 300 sec: 227266.8). Total num frames: 90898432. Throughput: 0: 229067.3. Samples: 90433536. Policy #0 lag: (min: 8.0, avg: 8.0, max: 8.0)
+ [2023-01-11 19:10:42,022][454600] Avg episode reward: [(0, '34734.863')]
+ [2023-01-11 19:10:47,051][454600] Fps is (10 sec: 228906.3, 60 sec: 229268.6, 300 sec: 227463.8). Total num frames: 92078080. Throughput: 0: 229094.7. Samples: 91797504. Policy #0 lag: (min: 8.0, avg: 8.0, max: 8.0)
+ [2023-01-11 19:10:47,052][454600] Avg episode reward: [(0, '35127.297')]
+ [2023-01-11 19:10:52,104][454600] Fps is (10 sec: 234005.2, 60 sec: 229162.1, 300 sec: 227423.5). Total num frames: 93257728. Throughput: 0: 229154.5. Samples: 93192192. Policy #0 lag: (min: 8.0, avg: 8.0, max: 8.0)
+ [2023-01-11 19:10:52,105][454600] Avg episode reward: [(0, '34548.809')]
+ [2023-01-11 19:10:57,024][454600] Fps is (10 sec: 230008.8, 60 sec: 229375.4, 300 sec: 227263.2). Total num frames: 94371840. Throughput: 0: 229334.0. Samples: 93878272. Policy #0 lag: (min: 8.0, avg: 8.0, max: 8.0)
+ [2023-01-11 19:10:57,024][454600] Avg episode reward: [(0, '34352.727')]
+ [2023-01-11 19:11:02,022][454600] Fps is (10 sec: 224670.1, 60 sec: 229380.5, 300 sec: 227264.7). Total num frames: 95485952. Throughput: 0: 229070.3. Samples: 95242240. Policy #0 lag: (min: 8.0, avg: 8.0, max: 8.0)
+ [2023-01-11 19:11:02,022][454600] Avg episode reward: [(0, '33813.215')]
+ [2023-01-11 19:11:07,023][454600] Fps is (10 sec: 229401.0, 60 sec: 229383.6, 300 sec: 227289.7). Total num frames: 96665600. Throughput: 0: 229587.1. Samples: 96622592. Policy #0 lag: (min: 8.0, avg: 8.0, max: 8.0)
+ [2023-01-11 19:11:07,023][454600] Avg episode reward: [(0, '34654.340')]
+ [2023-01-11 19:11:12,023][454600] Fps is (10 sec: 229344.1, 60 sec: 229381.2, 300 sec: 227264.4). Total num frames: 97779712. Throughput: 0: 229840.1. Samples: 97320960. Policy #0 lag: (min: 8.0, avg: 8.0, max: 8.0)
+ [2023-01-11 19:11:12,024][454600] Avg episode reward: [(0, '35312.879')]
+ [2023-01-11 19:11:12,035][454600] Saving new best policy, reward=35312.879!
+ [2023-01-11 19:11:17,024][454600] Fps is (10 sec: 229354.7, 60 sec: 229378.3, 300 sec: 227486.3). Total num frames: 98959360. Throughput: 0: 229734.5. Samples: 98697216. Policy #0 lag: (min: 8.0, avg: 8.0, max: 8.0)
+ [2023-01-11 19:11:17,024][454600] Avg episode reward: [(0, '34817.219')]
+ [2023-01-11 19:11:21,772][454600] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/02_v083_brax_basic_benchmark_see_2322090_env_humanoid_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000015264_100073472.pth...
+ [2023-01-11 19:11:21,814][454600] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/02_v083_brax_basic_benchmark_see_2322090_env_humanoid_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000006940_45481984.pth
+ [2023-01-11 19:11:21,818][454600] Stopping InferenceWorker_p0-w0...
+ [2023-01-11 19:11:21,818][454600] Stopping Batcher_0...
+ [2023-01-11 19:11:21,819][454600] Stopping RolloutWorker_w0...
+ [2023-01-11 19:11:21,819][454600] Component InferenceWorker_p0-w0 stopped!
+ [2023-01-11 19:11:21,819][454600] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/02_v083_brax_basic_benchmark_see_2322090_env_humanoid_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000015264_100073472.pth...
+ [2023-01-11 19:11:21,863][454600] Stopping LearnerWorker_p0...
+ [2023-01-11 19:11:21,864][454600] Component Batcher_0 stopped!
+ [2023-01-11 19:11:21,864][454600] Component RolloutWorker_w0 stopped!
+ [2023-01-11 19:11:21,864][454600] Component LearnerWorker_p0 stopped!
+ [2023-01-11 19:11:21,864][454600] Batcher 0 profile tree view:
+ batching: 0.3849, releasing_batches: 0.0669
+ [2023-01-11 19:11:21,865][454600] InferenceWorker_p0-w0 profile tree view:
+ update_model: 0.4926
+ one_step: 0.0013
+ handle_policy_step: 65.3049
+ deserialize: 0.5337, stack: 0.0729, obs_to_device_normalize: 11.5510, forward: 41.6508, prepare_outputs: 7.0596, send_messages: 0.9048
+ [2023-01-11 19:11:21,865][454600] Learner 0 profile tree view:
+ misc: 0.0056, prepare_batch: 5.9925
+ train: 138.8240
+ epoch_init: 0.0650, minibatch_init: 1.0265, losses_postprocess: 48.8531, kl_divergence: 5.8442, after_optimizer: 0.4433
+ calculate_losses: 18.3675
+ losses_init: 0.0348, forward_head: 3.0278, bptt_initial: 0.1284, bptt: 0.1504, tail: 8.9931, advantages_returns: 1.1807, losses: 3.5237
+ update: 62.2542
+ clip: 9.0029
+ [2023-01-11 19:11:21,865][454600] RolloutWorker_w0 profile tree view:
+ wait_for_trajectories: 0.0862, enqueue_policy_requests: 6.0452, process_policy_outputs: 3.8395, env_step: 203.9272, finalize_trajectories: 0.1686, complete_rollouts: 0.0726
+ post_env_step: 21.0621
+ process_env_step: 8.4224
+ [2023-01-11 19:11:21,865][454600] Loop Runner_EvtLoop terminating...
+ [2023-01-11 19:11:21,866][454600] Runner profile tree view:
+ main_loop: 474.1841
+ [2023-01-11 19:11:21,866][454600] Collected {0: 100073472}, FPS: 211043.5