Upload . with huggingface_hub
Files changed:
- .gitattributes +1 -0
- .summary/0/events.out.tfevents.1673494048.brain1.usc.edu +3 -0
- README.md +56 -0
- checkpoint_p0/best_000012096_79298560_reward_6510.601.pth +3 -0
- checkpoint_p0/checkpoint_000012096_79298560.pth +3 -0
- checkpoint_p0/checkpoint_000015266_100073472.pth +3 -0
- config.json +147 -0
- git.diff +0 -0
- replay.mp4 +3 -0
- sf_log.txt +225 -0
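
The commit title indicates these files were pushed with the `huggingface_hub` client. A minimal sketch of how such a commit is typically produced, assuming it is run from the experiment directory with authentication already set up (the `repo_id` is taken from the README in this commit):

```
# Sketch: push a local experiment folder to the Hub in a single commit.
# Assumes `huggingface-cli login` (or an HF_TOKEN env var) is configured.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path=".",  # "Upload ." -- the current directory
    repo_id="apetrenko/sample_factory_brax_walker2d",
    repo_type="model",
    commit_message="Upload . with huggingface_hub",
)
```

Sample-Factory also ships its own `--push_to_hub` wrapper, described in the README below.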
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+replay.mp4 filter=lfs diff=lfs merge=lfs -text
.summary/0/events.out.tfevents.1673494048.brain1.usc.edu ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9bca7a1c73a35092c6cdbb661b84cb9147c6377b5d2904872cecd045640458cc
+size 82406
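
The `.summary/0/...` file is a TensorBoard event log, stored here as a Git LFS pointer. A sketch of listing its scalar tags locally, assuming the real file has been fetched with `git lfs pull` and the `tensorboard` package is installed:

```
# Sketch: enumerate the scalar series recorded during training.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

ea = EventAccumulator(".summary/0")  # directory containing the events file
ea.Reload()                          # parse the event records
print(ea.Tags()["scalars"])          # tag names vary with Sample-Factory version
```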
README.md ADDED
@@ -0,0 +1,56 @@
+---
+library_name: sample-factory
+tags:
+- deep-reinforcement-learning
+- reinforcement-learning
+- sample-factory
+model-index:
+- name: APPO
+  results:
+  - task:
+      type: reinforcement-learning
+      name: reinforcement-learning
+    dataset:
+      name: walker2d
+      type: walker2d
+    metrics:
+    - type: mean_reward
+      value: 5459.17 +/- 2198.74
+      name: mean_reward
+      verified: false
+---
+
+An **APPO** model trained on the **walker2d** environment.
+
+This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
+Documentation on how to use Sample-Factory can be found at https://www.samplefactory.dev/
+
+
+## Downloading the model
+
+After installing Sample-Factory, download the model with:
+```
+python -m sample_factory.huggingface.load_from_hub -r apetrenko/sample_factory_brax_walker2d
+```
+
+
+## Using the model
+
+To run the model after download, use the `enjoy` script corresponding to this environment:
+```
+python -m sf_examples.brax.enjoy_brax --algo=APPO --env=walker2d --train_dir=./train_dir --experiment=sample_factory_brax_walker2d
+```
+
+
+You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
+See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.
+
+## Training with this model
+
+To continue training with this model, use the `train` script corresponding to this environment:
+```
+python -m sf_examples.brax.train_brax --algo=APPO --env=walker2d --train_dir=./train_dir --experiment=sample_factory_brax_walker2d --restart_behavior=resume --train_for_env_steps=10000000000
+```
+
+Note: you may have to adjust `--train_for_env_steps` to a suitably high number, as the experiment will resume from the number of steps at which it previously concluded.
+
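
For reference, the README's `load_from_hub` command wraps a plain Hub download; a roughly equivalent sketch using `huggingface_hub` directly (the target directory is an assumption, not part of this repo):

```
# Sketch: fetch the full repository snapshot for local use.
from huggingface_hub import snapshot_download

path = snapshot_download(
    repo_id="apetrenko/sample_factory_brax_walker2d",
    local_dir="./train_dir/sample_factory_brax_walker2d",  # assumed layout
)
print(f"Downloaded to {path}")
```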
checkpoint_p0/best_000012096_79298560_reward_6510.601.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58d7763d594c03f5451a12411ea120e61ea3780548750d961ef2c943b1af80df
+size 570871
checkpoint_p0/checkpoint_000012096_79298560.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7cb90a861093886fbbb0fadc5bcc93d7b2a779d5e2ad9030aa633b45718782f0
+size 571183
checkpoint_p0/checkpoint_000015266_100073472.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b60a75e3590a4533206876a72a1bc2b5d318be1d12c00986a60f815a0400fd69
+size 571183
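
The three `.pth` files above are PyTorch checkpoints stored via LFS: the best-reward snapshot plus the two most recent, matching `keep_checkpoints: 2` in the config below. A sketch for peeking inside one after download; the exact dictionary layout depends on Sample-Factory's checkpoint format, so treat the key names as unknown until printed:

```
# Sketch: inspect a checkpoint's top-level structure on CPU.
import torch

ckpt = torch.load(
    "checkpoint_p0/checkpoint_000015266_100073472.pth",
    map_location="cpu",  # no GPU needed just to look inside
)
if isinstance(ckpt, dict):
    print(list(ckpt.keys()))  # e.g. model weights, optimizer state, step counters
```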
config.json ADDED
@@ -0,0 +1,147 @@
+{
+  "help": false,
+  "algo": "APPO",
+  "env": "walker2d",
+  "experiment": "06_v083_brax_basic_benchmark_see_2322090_env_walker2d_u.rnn_False_n.epo_5",
+  "train_dir": "./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
+  "restart_behavior": "resume",
+  "device": "gpu",
+  "seed": 2322090,
+  "num_policies": 1,
+  "async_rl": false,
+  "serial_mode": true,
+  "batched_sampling": true,
+  "num_batches_to_accumulate": 2,
+  "worker_num_splits": 1,
+  "policy_workers_per_policy": 1,
+  "max_policy_lag": 1000,
+  "num_workers": 1,
+  "num_envs_per_worker": 1,
+  "batch_size": 32768,
+  "num_batches_per_epoch": 2,
+  "num_epochs": 5,
+  "rollout": 32,
+  "recurrence": 1,
+  "shuffle_minibatches": false,
+  "gamma": 0.99,
+  "reward_scale": 0.01,
+  "reward_clip": 1000.0,
+  "value_bootstrap": true,
+  "normalize_returns": true,
+  "exploration_loss_coeff": 0.0,
+  "value_loss_coeff": 2.0,
+  "kl_loss_coeff": 0.0,
+  "exploration_loss": "entropy",
+  "gae_lambda": 0.95,
+  "ppo_clip_ratio": 0.2,
+  "ppo_clip_value": 1.0,
+  "with_vtrace": false,
+  "vtrace_rho": 1.0,
+  "vtrace_c": 1.0,
+  "optimizer": "adam",
+  "adam_eps": 1e-06,
+  "adam_beta1": 0.9,
+  "adam_beta2": 0.999,
+  "max_grad_norm": 1.0,
+  "learning_rate": 0.0003,
+  "lr_schedule": "kl_adaptive_epoch",
+  "lr_schedule_kl_threshold": 0.008,
+  "lr_adaptive_min": 1e-06,
+  "lr_adaptive_max": 0.002,
+  "obs_subtract_mean": 0.0,
+  "obs_scale": 1.0,
+  "normalize_input": true,
+  "normalize_input_keys": null,
+  "decorrelate_experience_max_seconds": 0,
+  "decorrelate_envs_on_one_worker": true,
+  "actor_worker_gpus": [
+    0
+  ],
+  "set_workers_cpu_affinity": true,
+  "force_envs_single_thread": false,
+  "default_niceness": 0,
+  "log_to_file": true,
+  "experiment_summaries_interval": 10,
+  "flush_summaries_interval": 30,
+  "stats_avg": 100,
+  "summaries_use_frameskip": true,
+  "heartbeat_interval": 20,
+  "heartbeat_reporting_interval": 180,
+  "train_for_env_steps": 100000000,
+  "train_for_seconds": 10000000000,
+  "save_every_sec": 120,
+  "keep_checkpoints": 2,
+  "load_checkpoint_kind": "latest",
+  "save_milestones_sec": -1,
+  "save_best_every_sec": 5,
+  "save_best_metric": "reward",
+  "save_best_after": 5000000,
+  "benchmark": false,
+  "encoder_mlp_layers": [
+    256,
+    128,
+    64
+  ],
+  "encoder_conv_architecture": "convnet_simple",
+  "encoder_conv_mlp_layers": [
+    512
+  ],
+  "use_rnn": false,
+  "rnn_size": 512,
+  "rnn_type": "gru",
+  "rnn_num_layers": 1,
+  "decoder_mlp_layers": [],
+  "nonlinearity": "elu",
+  "policy_initialization": "torch_default",
+  "policy_init_gain": 1.0,
+  "actor_critic_share_weights": true,
+  "adaptive_stddev": false,
+  "continuous_tanh_scale": 0.0,
+  "initial_stddev": 1.0,
+  "use_env_info_cache": false,
+  "env_gpu_actions": true,
+  "env_gpu_observations": true,
+  "env_frameskip": 1,
+  "env_framestack": 1,
+  "pixel_format": "CHW",
+  "use_record_episode_statistics": false,
+  "with_wandb": true,
+  "wandb_user": null,
+  "wandb_project": "sample_factory",
+  "wandb_group": null,
+  "wandb_job_type": "SF",
+  "wandb_tags": [],
+  "with_pbt": false,
+  "pbt_mix_policies_in_one_env": true,
+  "pbt_period_env_steps": 5000000,
+  "pbt_start_mutation": 20000000,
+  "pbt_replace_fraction": 0.3,
+  "pbt_mutation_rate": 0.15,
+  "pbt_replace_reward_gap": 0.1,
+  "pbt_replace_reward_gap_absolute": 1e-06,
+  "pbt_optimize_gamma": false,
+  "pbt_target_objective": "true_objective",
+  "pbt_perturb_min": 1.1,
+  "pbt_perturb_max": 1.5,
+  "env_agents": 2048,
+  "clamp_actions": false,
+  "clamp_rew_obs": false,
+  "command_line": "--actor_worker_gpus 0 --wandb_project=sample_factory --with_wandb=True --seed=2322090 --env=walker2d --use_rnn=False --num_epochs=5 --experiment=06_v083_brax_basic_benchmark_see_2322090_env_walker2d_u.rnn_False_n.epo_5 --train_dir=./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
+  "cli_args": {
+    "env": "walker2d",
+    "experiment": "06_v083_brax_basic_benchmark_see_2322090_env_walker2d_u.rnn_False_n.epo_5",
+    "train_dir": "./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
+    "seed": 2322090,
+    "num_epochs": 5,
+    "actor_worker_gpus": [
+      0
+    ],
+    "use_rnn": false,
+    "with_wandb": true,
+    "wandb_project": "sample_factory"
+  },
+  "git_hash": "6aa87f2d416b9fad874b299d864a522c887c238a",
+  "git_repo_name": "git@github.com:alex-petrenko/sample-factory.git",
+  "train_script": "sf_examples.brax.train_brax",
+  "wandb_unique_id": "06_v083_brax_basic_benchmark_see_2322090_env_walker2d_u.rnn_False_n.epo_5_20230111_192722_009099"
+}
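
The sampler settings in this config are internally consistent: one rollout across all agents yields exactly one training batch worth of data per iteration. A small sketch that checks this arithmetic from the file (assuming it is read from the repo root):

```
# Sketch: verify samples-per-iteration from the training config.
import json

with open("config.json") as f:
    cfg = json.load(f)

per_rollout = cfg["env_agents"] * cfg["rollout"]              # 2048 * 32 = 65536
per_epoch = cfg["batch_size"] * cfg["num_batches_per_epoch"]  # 32768 * 2 = 65536
assert per_rollout == per_epoch
print(f"{per_rollout} env steps per iteration, {cfg['num_epochs']} epochs per batch")
```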
git.diff ADDED
File without changes
replay.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:079c0cabb6ad9a946bdf746ad4c134ad0090319fe4f7f059747280a4adaa5575
+size 1782125
sf_log.txt ADDED
@@ -0,0 +1,225 @@
+[2023-01-11 19:27:33,824][2153185] Saving configuration to ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/06_v083_brax_basic_benchmark_see_2322090_env_walker2d_u.rnn_False_n.epo_5/config.json...
+[2023-01-11 19:27:34,003][2153185] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-01-11 19:27:34,005][2153185] Rollout worker 0 uses device cuda:0
+[2023-01-11 19:27:34,006][2153185] In synchronous mode, we only accumulate one batch. Setting num_batches_to_accumulate to 1
+[2023-01-11 19:27:34,067][2153185] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-01-11 19:27:34,068][2153185] InferenceWorker_p0-w0: min num requests: 1
+[2023-01-11 19:27:34,069][2153185] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-01-11 19:27:34,070][2153185] WARNING! It is generally recommended to enable Fixed KL loss (https://arxiv.org/pdf/1707.06347.pdf) for continuous action tasks to avoid potential numerical issues. I.e. set --kl_loss_coeff=0.1
+[2023-01-11 19:27:34,070][2153185] Setting fixed seed 2322090
+[2023-01-11 19:27:34,071][2153185] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-01-11 19:27:34,071][2153185] Initializing actor-critic model on device cuda:0
+[2023-01-11 19:27:34,072][2153185] RunningMeanStd input shape: (17,)
+[2023-01-11 19:27:34,072][2153185] RunningMeanStd input shape: (1,)
+[2023-01-11 19:27:34,154][2153185] Created Actor Critic model with architecture:
+[2023-01-11 19:27:34,154][2153185] ActorCriticSharedWeights(
+  (obs_normalizer): ObservationNormalizer(
+    (running_mean_std): RunningMeanStdDictInPlace(
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): MultiInputEncoder(
+    (encoders): ModuleDict(
+      (obs): MlpEncoder(
+        (mlp_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ELU)
+          (2): RecursiveScriptModule(original_name=Linear)
+          (3): RecursiveScriptModule(original_name=ELU)
+          (4): RecursiveScriptModule(original_name=Linear)
+          (5): RecursiveScriptModule(original_name=ELU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreIdentity()
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=64, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationContinuousNonAdaptiveStddev(
+    (distribution_linear): Linear(in_features=64, out_features=6, bias=True)
+  )
+)
+[2023-01-11 19:27:34,156][2153185] Using optimizer <class 'torch.optim.adam.Adam'>
+[2023-01-11 19:27:34,159][2153185] No checkpoints found
+[2023-01-11 19:27:34,160][2153185] Did not load from checkpoint, starting from scratch!
+[2023-01-11 19:27:34,161][2153185] Initialized policy 0 weights for model version 0
+[2023-01-11 19:27:34,161][2153185] LearnerWorker_p0 finished initialization!
+[2023-01-11 19:27:34,162][2153185] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-01-11 19:27:34,167][2153185] Inference worker 0-0 is ready!
+[2023-01-11 19:27:34,167][2153185] All inference workers are ready! Signal rollout workers to start!
+[2023-01-11 19:27:34,168][2153185] EnvRunner 0-0 uses policy 0
+[2023-01-11 19:27:35,507][2153185] Resetting env <VectorGymWrapper instance> with 2048 parallel agents...
+[2023-01-11 19:27:38,375][2153185] reset() done, obs.shape=torch.Size([2048, 17])!
+[2023-01-11 19:27:47,676][2153185] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 2048. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-01-11 19:27:56,420][2153185] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 2048. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-01-11 19:27:56,424][2153185] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 234.1. Samples: 4096. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-01-11 19:27:56,429][2153185] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 468.0. Samples: 6144. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-01-11 19:27:56,432][2153185] Heartbeat connected on Batcher_0
+[2023-01-11 19:27:56,432][2153185] Heartbeat connected on LearnerWorker_p0
+[2023-01-11 19:27:56,432][2153185] Heartbeat connected on InferenceWorker_p0-w0
+[2023-01-11 19:27:56,432][2153185] Heartbeat connected on RolloutWorker_w0
+[2023-01-11 19:27:58,473][2153185] Fps is (10 sec: 127937.9, 60 sec: 24280.7, 300 sec: 24280.7). Total num frames: 262144. Throughput: 0: 15365.1. Samples: 167936. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+[2023-01-11 19:27:58,474][2153185] Avg episode reward: [(0, '23.990')]
+[2023-01-11 19:28:03,423][2153185] Fps is (10 sec: 318590.1, 60 sec: 141502.0, 300 sec: 141502.0). Total num frames: 2228224. Throughput: 0: 87268.2. Samples: 1376256. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:28:03,424][2153185] Avg episode reward: [(0, '618.064')]
+[2023-01-11 19:28:08,424][2153185] Fps is (10 sec: 381986.4, 60 sec: 195844.4, 300 sec: 195844.4). Total num frames: 4063232. Throughput: 0: 174917.4. Samples: 3631104. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:28:08,425][2153185] Avg episode reward: [(0, '630.121')]
+[2023-01-11 19:28:13,422][2153185] Fps is (10 sec: 367046.5, 60 sec: 229095.9, 300 sec: 229095.9). Total num frames: 5898240. Throughput: 0: 227027.7. Samples: 5847040. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:28:13,423][2153185] Avg episode reward: [(0, '783.157')]
+[2023-01-11 19:28:13,483][2153185] Saving new best policy, reward=783.157!
+[2023-01-11 19:28:18,424][2153185] Fps is (10 sec: 373558.6, 60 sec: 253642.4, 300 sec: 253642.4). Total num frames: 7798784. Throughput: 0: 227065.9. Samples: 6983680. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+[2023-01-11 19:28:18,424][2153185] Avg episode reward: [(0, '2312.752')]
+[2023-01-11 19:28:18,432][2153185] Saving new best policy, reward=2312.752!
+[2023-01-11 19:28:23,431][2153185] Fps is (10 sec: 379766.5, 60 sec: 271273.9, 300 sec: 271273.9). Total num frames: 9699328. Throughput: 0: 258386.1. Samples: 9240576. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+[2023-01-11 19:28:23,432][2153185] Avg episode reward: [(0, '2002.332')]
+[2023-01-11 19:28:28,423][2153185] Fps is (10 sec: 373562.0, 60 sec: 283072.2, 300 sec: 283072.2). Total num frames: 11534336. Throughput: 0: 358297.9. Samples: 11468800. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+[2023-01-11 19:28:28,424][2153185] Avg episode reward: [(0, '2368.916')]
+[2023-01-11 19:28:28,429][2153185] Saving new best policy, reward=2368.916!
+[2023-01-11 19:28:33,443][2153185] Fps is (10 sec: 373110.6, 60 sec: 293551.7, 300 sec: 293551.7). Total num frames: 13434880. Throughput: 0: 339901.9. Samples: 12587008. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+[2023-01-11 19:28:33,444][2153185] Avg episode reward: [(0, '2996.578')]
+[2023-01-11 19:28:33,446][2153185] Saving new best policy, reward=2996.578!
+[2023-01-11 19:28:38,422][2153185] Fps is (10 sec: 373599.0, 60 sec: 300909.4, 300 sec: 300909.4). Total num frames: 15269888. Throughput: 0: 352559.6. Samples: 14811136. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+[2023-01-11 19:28:38,423][2153185] Avg episode reward: [(0, '3076.930')]
+[2023-01-11 19:28:38,431][2153185] Saving new best policy, reward=3076.930!
+[2023-01-11 19:28:43,424][2153185] Fps is (10 sec: 367721.8, 60 sec: 363906.9, 300 sec: 306830.4). Total num frames: 17104896. Throughput: 0: 375696.5. Samples: 17055744. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+[2023-01-11 19:28:43,424][2153185] Avg episode reward: [(0, '3859.353')]
+[2023-01-11 19:28:43,428][2153185] Saving new best policy, reward=3859.353!
+[2023-01-11 19:28:48,422][2153185] Fps is (10 sec: 373566.4, 60 sec: 365502.6, 300 sec: 312869.9). Total num frames: 19005440. Throughput: 0: 372839.0. Samples: 18153472. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:28:48,423][2153185] Avg episode reward: [(0, '3764.592')]
+[2023-01-11 19:28:53,424][2153185] Fps is (10 sec: 373543.7, 60 sec: 365657.5, 300 sec: 316977.7). Total num frames: 20840448. Throughput: 0: 373417.6. Samples: 20434944. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:28:53,425][2153185] Avg episode reward: [(0, '4154.622')]
+[2023-01-11 19:28:53,429][2153185] Saving new best policy, reward=4154.622!
+[2023-01-11 19:28:58,456][2153185] Fps is (10 sec: 372296.1, 60 sec: 374754.2, 300 sec: 321294.3). Total num frames: 22740992. Throughput: 0: 373094.6. Samples: 22648832. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:28:58,457][2153185] Avg episode reward: [(0, '4481.427')]
+[2023-01-11 19:28:58,463][2153185] Saving new best policy, reward=4481.427!
+[2023-01-11 19:29:03,470][2153185] Fps is (10 sec: 384889.5, 60 sec: 374357.4, 300 sec: 325979.1). Total num frames: 24707072. Throughput: 0: 373080.5. Samples: 23789568. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+[2023-01-11 19:29:03,471][2153185] Avg episode reward: [(0, '4315.631')]
+[2023-01-11 19:29:08,424][2153185] Fps is (10 sec: 381321.0, 60 sec: 374645.7, 300 sec: 328704.5). Total num frames: 26542080. Throughput: 0: 375071.6. Samples: 26116096. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+[2023-01-11 19:29:08,425][2153185] Avg episode reward: [(0, '5075.545')]
+[2023-01-11 19:29:08,433][2153185] Saving new best policy, reward=5075.545!
+[2023-01-11 19:29:12,089][2153185] Early stopping after 3 epochs (6 sgd steps), loss delta 0.0000006
+[2023-01-11 19:29:13,468][2153185] Fps is (10 sec: 380166.0, 60 sec: 376542.0, 300 sec: 332294.2). Total num frames: 28508160. Throughput: 0: 375819.4. Samples: 28397568. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+[2023-01-11 19:29:13,469][2153185] Avg episode reward: [(0, '4851.746')]
+[2023-01-11 19:29:17,083][2153185] Early stopping after 5 epochs (10 sgd steps), loss delta 0.0000005
+[2023-01-11 19:29:18,424][2153185] Fps is (10 sec: 380103.1, 60 sec: 375736.5, 300 sec: 334368.5). Total num frames: 30343168. Throughput: 0: 377264.1. Samples: 29556736. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+[2023-01-11 19:29:18,425][2153185] Avg episode reward: [(0, '3804.353')]
+[2023-01-11 19:29:23,430][2153185] Fps is (10 sec: 374981.1, 60 sec: 375744.9, 300 sec: 336735.2). Total num frames: 32243712. Throughput: 0: 377128.2. Samples: 31784960. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:29:23,431][2153185] Avg episode reward: [(0, '4149.015')]
+[2023-01-11 19:29:28,424][2153185] Fps is (10 sec: 373559.1, 60 sec: 375736.0, 300 sec: 338258.5). Total num frames: 34078720. Throughput: 0: 376827.7. Samples: 34013184. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:29:28,425][2153185] Avg episode reward: [(0, '4615.276')]
+[2023-01-11 19:29:28,433][2153185] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/06_v083_brax_basic_benchmark_see_2322090_env_walker2d_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000005196_34078720.pth...
+[2023-01-11 19:29:33,423][2153185] Fps is (10 sec: 373832.6, 60 sec: 375866.1, 300 sec: 340240.8). Total num frames: 35979264. Throughput: 0: 377460.9. Samples: 35139584. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:29:33,424][2153185] Avg episode reward: [(0, '5411.330')]
+[2023-01-11 19:29:33,427][2153185] Saving new best policy, reward=5411.330!
+[2023-01-11 19:29:38,480][2153185] Fps is (10 sec: 378011.1, 60 sec: 376472.7, 300 sec: 341866.0). Total num frames: 37879808. Throughput: 0: 376411.6. Samples: 37394432. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+[2023-01-11 19:29:38,480][2153185] Avg episode reward: [(0, '5203.336')]
+[2023-01-11 19:29:43,422][2153185] Fps is (10 sec: 373588.1, 60 sec: 376841.2, 300 sec: 343121.6). Total num frames: 39714816. Throughput: 0: 378071.0. Samples: 39649280. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+[2023-01-11 19:29:43,423][2153185] Avg episode reward: [(0, '5195.713')]
+[2023-01-11 19:29:48,444][2153185] Fps is (10 sec: 374897.6, 60 sec: 376695.4, 300 sec: 344591.4). Total num frames: 41615360. Throughput: 0: 377415.6. Samples: 40763392. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:29:48,444][2153185] Avg episode reward: [(0, '4986.651')]
+[2023-01-11 19:29:53,424][2153185] Fps is (10 sec: 373487.3, 60 sec: 376831.7, 300 sec: 345536.9). Total num frames: 43450368. Throughput: 0: 375558.6. Samples: 43016192. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:29:53,425][2153185] Avg episode reward: [(0, '5789.914')]
+[2023-01-11 19:29:53,428][2153185] Saving new best policy, reward=5789.914!
+[2023-01-11 19:29:58,424][2153185] Fps is (10 sec: 374311.5, 60 sec: 377034.7, 300 sec: 346859.9). Total num frames: 45350912. Throughput: 0: 375659.2. Samples: 45285376. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:29:58,424][2153185] Avg episode reward: [(0, '5175.173')]
+[2023-01-11 19:30:03,424][2153185] Fps is (10 sec: 373562.6, 60 sec: 374936.3, 300 sec: 347601.4). Total num frames: 47185920. Throughput: 0: 374287.2. Samples: 46399488. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:30:03,424][2153185] Avg episode reward: [(0, '5716.311')]
+[2023-01-11 19:30:08,481][2153185] Fps is (10 sec: 377936.6, 60 sec: 376474.3, 300 sec: 349079.7). Total num frames: 49152000. Throughput: 0: 374544.6. Samples: 48658432. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:30:08,482][2153185] Avg episode reward: [(0, '5753.585')]
+[2023-01-11 19:30:13,423][2153185] Fps is (10 sec: 380151.6, 60 sec: 374933.9, 300 sec: 349834.5). Total num frames: 50987008. Throughput: 0: 375843.1. Samples: 50925568. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:30:13,423][2153185] Avg episode reward: [(0, '5924.315')]
+[2023-01-11 19:30:13,427][2153185] Saving new best policy, reward=5924.315!
+[2023-01-11 19:30:18,422][2153185] Fps is (10 sec: 375757.5, 60 sec: 375750.7, 300 sec: 350839.0). Total num frames: 52887552. Throughput: 0: 375698.9. Samples: 52045824. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:30:18,423][2153185] Avg episode reward: [(0, '6123.117')]
+[2023-01-11 19:30:18,430][2153185] Saving new best policy, reward=6123.117!
+[2023-01-11 19:30:23,424][2153185] Fps is (10 sec: 373514.6, 60 sec: 374689.5, 300 sec: 351355.1). Total num frames: 54722560. Throughput: 0: 375341.2. Samples: 54263808. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:30:23,424][2153185] Avg episode reward: [(0, '5322.819')]
+[2023-01-11 19:30:28,423][2153185] Fps is (10 sec: 373533.7, 60 sec: 375746.5, 300 sec: 352250.9). Total num frames: 56623104. Throughput: 0: 375732.2. Samples: 56557568. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:30:28,423][2153185] Avg episode reward: [(0, '5822.090')]
+[2023-01-11 19:30:33,424][2153185] Fps is (10 sec: 380091.8, 60 sec: 375732.6, 300 sec: 353088.9). Total num frames: 58523648. Throughput: 0: 375903.8. Samples: 57671680. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:30:33,425][2153185] Avg episode reward: [(0, '5254.855')]
+[2023-01-11 19:30:38,462][2153185] Fps is (10 sec: 378651.3, 60 sec: 375853.0, 300 sec: 353802.7). Total num frames: 60424192. Throughput: 0: 376290.2. Samples: 59963392. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:30:38,462][2153185] Avg episode reward: [(0, '5823.812')]
+[2023-01-11 19:30:43,424][2153185] Fps is (10 sec: 373560.6, 60 sec: 375728.0, 300 sec: 354253.7). Total num frames: 62259200. Throughput: 0: 375736.2. Samples: 62193664. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:30:43,425][2153185] Avg episode reward: [(0, '5819.084')]
+[2023-01-11 19:30:48,476][2153185] Fps is (10 sec: 373017.6, 60 sec: 375538.5, 300 sec: 354867.0). Total num frames: 64159744. Throughput: 0: 375304.5. Samples: 63307776. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:30:48,477][2153185] Avg episode reward: [(0, '4931.663')]
+[2023-01-11 19:30:53,422][2153185] Fps is (10 sec: 380175.9, 60 sec: 376842.7, 300 sec: 355649.0). Total num frames: 66060288. Throughput: 0: 376687.2. Samples: 65587200. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:30:53,423][2153185] Avg episode reward: [(0, '5748.724')]
+[2023-01-11 19:30:58,423][2153185] Fps is (10 sec: 395291.0, 60 sec: 379017.5, 300 sec: 356975.1). Total num frames: 68091904. Throughput: 0: 379737.8. Samples: 68014080. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:30:58,424][2153185] Avg episode reward: [(0, '5415.865')]
+[2023-01-11 19:31:03,441][2153185] Fps is (10 sec: 392479.6, 60 sec: 379999.5, 300 sec: 357533.9). Total num frames: 69992448. Throughput: 0: 379724.7. Samples: 69140480. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:31:03,442][2153185] Avg episode reward: [(0, '5741.709')]
+[2023-01-11 19:31:08,422][2153185] Fps is (10 sec: 373597.7, 60 sec: 378294.7, 300 sec: 357803.1). Total num frames: 71827456. Throughput: 0: 380484.8. Samples: 71385088. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:31:08,423][2153185] Avg episode reward: [(0, '5806.858')]
+[2023-01-11 19:31:13,425][2153185] Fps is (10 sec: 374146.0, 60 sec: 379000.1, 300 sec: 358340.1). Total num frames: 73728000. Throughput: 0: 379498.7. Samples: 73635840. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:31:13,426][2153185] Avg episode reward: [(0, '5602.576')]
+[2023-01-11 19:31:18,435][2153185] Fps is (10 sec: 379630.6, 60 sec: 378937.9, 300 sec: 358840.0). Total num frames: 75628544. Throughput: 0: 380018.1. Samples: 74776576. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:31:18,436][2153185] Avg episode reward: [(0, '5567.227')]
+[2023-01-11 19:31:23,423][2153185] Fps is (10 sec: 373655.4, 60 sec: 379023.9, 300 sec: 359049.8). Total num frames: 77463552. Throughput: 0: 379163.0. Samples: 77010944. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+[2023-01-11 19:31:23,423][2153185] Avg episode reward: [(0, '6320.190')]
+[2023-01-11 19:31:23,427][2153185] Saving new best policy, reward=6320.190!
+[2023-01-11 19:31:28,423][2153185] Fps is (10 sec: 367428.0, 60 sec: 377922.6, 300 sec: 359228.6). Total num frames: 79298560. Throughput: 0: 378658.5. Samples: 79233024. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+[2023-01-11 19:31:28,424][2153185] Avg episode reward: [(0, '6510.601')]
+[2023-01-11 19:31:28,431][2153185] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/06_v083_brax_basic_benchmark_see_2322090_env_walker2d_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000012096_79298560.pth...
+[2023-01-11 19:31:28,499][2153185] Saving new best policy, reward=6510.601!
+[2023-01-11 19:31:33,423][2153185] Fps is (10 sec: 366976.3, 60 sec: 376837.8, 300 sec: 359400.9). Total num frames: 81133568. Throughput: 0: 378048.5. Samples: 80300032. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+[2023-01-11 19:31:33,424][2153185] Avg episode reward: [(0, '6344.935')]
+[2023-01-11 19:31:38,466][2153185] Fps is (10 sec: 378491.2, 60 sec: 377895.9, 300 sec: 360066.8). Total num frames: 83099648. Throughput: 0: 377647.8. Samples: 82597888. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+[2023-01-11 19:31:38,467][2153185] Avg episode reward: [(0, '6454.606')]
+[2023-01-11 19:31:43,421][2153185] Fps is (10 sec: 380194.7, 60 sec: 377943.4, 300 sec: 360282.8). Total num frames: 84934656. Throughput: 0: 374349.5. Samples: 84858880. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:31:43,421][2153185] Avg episode reward: [(0, '6075.015')]
+[2023-01-11 19:31:48,424][2153185] Fps is (10 sec: 375147.6, 60 sec: 378254.3, 300 sec: 360690.6). Total num frames: 86835200. Throughput: 0: 374792.6. Samples: 85999616. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:31:48,424][2153185] Avg episode reward: [(0, '6092.570')]
+[2023-01-11 19:31:53,423][2153185] Fps is (10 sec: 380028.3, 60 sec: 377919.0, 300 sec: 361086.4). Total num frames: 88735744. Throughput: 0: 375369.1. Samples: 88276992. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:31:53,424][2153185] Avg episode reward: [(0, '5996.674')]
+[2023-01-11 19:31:58,423][2153185] Fps is (10 sec: 373559.8, 60 sec: 374647.3, 300 sec: 361203.8). Total num frames: 90570752. Throughput: 0: 375071.7. Samples: 90513408. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:31:58,424][2153185] Avg episode reward: [(0, '6051.410')]
+[2023-01-11 19:32:03,422][2153185] Fps is (10 sec: 373595.6, 60 sec: 374766.2, 300 sec: 361575.5). Total num frames: 92471296. Throughput: 0: 375119.1. Samples: 91652096. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:32:03,423][2153185] Avg episode reward: [(0, '6075.545')]
+[2023-01-11 19:32:08,424][2153185] Fps is (10 sec: 380091.6, 60 sec: 375729.7, 300 sec: 361928.2). Total num frames: 94371840. Throughput: 0: 375819.3. Samples: 93923328. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:32:08,425][2153185] Avg episode reward: [(0, '5770.103')]
+[2023-01-11 19:32:13,424][2153185] Fps is (10 sec: 373479.6, 60 sec: 374654.9, 300 sec: 362023.5). Total num frames: 96206848. Throughput: 0: 376097.7. Samples: 96157696. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:32:13,425][2153185] Avg episode reward: [(0, '6145.203')]
+[2023-01-11 19:32:18,423][2153185] Fps is (10 sec: 386687.5, 60 sec: 376905.1, 300 sec: 362842.7). Total num frames: 98238464. Throughput: 0: 379653.4. Samples: 97384448. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:32:18,424][2153185] Avg episode reward: [(0, '6026.312')]
+[2023-01-11 19:32:23,203][2153185] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/06_v083_brax_basic_benchmark_see_2322090_env_walker2d_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000015266_100073472.pth...
+[2023-01-11 19:32:23,219][2153185] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/06_v083_brax_basic_benchmark_see_2322090_env_walker2d_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000005196_34078720.pth
+[2023-01-11 19:32:23,220][2153185] Stopping Batcher_0...
+[2023-01-11 19:32:23,221][2153185] Stopping InferenceWorker_p0-w0...
+[2023-01-11 19:32:23,221][2153185] Stopping RolloutWorker_w0...
+[2023-01-11 19:32:23,221][2153185] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/06_v083_brax_basic_benchmark_see_2322090_env_walker2d_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000015266_100073472.pth...
+[2023-01-11 19:32:23,235][2153185] Stopping LearnerWorker_p0...
+[2023-01-11 19:32:23,235][2153185] Component Batcher_0 stopped!
+[2023-01-11 19:32:23,236][2153185] Component InferenceWorker_p0-w0 stopped!
+[2023-01-11 19:32:23,236][2153185] Component RolloutWorker_w0 stopped!
+[2023-01-11 19:32:23,236][2153185] Component LearnerWorker_p0 stopped!
+[2023-01-11 19:32:23,236][2153185] Batcher 0 profile tree view:
+batching: 0.3521, releasing_batches: 0.0642
+[2023-01-11 19:32:23,236][2153185] InferenceWorker_p0-w0 profile tree view:
+update_model: 0.4320
+one_step: 0.0012
+handle_policy_step: 57.3554
+deserialize: 0.4865, stack: 0.0681, obs_to_device_normalize: 10.3711, forward: 35.6708, prepare_outputs: 6.7457, send_messages: 0.8052
+[2023-01-11 19:32:23,237][2153185] Learner 0 profile tree view:
+misc: 0.0050, prepare_batch: 5.5690
+train: 87.4892
+epoch_init: 0.0595, minibatch_init: 0.9957, losses_postprocess: 3.1702, kl_divergence: 5.7630, after_optimizer: 0.3136
+calculate_losses: 17.7913
+losses_init: 0.0331, forward_head: 2.8800, bptt_initial: 0.1263, bptt: 0.1281, tail: 8.7123, advantages_returns: 1.1141, losses: 3.4779
+update: 57.5417
+clip: 8.7673
+[2023-01-11 19:32:23,237][2153185] RolloutWorker_w0 profile tree view:
+wait_for_trajectories: 0.0819, enqueue_policy_requests: 5.6467, process_policy_outputs: 3.6701, env_step: 90.1515, finalize_trajectories: 0.1553, complete_rollouts: 0.0656
+post_env_step: 19.1398
+process_env_step: 7.7900
+[2023-01-11 19:32:23,237][2153185] Loop Runner_EvtLoop terminating...
+[2023-01-11 19:32:23,237][2153185] Runner profile tree view:
+main_loop: 289.1680
+[2023-01-11 19:32:23,238][2153185] Collected {0: 100073472}, FPS: 346073.8
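
The log's `Fps is (10 sec: ...)` lines track instantaneous throughput, ending with an overall average of 346073.8 FPS. A small sketch for extracting those readings, with a regex tailored to the format shown above:

```
# Sketch: collect the 10-second FPS readings from sf_log.txt.
import re

pattern = re.compile(r"Fps is \(10 sec: ([\d.]+|nan)")

fps = []
with open("sf_log.txt") as f:
    for line in f:
        m = pattern.search(line)
        if m and m.group(1) != "nan":
            fps.append(float(m.group(1)))

print(f"{len(fps)} readings, peak {max(fps):.1f} FPS")
```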