andrewzhang505 committed on
Commit
206942c
1 Parent(s): b16d53e

Upload . with huggingface_hub

Browse files
.summary/0/events.out.tfevents.1670186873.andrew-gpu ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e43b5413e94b50c334b2b093667bd2d720ba516a657964401af2294afa3c992b
3
+ size 70933
README.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: sample-factory
3
+ tags:
4
+ - deep-reinforcement-learning
5
+ - reinforcement-learning
6
+ - sample-factory
7
+ ---
8
+
9
+ An **APPO** model trained on the **mujoco_ant** environment.
10
+
11
+ This model was trained using [Sample-Factory 2.0](https://github.com/alex-petrenko/sample-factory).
12
+ Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
13
+
14
+
15
+ ## Downloading the model
16
+
17
+ After installing Sample-Factory, download the model with:
18
+ ```
19
+ python -m sample_factory.huggingface.load_from_hub -r andrewzhang505/ant_test2
20
+ ```
21
+
22
+
checkpoint_p0/best_000000928_475136_reward_321.313.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78a270ae0aaa4d62d1cc85c340c7bc1f5917af859419a227346a58323c755e74
3
+ size 89730
checkpoint_p0/checkpoint_000000928_475136.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:920e8e287f9e8564ad2da100a86f8050bdfa28428635072a4dc315076a25a387
3
+ size 89730
checkpoint_p0/checkpoint_000000952_487424.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df941a254fb9b62986160dad41587b474e099476b2f7e76299e09883dd0142dd
3
+ size 89730
config.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "help": false,
3
+ "algo": "APPO",
4
+ "env": "mujoco_ant",
5
+ "experiment": "ant_test",
6
+ "train_dir": "/home/andrew_huggingface_co/sample-factory/train_dir",
7
+ "restart_behavior": "resume",
8
+ "device": "gpu",
9
+ "seed": null,
10
+ "num_policies": 1,
11
+ "async_rl": false,
12
+ "serial_mode": false,
13
+ "batched_sampling": false,
14
+ "num_batches_to_accumulate": 2,
15
+ "worker_num_splits": 2,
16
+ "policy_workers_per_policy": 1,
17
+ "max_policy_lag": 1000,
18
+ "num_workers": 8,
19
+ "num_envs_per_worker": 8,
20
+ "batch_size": 1024,
21
+ "num_batches_per_epoch": 4,
22
+ "num_epochs": 2,
23
+ "rollout": 64,
24
+ "recurrence": 1,
25
+ "shuffle_minibatches": false,
26
+ "gamma": 0.99,
27
+ "reward_scale": 1,
28
+ "reward_clip": 1000.0,
29
+ "value_bootstrap": true,
30
+ "normalize_returns": true,
31
+ "exploration_loss_coeff": 0.0,
32
+ "value_loss_coeff": 1.3,
33
+ "kl_loss_coeff": 0.1,
34
+ "exploration_loss": "entropy",
35
+ "gae_lambda": 0.95,
36
+ "ppo_clip_ratio": 0.2,
37
+ "ppo_clip_value": 1.0,
38
+ "with_vtrace": false,
39
+ "vtrace_rho": 1.0,
40
+ "vtrace_c": 1.0,
41
+ "optimizer": "adam",
42
+ "adam_eps": 1e-06,
43
+ "adam_beta1": 0.9,
44
+ "adam_beta2": 0.999,
45
+ "max_grad_norm": 3.5,
46
+ "learning_rate": 0.00295,
47
+ "lr_schedule": "linear_decay",
48
+ "lr_schedule_kl_threshold": 0.008,
49
+ "obs_subtract_mean": 0.0,
50
+ "obs_scale": 1.0,
51
+ "normalize_input": true,
52
+ "normalize_input_keys": null,
53
+ "decorrelate_experience_max_seconds": 0,
54
+ "decorrelate_envs_on_one_worker": true,
55
+ "actor_worker_gpus": [],
56
+ "set_workers_cpu_affinity": true,
57
+ "force_envs_single_thread": false,
58
+ "default_niceness": 0,
59
+ "log_to_file": true,
60
+ "experiment_summaries_interval": 3,
61
+ "flush_summaries_interval": 30,
62
+ "stats_avg": 100,
63
+ "summaries_use_frameskip": true,
64
+ "heartbeat_interval": 20,
65
+ "heartbeat_reporting_interval": 180,
66
+ "train_for_env_steps": 10000000,
67
+ "train_for_seconds": 10000000000,
68
+ "save_every_sec": 15,
69
+ "keep_checkpoints": 2,
70
+ "load_checkpoint_kind": "latest",
71
+ "save_milestones_sec": -1,
72
+ "save_best_every_sec": 5,
73
+ "save_best_metric": "reward",
74
+ "save_best_after": 100000,
75
+ "benchmark": false,
76
+ "encoder_mlp_layers": [
77
+ 64,
78
+ 64
79
+ ],
80
+ "encoder_conv_architecture": "convnet_simple",
81
+ "encoder_conv_mlp_layers": [
82
+ 512
83
+ ],
84
+ "use_rnn": false,
85
+ "rnn_size": 512,
86
+ "rnn_type": "gru",
87
+ "rnn_num_layers": 1,
88
+ "decoder_mlp_layers": [],
89
+ "nonlinearity": "tanh",
90
+ "policy_initialization": "torch_default",
91
+ "policy_init_gain": 1.0,
92
+ "actor_critic_share_weights": true,
93
+ "adaptive_stddev": false,
94
+ "continuous_tanh_scale": 0.0,
95
+ "initial_stddev": 1.0,
96
+ "use_env_info_cache": false,
97
+ "env_gpu_actions": false,
98
+ "env_gpu_observations": true,
99
+ "env_frameskip": 1,
100
+ "env_framestack": 1,
101
+ "pixel_format": "CHW",
102
+ "use_record_episode_statistics": false,
103
+ "with_wandb": false,
104
+ "wandb_user": null,
105
+ "wandb_project": "sample_factory",
106
+ "wandb_group": null,
107
+ "wandb_job_type": "SF",
108
+ "wandb_tags": [],
109
+ "with_pbt": false,
110
+ "pbt_mix_policies_in_one_env": true,
111
+ "pbt_period_env_steps": 5000000,
112
+ "pbt_start_mutation": 20000000,
113
+ "pbt_replace_fraction": 0.3,
114
+ "pbt_mutation_rate": 0.15,
115
+ "pbt_replace_reward_gap": 0.1,
116
+ "pbt_replace_reward_gap_absolute": 1e-06,
117
+ "pbt_optimize_gamma": false,
118
+ "pbt_target_objective": "true_objective",
119
+ "pbt_perturb_min": 1.1,
120
+ "pbt_perturb_max": 1.5,
121
+ "command_line": "--algo=APPO --env=mujoco_ant --experiment=ant_test",
122
+ "cli_args": {
123
+ "algo": "APPO",
124
+ "env": "mujoco_ant",
125
+ "experiment": "ant_test"
126
+ },
127
+ "git_hash": "162ce7329569a2a3abaa26ecb8162a74f833b63e",
128
+ "git_repo_name": "https://github.com/andrewzhang505/sample-factory.git",
129
+ "train_script": "sf_examples.mujoco.train_mujoco"
130
+ }
git.diff ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ diff --git a/sample_factory/huggingface/huggingface_utils.py b/sample_factory/huggingface/huggingface_utils.py
2
+ index b6b10fc2..88ebd2c4 100644
3
+ --- a/sample_factory/huggingface/huggingface_utils.py
4
+ +++ b/sample_factory/huggingface/huggingface_utils.py
5
+ @@ -117,27 +117,40 @@ def push_to_hf(dir_path: str, repo_name: str, num_policies: int = 1):
6
+ exist_ok=True,
7
+ )
8
+
9
+ - # Upload folders
10
+ - folders = [".summary"]
11
+ - for policy_id in range(num_policies):
12
+ - folders.append(f"checkpoint_p{policy_id}")
13
+ - for f in folders:
14
+ - if os.path.exists(os.path.join(dir_path, f)):
15
+ - upload_folder(
16
+ - repo_id=repo_name,
17
+ - folder_path=os.path.join(dir_path, f),
18
+ - path_in_repo=f,
19
+ - )
20
+ -
21
+ - # Upload files
22
+ - files = ["config.json", "README.md", "replay.mp4"]
23
+ - for f in files:
24
+ - if os.path.exists(os.path.join(dir_path, f)):
25
+ - upload_file(
26
+ - repo_id=repo_name,
27
+ - path_or_fileobj=os.path.join(dir_path, f),
28
+ - path_in_repo=f,
29
+ - )
30
+ + upload_folder(
31
+ + repo_id=repo_name,
32
+ + folder_path=dir_path,
33
+ + path_in_repo=f,
34
+ + allow_patterns=[
35
+ + ".summary/*",
36
+ + "config.json",
37
+ + "README.md",
38
+ + "replay.mp4",
39
+ + ]
40
+ + + [f"checkpoint_p{policy_id}/*" for policy_id in range(num_policies)],
41
+ + )
42
+ +
43
+ + # # Upload folders
44
+ + # folders = [".summary"]
45
+ + # for policy_id in range(num_policies):
46
+ + # folders.append(f"checkpoint_p{policy_id}")
47
+ + # for f in folders:
48
+ + # if os.path.exists(os.path.join(dir_path, f)):
49
+ + # upload_folder(
50
+ + # repo_id=repo_name,
51
+ + # folder_path=os.path.join(dir_path, f),
52
+ + # path_in_repo=f,
53
+ + # )
54
+ +
55
+ + # # Upload files
56
+ + # files = ["config.json", "README.md", "replay.mp4"]
57
+ + # for f in files:
58
+ + # if os.path.exists(os.path.join(dir_path, f)):
59
+ + # upload_file(
60
+ + # repo_id=repo_name,
61
+ + # path_or_fileobj=os.path.join(dir_path, f),
62
+ + # path_in_repo=f,
63
+ + # )
64
+
65
+ log.info(f"The model has been pushed to {repo_url}")
66
+
sf_log.txt ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2022-12-04 20:47:56,451][04266] Saving configuration to /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/config.json...
2
+ [2022-12-04 20:47:56,464][04266] Rollout worker 0 uses device cpu
3
+ [2022-12-04 20:47:56,464][04266] Rollout worker 1 uses device cpu
4
+ [2022-12-04 20:47:56,464][04266] Rollout worker 2 uses device cpu
5
+ [2022-12-04 20:47:56,465][04266] Rollout worker 3 uses device cpu
6
+ [2022-12-04 20:47:56,465][04266] Rollout worker 4 uses device cpu
7
+ [2022-12-04 20:47:56,465][04266] Rollout worker 5 uses device cpu
8
+ [2022-12-04 20:47:56,465][04266] Rollout worker 6 uses device cpu
9
+ [2022-12-04 20:47:56,465][04266] Rollout worker 7 uses device cpu
10
+ [2022-12-04 20:47:56,465][04266] In synchronous mode, we only accumulate one batch. Setting num_batches_to_accumulate to 1
11
+ [2022-12-04 20:47:56,487][04266] Using GPUs [0] for process 0 (actually maps to GPUs [0])
12
+ [2022-12-04 20:47:56,487][04266] InferenceWorker_p0-w0: min num requests: 2
13
+ [2022-12-04 20:47:56,519][04266] Starting all processes...
14
+ [2022-12-04 20:47:56,520][04266] Starting process learner_proc0
15
+ [2022-12-04 20:47:56,570][04266] Starting all processes...
16
+ [2022-12-04 20:47:56,577][04266] Starting process inference_proc0-0
17
+ [2022-12-04 20:47:56,577][04266] Starting process rollout_proc0
18
+ [2022-12-04 20:47:56,578][04266] Starting process rollout_proc1
19
+ [2022-12-04 20:47:56,578][04266] Starting process rollout_proc2
20
+ [2022-12-04 20:47:56,578][04266] Starting process rollout_proc3
21
+ [2022-12-04 20:47:56,579][04266] Starting process rollout_proc4
22
+ [2022-12-04 20:47:56,579][04266] Starting process rollout_proc5
23
+ [2022-12-04 20:47:56,584][04266] Starting process rollout_proc6
24
+ [2022-12-04 20:47:56,591][04266] Starting process rollout_proc7
25
+ [2022-12-04 20:47:58,489][04366] Worker 5 uses CPU cores [5]
26
+ [2022-12-04 20:47:58,561][04361] Worker 0 uses CPU cores [0]
27
+ [2022-12-04 20:47:58,611][04360] Using GPUs [0] for process 0 (actually maps to GPUs [0])
28
+ [2022-12-04 20:47:58,612][04360] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
29
+ [2022-12-04 20:47:58,705][04367] Worker 4 uses CPU cores [4]
30
+ [2022-12-04 20:47:58,733][04363] Worker 6 uses CPU cores [6]
31
+ [2022-12-04 20:47:58,765][04368] Worker 2 uses CPU cores [2]
32
+ [2022-12-04 20:47:58,779][04365] Worker 3 uses CPU cores [3]
33
+ [2022-12-04 20:47:58,824][04340] Using GPUs [0] for process 0 (actually maps to GPUs [0])
34
+ [2022-12-04 20:47:58,825][04340] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
35
+ [2022-12-04 20:47:58,834][04364] Worker 7 uses CPU cores [7]
36
+ [2022-12-04 20:47:58,885][04362] Worker 1 uses CPU cores [1]
37
+ [2022-12-04 20:47:59,427][04360] Num visible devices: 1
38
+ [2022-12-04 20:47:59,428][04340] Num visible devices: 1
39
+ [2022-12-04 20:47:59,446][04340] Starting seed is not provided
40
+ [2022-12-04 20:47:59,446][04340] Using GPUs [0] for process 0 (actually maps to GPUs [0])
41
+ [2022-12-04 20:47:59,446][04340] Initializing actor-critic model on device cuda:0
42
+ [2022-12-04 20:47:59,446][04340] RunningMeanStd input shape: (27,)
43
+ [2022-12-04 20:47:59,447][04340] RunningMeanStd input shape: (1,)
44
+ [2022-12-04 20:47:59,522][04340] Created Actor Critic model with architecture:
45
+ [2022-12-04 20:47:59,522][04340] ActorCriticSharedWeights(
46
+ (obs_normalizer): ObservationNormalizer(
47
+ (running_mean_std): RunningMeanStdDictInPlace(
48
+ (running_mean_std): ModuleDict(
49
+ (obs): RunningMeanStdInPlace()
50
+ )
51
+ )
52
+ )
53
+ (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
54
+ (encoder): MultiInputEncoder(
55
+ (encoders): ModuleDict(
56
+ (obs): MlpEncoder(
57
+ (mlp_head): RecursiveScriptModule(
58
+ original_name=Sequential
59
+ (0): RecursiveScriptModule(original_name=Linear)
60
+ (1): RecursiveScriptModule(original_name=Tanh)
61
+ (2): RecursiveScriptModule(original_name=Linear)
62
+ (3): RecursiveScriptModule(original_name=Tanh)
63
+ )
64
+ )
65
+ )
66
+ )
67
+ (core): ModelCoreIdentity()
68
+ (decoder): MlpDecoder(
69
+ (mlp): Identity()
70
+ )
71
+ (critic_linear): Linear(in_features=64, out_features=1, bias=True)
72
+ (action_parameterization): ActionParameterizationContinuousNonAdaptiveStddev(
73
+ (distribution_linear): Linear(in_features=64, out_features=8, bias=True)
74
+ )
75
+ )
76
+ [2022-12-04 20:48:03,416][04340] Using optimizer <class 'torch.optim.adam.Adam'>
77
+ [2022-12-04 20:48:03,417][04340] No checkpoints found
78
+ [2022-12-04 20:48:03,417][04340] Did not load from checkpoint, starting from scratch!
79
+ [2022-12-04 20:48:03,417][04340] Initialized policy 0 weights for model version 0
80
+ [2022-12-04 20:48:03,422][04340] LearnerWorker_p0 finished initialization!
81
+ [2022-12-04 20:48:03,424][04340] Using GPUs [0] for process 0 (actually maps to GPUs [0])
82
+ [2022-12-04 20:48:03,551][04360] RunningMeanStd input shape: (27,)
83
+ [2022-12-04 20:48:03,552][04360] RunningMeanStd input shape: (1,)
84
+ [2022-12-04 20:48:03,650][04266] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
85
+ [2022-12-04 20:48:07,105][04266] Inference worker 0-0 is ready!
86
+ [2022-12-04 20:48:07,105][04266] All inference workers are ready! Signal rollout workers to start!
87
+ [2022-12-04 20:48:07,303][04364] Decorrelating experience for 0 frames...
88
+ [2022-12-04 20:48:07,303][04362] Decorrelating experience for 0 frames...
89
+ [2022-12-04 20:48:07,305][04363] Decorrelating experience for 0 frames...
90
+ [2022-12-04 20:48:07,306][04362] Decorrelating experience for 64 frames...
91
+ [2022-12-04 20:48:07,305][04367] Decorrelating experience for 0 frames...
92
+ [2022-12-04 20:48:07,305][04364] Decorrelating experience for 64 frames...
93
+ [2022-12-04 20:48:07,305][04361] Decorrelating experience for 0 frames...
94
+ [2022-12-04 20:48:07,305][04368] Decorrelating experience for 0 frames...
95
+ [2022-12-04 20:48:07,306][04366] Decorrelating experience for 0 frames...
96
+ [2022-12-04 20:48:07,307][04367] Decorrelating experience for 64 frames...
97
+ [2022-12-04 20:48:07,307][04363] Decorrelating experience for 64 frames...
98
+ [2022-12-04 20:48:07,307][04365] Decorrelating experience for 0 frames...
99
+ [2022-12-04 20:48:07,308][04368] Decorrelating experience for 64 frames...
100
+ [2022-12-04 20:48:07,308][04366] Decorrelating experience for 64 frames...
101
+ [2022-12-04 20:48:07,308][04361] Decorrelating experience for 64 frames...
102
+ [2022-12-04 20:48:07,309][04365] Decorrelating experience for 64 frames...
103
+ [2022-12-04 20:48:07,359][04364] Decorrelating experience for 128 frames...
104
+ [2022-12-04 20:48:07,360][04363] Decorrelating experience for 128 frames...
105
+ [2022-12-04 20:48:07,362][04366] Decorrelating experience for 128 frames...
106
+ [2022-12-04 20:48:07,361][04362] Decorrelating experience for 128 frames...
107
+ [2022-12-04 20:48:07,362][04361] Decorrelating experience for 128 frames...
108
+ [2022-12-04 20:48:07,362][04365] Decorrelating experience for 128 frames...
109
+ [2022-12-04 20:48:07,362][04367] Decorrelating experience for 128 frames...
110
+ [2022-12-04 20:48:07,362][04368] Decorrelating experience for 128 frames...
111
+ [2022-12-04 20:48:07,467][04363] Decorrelating experience for 192 frames...
112
+ [2022-12-04 20:48:07,467][04364] Decorrelating experience for 192 frames...
113
+ [2022-12-04 20:48:07,469][04367] Decorrelating experience for 192 frames...
114
+ [2022-12-04 20:48:07,469][04365] Decorrelating experience for 192 frames...
115
+ [2022-12-04 20:48:07,470][04366] Decorrelating experience for 192 frames...
116
+ [2022-12-04 20:48:07,471][04361] Decorrelating experience for 192 frames...
117
+ [2022-12-04 20:48:07,472][04362] Decorrelating experience for 192 frames...
118
+ [2022-12-04 20:48:07,474][04368] Decorrelating experience for 192 frames...
119
+ [2022-12-04 20:48:07,650][04364] Decorrelating experience for 256 frames...
120
+ [2022-12-04 20:48:07,658][04363] Decorrelating experience for 256 frames...
121
+ [2022-12-04 20:48:07,658][04365] Decorrelating experience for 256 frames...
122
+ [2022-12-04 20:48:07,659][04367] Decorrelating experience for 256 frames...
123
+ [2022-12-04 20:48:07,659][04362] Decorrelating experience for 256 frames...
124
+ [2022-12-04 20:48:07,661][04361] Decorrelating experience for 256 frames...
125
+ [2022-12-04 20:48:07,662][04366] Decorrelating experience for 256 frames...
126
+ [2022-12-04 20:48:07,664][04368] Decorrelating experience for 256 frames...
127
+ [2022-12-04 20:48:07,856][04364] Decorrelating experience for 320 frames...
128
+ [2022-12-04 20:48:07,863][04363] Decorrelating experience for 320 frames...
129
+ [2022-12-04 20:48:07,864][04365] Decorrelating experience for 320 frames...
130
+ [2022-12-04 20:48:07,866][04362] Decorrelating experience for 320 frames...
131
+ [2022-12-04 20:48:07,866][04361] Decorrelating experience for 320 frames...
132
+ [2022-12-04 20:48:07,871][04366] Decorrelating experience for 320 frames...
133
+ [2022-12-04 20:48:07,872][04367] Decorrelating experience for 320 frames...
134
+ [2022-12-04 20:48:07,877][04368] Decorrelating experience for 320 frames...
135
+ [2022-12-04 20:48:08,114][04364] Decorrelating experience for 384 frames...
136
+ [2022-12-04 20:48:08,119][04363] Decorrelating experience for 384 frames...
137
+ [2022-12-04 20:48:08,121][04365] Decorrelating experience for 384 frames...
138
+ [2022-12-04 20:48:08,123][04361] Decorrelating experience for 384 frames...
139
+ [2022-12-04 20:48:08,128][04362] Decorrelating experience for 384 frames...
140
+ [2022-12-04 20:48:08,129][04366] Decorrelating experience for 384 frames...
141
+ [2022-12-04 20:48:08,131][04367] Decorrelating experience for 384 frames...
142
+ [2022-12-04 20:48:08,144][04368] Decorrelating experience for 384 frames...
143
+ [2022-12-04 20:48:08,431][04364] Decorrelating experience for 448 frames...
144
+ [2022-12-04 20:48:08,433][04363] Decorrelating experience for 448 frames...
145
+ [2022-12-04 20:48:08,437][04365] Decorrelating experience for 448 frames...
146
+ [2022-12-04 20:48:08,437][04361] Decorrelating experience for 448 frames...
147
+ [2022-12-04 20:48:08,440][04362] Decorrelating experience for 448 frames...
148
+ [2022-12-04 20:48:08,444][04367] Decorrelating experience for 448 frames...
149
+ [2022-12-04 20:48:08,452][04366] Decorrelating experience for 448 frames...
150
+ [2022-12-04 20:48:08,466][04368] Decorrelating experience for 448 frames...
151
+ [2022-12-04 20:48:08,650][04266] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
152
+ [2022-12-04 20:48:08,652][04340] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000000_0.pth...
153
+ [2022-12-04 20:48:13,650][04266] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 8192. Throughput: 0: 846.4. Samples: 8464. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
154
+ [2022-12-04 20:48:13,650][04266] Avg episode reward: [(0, '-160.026')]
155
+ [2022-12-04 20:48:16,478][04266] Heartbeat connected on Batcher_0
156
+ [2022-12-04 20:48:16,482][04266] Heartbeat connected on LearnerWorker_p0
157
+ [2022-12-04 20:48:16,492][04266] Heartbeat connected on InferenceWorker_p0-w0
158
+ [2022-12-04 20:48:16,493][04266] Heartbeat connected on RolloutWorker_w0
159
+ [2022-12-04 20:48:16,503][04266] Heartbeat connected on RolloutWorker_w2
160
+ [2022-12-04 20:48:16,503][04266] Heartbeat connected on RolloutWorker_w1
161
+ [2022-12-04 20:48:16,510][04266] Heartbeat connected on RolloutWorker_w4
162
+ [2022-12-04 20:48:16,511][04266] Heartbeat connected on RolloutWorker_w3
163
+ [2022-12-04 20:48:16,516][04266] Heartbeat connected on RolloutWorker_w5
164
+ [2022-12-04 20:48:16,521][04266] Heartbeat connected on RolloutWorker_w6
165
+ [2022-12-04 20:48:16,529][04266] Heartbeat connected on RolloutWorker_w7
166
+ [2022-12-04 20:48:18,650][04266] Fps is (10 sec: 3686.4, 60 sec: 2457.6, 300 sec: 2457.6). Total num frames: 36864. Throughput: 0: 1698.1. Samples: 25472. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
167
+ [2022-12-04 20:48:18,651][04266] Avg episode reward: [(0, '-169.308')]
168
+ [2022-12-04 20:48:18,924][04360] Updated weights for policy 0, policy_version 80 (0.0006)
169
+ [2022-12-04 20:48:23,650][04266] Fps is (10 sec: 5734.3, 60 sec: 3276.8, 300 sec: 3276.8). Total num frames: 65536. Throughput: 0: 2930.0. Samples: 58600. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
170
+ [2022-12-04 20:48:23,651][04266] Avg episode reward: [(0, '-249.723')]
171
+ [2022-12-04 20:48:23,656][04340] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000128_65536.pth...
172
+ [2022-12-04 20:48:26,260][04360] Updated weights for policy 0, policy_version 160 (0.0007)
173
+ [2022-12-04 20:48:28,650][04266] Fps is (10 sec: 5734.4, 60 sec: 3768.3, 300 sec: 3768.3). Total num frames: 94208. Throughput: 0: 3705.3. Samples: 92632. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
174
+ [2022-12-04 20:48:28,651][04266] Avg episode reward: [(0, '-89.994')]
175
+ [2022-12-04 20:48:33,559][04360] Updated weights for policy 0, policy_version 240 (0.0006)
176
+ [2022-12-04 20:48:33,650][04266] Fps is (10 sec: 5734.4, 60 sec: 4096.0, 300 sec: 4096.0). Total num frames: 122880. Throughput: 0: 3641.5. Samples: 109244. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
177
+ [2022-12-04 20:48:33,651][04266] Avg episode reward: [(0, '-153.751')]
178
+ [2022-12-04 20:48:33,651][04340] Saving new best policy, reward=-153.751!
179
+ [2022-12-04 20:48:38,650][04266] Fps is (10 sec: 5324.8, 60 sec: 4213.0, 300 sec: 4213.0). Total num frames: 147456. Throughput: 0: 4093.4. Samples: 143268. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
180
+ [2022-12-04 20:48:38,650][04266] Avg episode reward: [(0, '-137.350')]
181
+ [2022-12-04 20:48:38,669][04340] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000296_151552.pth...
182
+ [2022-12-04 20:48:38,675][04340] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000000_0.pth
183
+ [2022-12-04 20:48:38,675][04340] Saving new best policy, reward=-137.350!
184
+ [2022-12-04 20:48:40,889][04360] Updated weights for policy 0, policy_version 320 (0.0006)
185
+ [2022-12-04 20:48:43,650][04266] Fps is (10 sec: 5324.8, 60 sec: 4403.2, 300 sec: 4403.2). Total num frames: 176128. Throughput: 0: 4415.1. Samples: 176604. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
186
+ [2022-12-04 20:48:43,651][04266] Avg episode reward: [(0, '-69.206')]
187
+ [2022-12-04 20:48:43,651][04340] Saving new best policy, reward=-69.206!
188
+ [2022-12-04 20:48:48,177][04360] Updated weights for policy 0, policy_version 400 (0.0006)
189
+ [2022-12-04 20:48:48,650][04266] Fps is (10 sec: 5734.4, 60 sec: 4551.1, 300 sec: 4551.1). Total num frames: 204800. Throughput: 0: 4290.7. Samples: 193080. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
190
+ [2022-12-04 20:48:48,651][04266] Avg episode reward: [(0, '-52.726')]
191
+ [2022-12-04 20:48:48,651][04340] Saving new best policy, reward=-52.726!
192
+ [2022-12-04 20:48:53,650][04266] Fps is (10 sec: 5734.4, 60 sec: 4669.5, 300 sec: 4669.5). Total num frames: 233472. Throughput: 0: 5054.2. Samples: 227440. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
193
+ [2022-12-04 20:48:53,650][04266] Avg episode reward: [(0, '-33.694')]
194
+ [2022-12-04 20:48:53,657][04340] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000456_233472.pth...
195
+ [2022-12-04 20:48:53,664][04340] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000128_65536.pth
196
+ [2022-12-04 20:48:53,664][04340] Saving new best policy, reward=-33.694!
197
+ [2022-12-04 20:48:55,518][04360] Updated weights for policy 0, policy_version 480 (0.0006)
198
+ [2022-12-04 20:48:58,650][04266] Fps is (10 sec: 5734.4, 60 sec: 4766.3, 300 sec: 4766.3). Total num frames: 262144. Throughput: 0: 5586.5. Samples: 259856. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
199
+ [2022-12-04 20:48:58,651][04266] Avg episode reward: [(0, '-45.611')]
200
+ [2022-12-04 20:49:03,653][04266] Fps is (10 sec: 4913.5, 60 sec: 4710.1, 300 sec: 4710.1). Total num frames: 282624. Throughput: 0: 5596.9. Samples: 277352. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
201
+ [2022-12-04 20:49:03,654][04266] Avg episode reward: [(0, '-29.953')]
202
+ [2022-12-04 20:49:03,655][04340] Saving new best policy, reward=-29.953!
203
+ [2022-12-04 20:49:04,937][04360] Updated weights for policy 0, policy_version 560 (0.0008)
204
+ [2022-12-04 20:49:08,650][04266] Fps is (10 sec: 4096.0, 60 sec: 5051.7, 300 sec: 4663.1). Total num frames: 303104. Throughput: 0: 5336.0. Samples: 298720. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
205
+ [2022-12-04 20:49:08,650][04266] Avg episode reward: [(0, '-29.014')]
206
+ [2022-12-04 20:49:08,678][04340] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000600_307200.pth...
207
+ [2022-12-04 20:49:08,686][04340] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000296_151552.pth
208
+ [2022-12-04 20:49:08,686][04340] Saving new best policy, reward=-29.014!
209
+ [2022-12-04 20:49:12,321][04360] Updated weights for policy 0, policy_version 640 (0.0007)
210
+ [2022-12-04 20:49:13,650][04266] Fps is (10 sec: 4916.9, 60 sec: 5393.1, 300 sec: 4739.7). Total num frames: 331776. Throughput: 0: 5326.1. Samples: 332308. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
211
+ [2022-12-04 20:49:13,650][04266] Avg episode reward: [(0, '-0.035')]
212
+ [2022-12-04 20:49:13,651][04340] Saving new best policy, reward=-0.035!
213
+ [2022-12-04 20:49:18,650][04266] Fps is (10 sec: 5734.4, 60 sec: 5393.1, 300 sec: 4806.0). Total num frames: 360448. Throughput: 0: 5338.0. Samples: 349452. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
214
+ [2022-12-04 20:49:18,650][04266] Avg episode reward: [(0, '26.827')]
215
+ [2022-12-04 20:49:18,651][04340] Saving new best policy, reward=26.827!
216
+ [2022-12-04 20:49:19,490][04360] Updated weights for policy 0, policy_version 720 (0.0006)
217
+ [2022-12-04 20:49:23,650][04266] Fps is (10 sec: 5734.3, 60 sec: 5393.1, 300 sec: 4864.0). Total num frames: 389120. Throughput: 0: 5356.0. Samples: 384288. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
218
+ [2022-12-04 20:49:23,651][04266] Avg episode reward: [(0, '75.358')]
219
+ [2022-12-04 20:49:23,656][04340] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000760_389120.pth...
220
+ [2022-12-04 20:49:23,665][04340] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000456_233472.pth
221
+ [2022-12-04 20:49:23,665][04340] Saving new best policy, reward=75.358!
222
+ [2022-12-04 20:49:26,586][04360] Updated weights for policy 0, policy_version 800 (0.0006)
223
+ [2022-12-04 20:49:28,650][04266] Fps is (10 sec: 5734.4, 60 sec: 5393.1, 300 sec: 4915.2). Total num frames: 417792. Throughput: 0: 5375.7. Samples: 418512. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
224
+ [2022-12-04 20:49:28,650][04266] Avg episode reward: [(0, '153.991')]
225
+ [2022-12-04 20:49:28,651][04340] Saving new best policy, reward=153.991!
226
+ [2022-12-04 20:49:33,650][04266] Fps is (10 sec: 5734.5, 60 sec: 5393.1, 300 sec: 4960.7). Total num frames: 446464. Throughput: 0: 5396.6. Samples: 435928. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
227
+ [2022-12-04 20:49:33,650][04266] Avg episode reward: [(0, '231.230')]
228
+ [2022-12-04 20:49:33,671][04340] Saving new best policy, reward=231.230!
229
+ [2022-12-04 20:49:33,672][04360] Updated weights for policy 0, policy_version 880 (0.0006)
230
+ [2022-12-04 20:49:38,650][04266] Fps is (10 sec: 5734.3, 60 sec: 5461.3, 300 sec: 5001.4). Total num frames: 475136. Throughput: 0: 5398.1. Samples: 470356. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
231
+ [2022-12-04 20:49:38,651][04266] Avg episode reward: [(0, '321.313')]
232
+ [2022-12-04 20:49:38,656][04340] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000928_475136.pth...
233
+ [2022-12-04 20:49:38,664][04340] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000600_307200.pth
234
+ [2022-12-04 20:49:38,665][04340] Saving new best policy, reward=321.313!
235
+ [2022-12-04 20:49:40,419][04266] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 4266], exiting...
236
+ [2022-12-04 20:49:40,420][04266] Runner profile tree view:
237
+ main_loop: 103.9009
238
+ [2022-12-04 20:49:40,421][04266] Collected {0: 487424}, FPS: 4691.2
239
+ [2022-12-04 20:49:40,421][04340] Stopping Batcher_0...
240
+ [2022-12-04 20:49:40,421][04340] Loop batcher_evt_loop terminating...
241
+ [2022-12-04 20:49:40,421][04365] Stopping RolloutWorker_w3...
242
+ [2022-12-04 20:49:40,422][04365] Loop rollout_proc3_evt_loop terminating...
243
+ [2022-12-04 20:49:40,422][04340] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000952_487424.pth...
244
+ [2022-12-04 20:49:40,424][04366] Stopping RolloutWorker_w5...
245
+ [2022-12-04 20:49:40,424][04366] Loop rollout_proc5_evt_loop terminating...
246
+ [2022-12-04 20:49:40,425][04361] Stopping RolloutWorker_w0...
247
+ [2022-12-04 20:49:40,425][04362] Stopping RolloutWorker_w1...
248
+ [2022-12-04 20:49:40,426][04363] Stopping RolloutWorker_w6...
249
+ [2022-12-04 20:49:40,426][04361] Loop rollout_proc0_evt_loop terminating...
250
+ [2022-12-04 20:49:40,426][04362] Loop rollout_proc1_evt_loop terminating...
251
+ [2022-12-04 20:49:40,426][04368] Stopping RolloutWorker_w2...
252
+ [2022-12-04 20:49:40,426][04363] Loop rollout_proc6_evt_loop terminating...
253
+ [2022-12-04 20:49:40,426][04368] Loop rollout_proc2_evt_loop terminating...
254
+ [2022-12-04 20:49:40,429][04340] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000760_389120.pth
255
+ [2022-12-04 20:49:40,429][04340] Stopping LearnerWorker_p0...
256
+ [2022-12-04 20:49:40,430][04340] Loop learner_proc0_evt_loop terminating...
257
+ [2022-12-04 20:49:40,436][04360] Weights refcount: 2 0
258
+ [2022-12-04 20:49:40,437][04360] Stopping InferenceWorker_p0-w0...
259
+ [2022-12-04 20:49:40,438][04360] Loop inference_proc0-0_evt_loop terminating...
260
+ [2022-12-04 20:49:40,474][04364] Stopping RolloutWorker_w7...
261
+ [2022-12-04 20:49:40,475][04364] Loop rollout_proc7_evt_loop terminating...
262
+ [2022-12-04 20:49:40,498][04367] Stopping RolloutWorker_w4...
263
+ [2022-12-04 20:49:40,521][04367] Loop rollout_proc4_evt_loop terminating...