andrewzhang505 committed on
Commit
4cffbfd
1 Parent(s): 303088e

Upload . with huggingface_hub

Browse files
.summary/0/events.out.tfevents.1673417676.andrew-gpu ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f5bcafd032d21d96cc07da746dd765a91f8560d22fcfe38b7f88fe133805a25
3
+ size 61881
README.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: sample-factory
3
+ tags:
4
+ - deep-reinforcement-learning
5
+ - reinforcement-learning
6
+ - sample-factory
7
+ ---
8
+
9
+ An **APPO** model trained on the **mujoco_ant** environment.
10
+
11
+ This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
12
+ Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
13
+
14
+
15
+ ## Downloading the model
16
+
17
+ After installing Sample-Factory, download the model with:
18
+ ```
19
+ python -m sample_factory.huggingface.load_from_hub -r andrewzhang505/ant_test4
20
+ ```
21
+
22
+
23
+ ## Using the model
24
+
25
+ To run the model after download, use the `enjoy` script corresponding to this environment:
26
+ ```
27
+ python -m <path.to.enjoy.module> --algo=APPO --env=mujoco_ant --train_dir=./train_dir --experiment=ant_test4
28
+ ```
29
+
30
+
31
+ You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
32
+ See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.
33
+
34
+ ## Training with this model
35
+
36
+ To continue training with this model, use the `train` script corresponding to this environment:
37
+ ```
38
+ python -m <path.to.train.module> --algo=APPO --env=mujoco_ant --train_dir=./train_dir --experiment=ant_test4 --restart_behavior=resume --train_for_env_steps=10000000000
39
+ ```
40
+
41
+ Note: you may have to set `--train_for_env_steps` to a suitably high number, because the experiment resumes from the step count at which it previously concluded.
42
+
checkpoint_p0/best_000000760_389120_reward_42.463.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a81f537790ed40ff09681497ef77143cc8d01ccc205c906aba34cb26b5de07d
3
+ size 89730
checkpoint_p0/checkpoint_000000704_360448.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73dc6a475005ac33458fe996f3e419e919e6bd0796db60dc206c5fe6796b842c
3
+ size 89730
checkpoint_p0/checkpoint_000000792_405504.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d04c7d06b796cecc234541c6283270915c8bae02161032240b1ca8da7b346b8
3
+ size 89730
config.json ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "help": false,
3
+ "algo": "APPO",
4
+ "env": "mujoco_ant",
5
+ "experiment": "ant_test",
6
+ "train_dir": "/home/andrew_huggingface_co/sample-factory/train_dir",
7
+ "restart_behavior": "resume",
8
+ "device": "gpu",
9
+ "seed": null,
10
+ "num_policies": 1,
11
+ "async_rl": false,
12
+ "serial_mode": false,
13
+ "batched_sampling": false,
14
+ "num_batches_to_accumulate": 2,
15
+ "worker_num_splits": 2,
16
+ "policy_workers_per_policy": 1,
17
+ "max_policy_lag": 1000,
18
+ "num_workers": 8,
19
+ "num_envs_per_worker": 8,
20
+ "batch_size": 1024,
21
+ "num_batches_per_epoch": 4,
22
+ "num_epochs": 2,
23
+ "rollout": 64,
24
+ "recurrence": 1,
25
+ "shuffle_minibatches": false,
26
+ "gamma": 0.99,
27
+ "reward_scale": 1,
28
+ "reward_clip": 1000.0,
29
+ "value_bootstrap": true,
30
+ "normalize_returns": true,
31
+ "exploration_loss_coeff": 0.0,
32
+ "value_loss_coeff": 1.3,
33
+ "kl_loss_coeff": 0.1,
34
+ "exploration_loss": "entropy",
35
+ "gae_lambda": 0.95,
36
+ "ppo_clip_ratio": 0.2,
37
+ "ppo_clip_value": 1.0,
38
+ "with_vtrace": false,
39
+ "vtrace_rho": 1.0,
40
+ "vtrace_c": 1.0,
41
+ "optimizer": "adam",
42
+ "adam_eps": 1e-06,
43
+ "adam_beta1": 0.9,
44
+ "adam_beta2": 0.999,
45
+ "max_grad_norm": 3.5,
46
+ "learning_rate": 0.00295,
47
+ "lr_schedule": "linear_decay",
48
+ "lr_schedule_kl_threshold": 0.008,
49
+ "obs_subtract_mean": 0.0,
50
+ "obs_scale": 1.0,
51
+ "normalize_input": true,
52
+ "normalize_input_keys": null,
53
+ "decorrelate_experience_max_seconds": 0,
54
+ "decorrelate_envs_on_one_worker": true,
55
+ "actor_worker_gpus": [],
56
+ "set_workers_cpu_affinity": true,
57
+ "force_envs_single_thread": false,
58
+ "default_niceness": 0,
59
+ "log_to_file": true,
60
+ "experiment_summaries_interval": 3,
61
+ "flush_summaries_interval": 30,
62
+ "stats_avg": 100,
63
+ "summaries_use_frameskip": true,
64
+ "heartbeat_interval": 20,
65
+ "heartbeat_reporting_interval": 180,
66
+ "train_for_env_steps": 10000000,
67
+ "train_for_seconds": 10000000000,
68
+ "save_every_sec": 15,
69
+ "keep_checkpoints": 2,
70
+ "load_checkpoint_kind": "latest",
71
+ "save_milestones_sec": -1,
72
+ "save_best_every_sec": 5,
73
+ "save_best_metric": "reward",
74
+ "save_best_after": 100000,
75
+ "benchmark": false,
76
+ "encoder_mlp_layers": [
77
+ 64,
78
+ 64
79
+ ],
80
+ "encoder_conv_architecture": "convnet_simple",
81
+ "encoder_conv_mlp_layers": [
82
+ 512
83
+ ],
84
+ "use_rnn": false,
85
+ "rnn_size": 512,
86
+ "rnn_type": "gru",
87
+ "rnn_num_layers": 1,
88
+ "decoder_mlp_layers": [],
89
+ "nonlinearity": "tanh",
90
+ "policy_initialization": "torch_default",
91
+ "policy_init_gain": 1.0,
92
+ "actor_critic_share_weights": true,
93
+ "adaptive_stddev": false,
94
+ "continuous_tanh_scale": 0.0,
95
+ "initial_stddev": 1.0,
96
+ "use_env_info_cache": false,
97
+ "env_gpu_actions": false,
98
+ "env_gpu_observations": true,
99
+ "env_frameskip": 1,
100
+ "env_framestack": 1,
101
+ "pixel_format": "CHW",
102
+ "use_record_episode_statistics": false,
103
+ "with_wandb": false,
104
+ "wandb_user": null,
105
+ "wandb_project": "sample_factory",
106
+ "wandb_group": null,
107
+ "wandb_job_type": "SF",
108
+ "wandb_tags": [],
109
+ "with_pbt": false,
110
+ "pbt_mix_policies_in_one_env": true,
111
+ "pbt_period_env_steps": 5000000,
112
+ "pbt_start_mutation": 20000000,
113
+ "pbt_replace_fraction": 0.3,
114
+ "pbt_mutation_rate": 0.15,
115
+ "pbt_replace_reward_gap": 0.1,
116
+ "pbt_replace_reward_gap_absolute": 1e-06,
117
+ "pbt_optimize_gamma": false,
118
+ "pbt_target_objective": "true_objective",
119
+ "pbt_perturb_min": 1.1,
120
+ "pbt_perturb_max": 1.5,
121
+ "command_line": "--algo=APPO --env=mujoco_ant --experiment=ant_test",
122
+ "cli_args": {
123
+ "algo": "APPO",
124
+ "env": "mujoco_ant",
125
+ "experiment": "ant_test"
126
+ },
127
+ "git_hash": "7355a9d939997e11f02d311f71e4ca3be2e9258f",
128
+ "git_repo_name": "https://github.com/andrewzhang505/sample-factory.git"
129
+ }
git.diff ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ diff --git a/docs/10-huggingface/huggingface.md b/docs/10-huggingface/huggingface.md
2
+ index 8846da73..1f1fae6f 100644
3
+ --- a/docs/10-huggingface/huggingface.md
4
+ +++ b/docs/10-huggingface/huggingface.md
5
+ @@ -77,10 +77,16 @@ You can also save a video of the model during evaluation to upload to the hub wi
6
+
7
+ - `--video_name`: The name of the video to save as. If `None`, will save to `replay.mp4` in your experiment directory
8
+
9
+ +Also, you can include information in the Hugging Face Hub model card for how to train and enjoy using this model. These parameters are optional:
10
+ +
11
+ +- `--train_script`: The module path for training this model
12
+ +
13
+ +- `--enjoy_script`: The module path for enjoying this model
14
+ +
15
+ For example:
16
+
17
+ ```
18
+ -python -m sf_examples.mujoco.enjoy_mujoco --algo=APPO --env=mujoco_ant --experiment=<repo_name> --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=<username>/<hf_repo_name> --save_video --no_render
19
+ +python -m sf_examples.mujoco.enjoy_mujoco --algo=APPO --env=mujoco_ant --experiment=<repo_name> --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=<username>/<hf_repo_name> --save_video --no_render --enjoy_script=sf_examples.mujoco.enjoy_mujoco --train_script=sf_examples.mujoco.train_mujoco
20
+ ```
21
+
22
+ #### Using the push_to_hub Script
23
+ @@ -95,4 +101,6 @@ The command line arguments are:
24
+
25
+ - `-r`: The repo_id to save on HF Hub. This is the same as `hf_repository` in the enjoy script and must be in the form `<hf_username>/<hf_repo_name>`
26
+
27
+ -- `-d`: The full path to your experiment directory to upload
28
+
29
+ +- `-d`: The full path to your experiment directory to upload
30
+ +
31
+ +The optional arguments of `--train_script` and `--enjoy_script` can also be used. See the above section for more details
32
+
33
+ diff --git a/sample_factory/cfg/arguments.py b/sample_factory/cfg/arguments.py
34
+ index 820efce6..f736342d 100644
35
+ --- a/sample_factory/cfg/arguments.py
36
+ +++ b/sample_factory/cfg/arguments.py
37
+ @@ -18,7 +18,7 @@ from sample_factory.cfg.cfg import (
38
+ )
39
+ from sample_factory.utils.attr_dict import AttrDict
40
+ from sample_factory.utils.typing import Config
41
+ -from sample_factory.utils.utils import cfg_file, cfg_file_old, get_git_commit_hash, get_top_level_script, log
42
+ +from sample_factory.utils.utils import cfg_file, cfg_file_old, get_git_commit_hash, log
43
+
44
+
45
+ def parse_sf_args(
46
+ @@ -91,7 +91,6 @@ def postprocess_args(args, argv, parser) -> argparse.Namespace:
47
+
48
+ args.cli_args = vars(cli_args)
49
+ args.git_hash, args.git_repo_name = get_git_commit_hash()
50
+ - args.train_script = get_top_level_script()
51
+ return args
52
+
53
+
54
+ diff --git a/sample_factory/cfg/cfg.py b/sample_factory/cfg/cfg.py
55
+ index 43393da1..360e6895 100644
56
+ --- a/sample_factory/cfg/cfg.py
57
+ +++ b/sample_factory/cfg/cfg.py
58
+ @@ -675,6 +675,19 @@ def add_eval_args(parser):
59
+ help="False to sample from action distributions at test time. True to just use the argmax.",
60
+ )
61
+
62
+ + parser.add_argument(
63
+ + "--train_script",
64
+ + default=None,
65
+ + type=str,
66
+ + help="Module name used to run training script. Used to generate HF model card",
67
+ + )
68
+ + parser.add_argument(
69
+ + "--enjoy_script",
70
+ + default=None,
71
+ + type=str,
72
+ + help="Module name used to run training script. Used to generate HF model card",
73
+ + )
74
+ +
75
+
76
+ def add_wandb_args(p: ArgumentParser):
77
+ """Weights and Biases experiment monitoring."""
78
+ diff --git a/sample_factory/enjoy.py b/sample_factory/enjoy.py
79
+ index 341b537b..b620c532 100644
80
+ --- a/sample_factory/enjoy.py
81
+ +++ b/sample_factory/enjoy.py
82
+ @@ -21,7 +21,7 @@ from sample_factory.model.actor_critic import create_actor_critic
83
+ from sample_factory.model.model_utils import get_rnn_size
84
+ from sample_factory.utils.attr_dict import AttrDict
85
+ from sample_factory.utils.typing import Config, StatusCode
86
+ -from sample_factory.utils.utils import debug_log_every_n, experiment_dir, get_top_level_script, log
87
+ +from sample_factory.utils.utils import debug_log_every_n, experiment_dir, log
88
+
89
+
90
+ def visualize_policy_inputs(normalized_obs: Dict[str, Tensor]) -> None:
91
+ @@ -260,9 +260,8 @@ def enjoy(cfg: Config) -> Tuple[StatusCode, float]:
92
+ generate_replay_video(experiment_dir(cfg=cfg), video_frames, fps)
93
+
94
+ if cfg.push_to_hub:
95
+ - enjoy_name = get_top_level_script()
96
+ generate_model_card(
97
+ - experiment_dir(cfg=cfg), cfg.algo, cfg.env, cfg.hf_repository, reward_list, enjoy_name, cfg.train_script
98
+ + experiment_dir(cfg=cfg), cfg.algo, cfg.env, cfg.hf_repository, reward_list, cfg.enjoy_script, cfg.train_script
99
+ )
100
+ push_to_hf(experiment_dir(cfg=cfg), cfg.hf_repository)
101
+
102
+ diff --git a/sample_factory/huggingface/huggingface_utils.py b/sample_factory/huggingface/huggingface_utils.py
103
+ index 90184da7..5b4a6b14 100644
104
+ --- a/sample_factory/huggingface/huggingface_utils.py
105
+ +++ b/sample_factory/huggingface/huggingface_utils.py
106
+ @@ -57,8 +57,10 @@ python -m sample_factory.huggingface.load_from_hub -r {repo_id}
107
+ ```\n
108
+ """
109
+
110
+ - if enjoy_name is not None:
111
+ - readme += f"""
112
+ + if enjoy_name is None:
113
+ + enjoy_name = "<path.to.enjoy.module>"
114
+ +
115
+ + readme += f"""
116
+ ## Using the model\n
117
+ To run the model after download, use the `enjoy` script corresponding to this environment:
118
+ ```
119
+ @@ -67,17 +69,19 @@ python -m {enjoy_name} --algo={algo} --env={env} --train_dir=./train_dir --exper
120
+ \n
121
+ You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
122
+ See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details
123
+ - """
124
+ + """
125
+
126
+ - if train_name is not None:
127
+ - readme += f"""
128
+ + if train_name is None:
129
+ + train_name = "<path.to.train.module>"
130
+ +
131
+ + readme += f"""
132
+ ## Training with this model\n
133
+ To continue training with this model, use the `train` script corresponding to this environment:
134
+ ```
135
+ python -m {train_name} --algo={algo} --env={env} --train_dir=./train_dir --experiment={repo_name} --restart_behavior=resume --train_for_env_steps=10000000000
136
+ ```\n
137
+ Note, you may have to adjust `--train_for_env_steps` to a suitably high number as the experiment will resume at the number of steps it concluded at.
138
+ - """
139
+ + """
140
+
141
+ with open(readme_path, "w", encoding="utf-8") as f:
142
+ f.write(readme)
143
+ diff --git a/sample_factory/huggingface/push_to_hub.py b/sample_factory/huggingface/push_to_hub.py
144
+ index dbd5c382..d67806ad 100644
145
+ --- a/sample_factory/huggingface/push_to_hub.py
146
+ +++ b/sample_factory/huggingface/push_to_hub.py
147
+ @@ -16,6 +16,18 @@ def main():
148
+ type=str,
149
+ )
150
+ parser.add_argument("-d", "--experiment_dir", help="Path to your experiment directory", type=str)
151
+ + parser.add_argument(
152
+ + "--train_script",
153
+ + default=None,
154
+ + type=str,
155
+ + help="Module name used to run training script. Used to generate HF model card",
156
+ + )
157
+ + parser.add_argument(
158
+ + "--enjoy_script",
159
+ + default=None,
160
+ + type=str,
161
+ + help="Module name used to run training script. Used to generate HF model card",
162
+ + )
163
+ args = parser.parse_args()
164
+
165
+ cfg_file = os.path.join(args.experiment_dir, "config.json")
166
+ @@ -34,7 +46,7 @@ def main():
167
+ json_params = json.load(json_file)
168
+ cfg = AttrDict(json_params)
169
+
170
+ - generate_model_card(args.experiment_dir, cfg.algo, cfg.env, args.hf_repository)
171
+ + generate_model_card(args.experiment_dir, cfg.algo, cfg.env, args.hf_repository, enjoy_name=args.enjoy_script, train_name=args.train_script)
172
+ push_to_hf(args.experiment_dir, args.hf_repository)
173
+
174
+
175
+ diff --git a/sample_factory/utils/utils.py b/sample_factory/utils/utils.py
176
+ index 99db3c10..fcd335c5 100644
177
+ --- a/sample_factory/utils/utils.py
178
+ +++ b/sample_factory/utils/utils.py
179
+ @@ -493,5 +493,5 @@ def debug_log_every_n(n, msg, *args, **kwargs):
180
+ log_every_n(n, logging.DEBUG, msg, *args, **kwargs)
181
+
182
+
183
+ -def get_top_level_script():
184
+ - return argv[0].split("sample-factory/")[-1][:-3].replace("/", ".")
185
+ +# def get_top_level_script():
186
+ +# return argv[0].split("sample-factory/")[-1][:-3].replace("/", ".")
sf_log.txt ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2023-01-11 06:14:39,279][18976] Saving configuration to /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/config.json...
2
+ [2023-01-11 06:14:39,292][18976] Rollout worker 0 uses device cpu
3
+ [2023-01-11 06:14:39,292][18976] Rollout worker 1 uses device cpu
4
+ [2023-01-11 06:14:39,293][18976] Rollout worker 2 uses device cpu
5
+ [2023-01-11 06:14:39,293][18976] Rollout worker 3 uses device cpu
6
+ [2023-01-11 06:14:39,293][18976] Rollout worker 4 uses device cpu
7
+ [2023-01-11 06:14:39,293][18976] Rollout worker 5 uses device cpu
8
+ [2023-01-11 06:14:39,293][18976] Rollout worker 6 uses device cpu
9
+ [2023-01-11 06:14:39,293][18976] Rollout worker 7 uses device cpu
10
+ [2023-01-11 06:14:39,293][18976] In synchronous mode, we only accumulate one batch. Setting num_batches_to_accumulate to 1
11
+ [2023-01-11 06:14:39,316][18976] Using GPUs [0] for process 0 (actually maps to GPUs [0])
12
+ [2023-01-11 06:14:39,316][18976] InferenceWorker_p0-w0: min num requests: 2
13
+ [2023-01-11 06:14:39,350][18976] Starting all processes...
14
+ [2023-01-11 06:14:39,350][18976] Starting process learner_proc0
15
+ [2023-01-11 06:14:39,356][18976] Starting all processes...
16
+ [2023-01-11 06:14:39,362][18976] Starting process inference_proc0-0
17
+ [2023-01-11 06:14:39,362][18976] Starting process rollout_proc0
18
+ [2023-01-11 06:14:39,363][18976] Starting process rollout_proc1
19
+ [2023-01-11 06:14:39,363][18976] Starting process rollout_proc2
20
+ [2023-01-11 06:14:39,363][18976] Starting process rollout_proc3
21
+ [2023-01-11 06:14:39,363][18976] Starting process rollout_proc4
22
+ [2023-01-11 06:14:39,364][18976] Starting process rollout_proc5
23
+ [2023-01-11 06:14:39,364][18976] Starting process rollout_proc6
24
+ [2023-01-11 06:14:39,364][18976] Starting process rollout_proc7
25
+ [2023-01-11 06:14:41,133][19078] Worker 3 uses CPU cores [3]
26
+ [2023-01-11 06:14:41,149][19080] Worker 4 uses CPU cores [4]
27
+ [2023-01-11 06:14:41,173][19081] Worker 6 uses CPU cores [6]
28
+ [2023-01-11 06:14:41,236][19062] Using GPUs [0] for process 0 (actually maps to GPUs [0])
29
+ [2023-01-11 06:14:41,236][19062] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
30
+ [2023-01-11 06:14:41,237][19079] Worker 2 uses CPU cores [2]
31
+ [2023-01-11 06:14:41,419][19077] Worker 0 uses CPU cores [0]
32
+ [2023-01-11 06:14:41,431][19076] Worker 1 uses CPU cores [1]
33
+ [2023-01-11 06:14:41,458][19075] Using GPUs [0] for process 0 (actually maps to GPUs [0])
34
+ [2023-01-11 06:14:41,458][19075] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
35
+ [2023-01-11 06:14:41,495][19082] Worker 5 uses CPU cores [5]
36
+ [2023-01-11 06:14:41,525][19083] Worker 7 uses CPU cores [7]
37
+ [2023-01-11 06:14:41,971][19062] Num visible devices: 1
38
+ [2023-01-11 06:14:41,971][19075] Num visible devices: 1
39
+ [2023-01-11 06:14:42,022][19062] Starting seed is not provided
40
+ [2023-01-11 06:14:42,022][19062] Using GPUs [0] for process 0 (actually maps to GPUs [0])
41
+ [2023-01-11 06:14:42,022][19062] Initializing actor-critic model on device cuda:0
42
+ [2023-01-11 06:14:42,023][19062] RunningMeanStd input shape: (27,)
43
+ [2023-01-11 06:14:42,023][19062] RunningMeanStd input shape: (1,)
44
+ [2023-01-11 06:14:42,127][19062] Created Actor Critic model with architecture:
45
+ [2023-01-11 06:14:42,128][19062] ActorCriticSharedWeights(
46
+ (obs_normalizer): ObservationNormalizer(
47
+ (running_mean_std): RunningMeanStdDictInPlace(
48
+ (running_mean_std): ModuleDict(
49
+ (obs): RunningMeanStdInPlace()
50
+ )
51
+ )
52
+ )
53
+ (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
54
+ (encoder): MultiInputEncoder(
55
+ (encoders): ModuleDict(
56
+ (obs): MlpEncoder(
57
+ (mlp_head): RecursiveScriptModule(
58
+ original_name=Sequential
59
+ (0): RecursiveScriptModule(original_name=Linear)
60
+ (1): RecursiveScriptModule(original_name=Tanh)
61
+ (2): RecursiveScriptModule(original_name=Linear)
62
+ (3): RecursiveScriptModule(original_name=Tanh)
63
+ )
64
+ )
65
+ )
66
+ )
67
+ (core): ModelCoreIdentity()
68
+ (decoder): MlpDecoder(
69
+ (mlp): Identity()
70
+ )
71
+ (critic_linear): Linear(in_features=64, out_features=1, bias=True)
72
+ (action_parameterization): ActionParameterizationContinuousNonAdaptiveStddev(
73
+ (distribution_linear): Linear(in_features=64, out_features=8, bias=True)
74
+ )
75
+ )
76
+ [2023-01-11 06:14:47,316][19062] Using optimizer <class 'torch.optim.adam.Adam'>
77
+ [2023-01-11 06:14:47,317][19062] No checkpoints found
78
+ [2023-01-11 06:14:47,317][19062] Did not load from checkpoint, starting from scratch!
79
+ [2023-01-11 06:14:47,318][19062] Initialized policy 0 weights for model version 0
80
+ [2023-01-11 06:14:47,321][19062] LearnerWorker_p0 finished initialization!
81
+ [2023-01-11 06:14:47,322][19062] Using GPUs [0] for process 0 (actually maps to GPUs [0])
82
+ [2023-01-11 06:14:47,429][19075] RunningMeanStd input shape: (27,)
83
+ [2023-01-11 06:14:47,429][19075] RunningMeanStd input shape: (1,)
84
+ [2023-01-11 06:14:50,677][18976] Inference worker 0-0 is ready!
85
+ [2023-01-11 06:14:50,678][18976] All inference workers are ready! Signal rollout workers to start!
86
+ [2023-01-11 06:14:50,880][19081] Decorrelating experience for 0 frames...
87
+ [2023-01-11 06:14:50,881][19076] Decorrelating experience for 0 frames...
88
+ [2023-01-11 06:14:50,882][19081] Decorrelating experience for 64 frames...
89
+ [2023-01-11 06:14:50,883][19076] Decorrelating experience for 64 frames...
90
+ [2023-01-11 06:14:50,883][19077] Decorrelating experience for 0 frames...
91
+ [2023-01-11 06:14:50,884][19080] Decorrelating experience for 0 frames...
92
+ [2023-01-11 06:14:50,884][19079] Decorrelating experience for 0 frames...
93
+ [2023-01-11 06:14:50,885][19082] Decorrelating experience for 0 frames...
94
+ [2023-01-11 06:14:50,885][19078] Decorrelating experience for 0 frames...
95
+ [2023-01-11 06:14:50,885][19083] Decorrelating experience for 0 frames...
96
+ [2023-01-11 06:14:50,885][19077] Decorrelating experience for 64 frames...
97
+ [2023-01-11 06:14:50,886][19080] Decorrelating experience for 64 frames...
98
+ [2023-01-11 06:14:50,886][19079] Decorrelating experience for 64 frames...
99
+ [2023-01-11 06:14:50,887][19082] Decorrelating experience for 64 frames...
100
+ [2023-01-11 06:14:50,887][19078] Decorrelating experience for 64 frames...
101
+ [2023-01-11 06:14:50,887][19083] Decorrelating experience for 64 frames...
102
+ [2023-01-11 06:14:50,938][19076] Decorrelating experience for 128 frames...
103
+ [2023-01-11 06:14:50,939][19081] Decorrelating experience for 128 frames...
104
+ [2023-01-11 06:14:50,940][19077] Decorrelating experience for 128 frames...
105
+ [2023-01-11 06:14:50,941][19080] Decorrelating experience for 128 frames...
106
+ [2023-01-11 06:14:50,943][19083] Decorrelating experience for 128 frames...
107
+ [2023-01-11 06:14:50,943][19078] Decorrelating experience for 128 frames...
108
+ [2023-01-11 06:14:50,943][19079] Decorrelating experience for 128 frames...
109
+ [2023-01-11 06:14:50,944][19082] Decorrelating experience for 128 frames...
110
+ [2023-01-11 06:14:51,044][19076] Decorrelating experience for 192 frames...
111
+ [2023-01-11 06:14:51,047][19081] Decorrelating experience for 192 frames...
112
+ [2023-01-11 06:14:51,047][19077] Decorrelating experience for 192 frames...
113
+ [2023-01-11 06:14:51,050][19083] Decorrelating experience for 192 frames...
114
+ [2023-01-11 06:14:51,050][19080] Decorrelating experience for 192 frames...
115
+ [2023-01-11 06:14:51,051][19078] Decorrelating experience for 192 frames...
116
+ [2023-01-11 06:14:51,051][19082] Decorrelating experience for 192 frames...
117
+ [2023-01-11 06:14:51,054][19079] Decorrelating experience for 192 frames...
118
+ [2023-01-11 06:14:51,235][19076] Decorrelating experience for 256 frames...
119
+ [2023-01-11 06:14:51,238][19083] Decorrelating experience for 256 frames...
120
+ [2023-01-11 06:14:51,239][19077] Decorrelating experience for 256 frames...
121
+ [2023-01-11 06:14:51,242][19078] Decorrelating experience for 256 frames...
122
+ [2023-01-11 06:14:51,243][19081] Decorrelating experience for 256 frames...
123
+ [2023-01-11 06:14:51,247][19080] Decorrelating experience for 256 frames...
124
+ [2023-01-11 06:14:51,248][19082] Decorrelating experience for 256 frames...
125
+ [2023-01-11 06:14:51,251][19079] Decorrelating experience for 256 frames...
126
+ [2023-01-11 06:14:51,333][18976] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
127
+ [2023-01-11 06:14:51,335][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000000_0.pth...
128
+ [2023-01-11 06:14:51,445][19083] Decorrelating experience for 320 frames...
129
+ [2023-01-11 06:14:51,447][19077] Decorrelating experience for 320 frames...
130
+ [2023-01-11 06:14:51,450][19076] Decorrelating experience for 320 frames...
131
+ [2023-01-11 06:14:51,452][19078] Decorrelating experience for 320 frames...
132
+ [2023-01-11 06:14:51,453][19081] Decorrelating experience for 320 frames...
133
+ [2023-01-11 06:14:51,457][19082] Decorrelating experience for 320 frames...
134
+ [2023-01-11 06:14:51,458][19080] Decorrelating experience for 320 frames...
135
+ [2023-01-11 06:14:51,465][19079] Decorrelating experience for 320 frames...
136
+ [2023-01-11 06:14:51,706][19083] Decorrelating experience for 384 frames...
137
+ [2023-01-11 06:14:51,712][19076] Decorrelating experience for 384 frames...
138
+ [2023-01-11 06:14:51,716][19081] Decorrelating experience for 384 frames...
139
+ [2023-01-11 06:14:51,717][19078] Decorrelating experience for 384 frames...
140
+ [2023-01-11 06:14:51,717][19077] Decorrelating experience for 384 frames...
141
+ [2023-01-11 06:14:51,723][19082] Decorrelating experience for 384 frames...
142
+ [2023-01-11 06:14:51,724][19080] Decorrelating experience for 384 frames...
143
+ [2023-01-11 06:14:51,737][19079] Decorrelating experience for 384 frames...
144
+ [2023-01-11 06:14:52,017][19083] Decorrelating experience for 448 frames...
145
+ [2023-01-11 06:14:52,029][19076] Decorrelating experience for 448 frames...
146
+ [2023-01-11 06:14:52,036][19077] Decorrelating experience for 448 frames...
147
+ [2023-01-11 06:14:52,037][19082] Decorrelating experience for 448 frames...
148
+ [2023-01-11 06:14:52,038][19081] Decorrelating experience for 448 frames...
149
+ [2023-01-11 06:14:52,041][19078] Decorrelating experience for 448 frames...
150
+ [2023-01-11 06:14:52,053][19080] Decorrelating experience for 448 frames...
151
+ [2023-01-11 06:14:52,064][19079] Decorrelating experience for 448 frames...
152
+ [2023-01-11 06:14:56,333][18976] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 4096. Throughput: 0: 177.6. Samples: 888. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
153
+ [2023-01-11 06:14:56,333][18976] Avg episode reward: [(0, '-126.473')]
154
+ [2023-01-11 06:14:59,307][18976] Heartbeat connected on Batcher_0
155
+ [2023-01-11 06:14:59,311][18976] Heartbeat connected on LearnerWorker_p0
156
+ [2023-01-11 06:14:59,322][18976] Heartbeat connected on RolloutWorker_w0
157
+ [2023-01-11 06:14:59,326][18976] Heartbeat connected on InferenceWorker_p0-w0
158
+ [2023-01-11 06:14:59,331][18976] Heartbeat connected on RolloutWorker_w1
159
+ [2023-01-11 06:14:59,335][18976] Heartbeat connected on RolloutWorker_w2
160
+ [2023-01-11 06:14:59,340][18976] Heartbeat connected on RolloutWorker_w3
161
+ [2023-01-11 06:14:59,341][18976] Heartbeat connected on RolloutWorker_w5
162
+ [2023-01-11 06:14:59,341][18976] Heartbeat connected on RolloutWorker_w4
163
+ [2023-01-11 06:14:59,351][18976] Heartbeat connected on RolloutWorker_w6
164
+ [2023-01-11 06:14:59,351][18976] Heartbeat connected on RolloutWorker_w7
165
+ [2023-01-11 06:15:01,333][18976] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 3276.8). Total num frames: 32768. Throughput: 0: 2408.4. Samples: 24084. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
166
+ [2023-01-11 06:15:01,333][18976] Avg episode reward: [(0, '-130.992')]
167
+ [2023-01-11 06:15:03,917][19075] Updated weights for policy 0, policy_version 80 (0.0008)
168
+ [2023-01-11 06:15:06,333][18976] Fps is (10 sec: 4096.0, 60 sec: 3003.7, 300 sec: 3003.7). Total num frames: 45056. Throughput: 0: 3008.0. Samples: 45120. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
169
+ [2023-01-11 06:15:06,334][18976] Avg episode reward: [(0, '-227.535')]
170
+ [2023-01-11 06:15:06,339][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000088_45056.pth...
171
+ [2023-01-11 06:15:11,333][18976] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3686.4). Total num frames: 73728. Throughput: 0: 3020.8. Samples: 60416. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
172
+ [2023-01-11 06:15:11,333][18976] Avg episode reward: [(0, '-297.337')]
173
+ [2023-01-11 06:15:12,628][19075] Updated weights for policy 0, policy_version 160 (0.0007)
174
+ [2023-01-11 06:15:16,332][18976] Fps is (10 sec: 5324.9, 60 sec: 3932.2, 300 sec: 3932.2). Total num frames: 98304. Throughput: 0: 3737.8. Samples: 93444. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
175
+ [2023-01-11 06:15:16,333][18976] Avg episode reward: [(0, '-127.822')]
176
+ [2023-01-11 06:15:20,209][19075] Updated weights for policy 0, policy_version 240 (0.0007)
177
+ [2023-01-11 06:15:21,333][18976] Fps is (10 sec: 5324.8, 60 sec: 4232.5, 300 sec: 4232.5). Total num frames: 126976. Throughput: 0: 4195.5. Samples: 125864. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
178
+ [2023-01-11 06:15:21,333][18976] Avg episode reward: [(0, '-105.405')]
179
+ [2023-01-11 06:15:21,340][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000248_126976.pth...
180
+ [2023-01-11 06:15:21,347][19062] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000000_0.pth
181
+ [2023-01-11 06:15:21,348][19062] Saving new best policy, reward=-105.405!
182
+ [2023-01-11 06:15:26,333][18976] Fps is (10 sec: 5734.3, 60 sec: 4447.1, 300 sec: 4447.1). Total num frames: 155648. Throughput: 0: 4065.3. Samples: 142284. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
183
+ [2023-01-11 06:15:26,333][18976] Avg episode reward: [(0, '-109.426')]
184
+ [2023-01-11 06:15:27,645][19075] Updated weights for policy 0, policy_version 320 (0.0007)
185
+ [2023-01-11 06:15:31,332][18976] Fps is (10 sec: 5324.9, 60 sec: 4505.6, 300 sec: 4505.6). Total num frames: 180224. Throughput: 0: 4384.3. Samples: 175372. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
186
+ [2023-01-11 06:15:31,333][18976] Avg episode reward: [(0, '-72.518')]
187
+ [2023-01-11 06:15:31,333][19062] Saving new best policy, reward=-72.518!
188
+ [2023-01-11 06:15:35,206][19075] Updated weights for policy 0, policy_version 400 (0.0007)
189
+ [2023-01-11 06:15:36,333][18976] Fps is (10 sec: 5324.8, 60 sec: 4642.1, 300 sec: 4642.1). Total num frames: 208896. Throughput: 0: 4618.4. Samples: 207828. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
190
+ [2023-01-11 06:15:36,333][18976] Avg episode reward: [(0, '-35.778')]
191
+ [2023-01-11 06:15:36,339][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000408_208896.pth...
192
+ [2023-01-11 06:15:36,347][19062] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000088_45056.pth
193
+ [2023-01-11 06:15:36,347][19062] Saving new best policy, reward=-35.778!
194
+ [2023-01-11 06:15:41,333][18976] Fps is (10 sec: 5734.3, 60 sec: 4751.4, 300 sec: 4751.4). Total num frames: 237568. Throughput: 0: 4961.7. Samples: 224164. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
195
+ [2023-01-11 06:15:41,333][18976] Avg episode reward: [(0, '-30.077')]
196
+ [2023-01-11 06:15:41,334][19062] Saving new best policy, reward=-30.077!
197
+ [2023-01-11 06:15:42,735][19075] Updated weights for policy 0, policy_version 480 (0.0007)
198
+ [2023-01-11 06:15:46,333][18976] Fps is (10 sec: 5324.9, 60 sec: 4766.3, 300 sec: 4766.3). Total num frames: 262144. Throughput: 0: 5175.8. Samples: 256996. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
199
+ [2023-01-11 06:15:46,333][18976] Avg episode reward: [(0, '-40.853')]
200
+ [2023-01-11 06:15:50,214][19075] Updated weights for policy 0, policy_version 560 (0.0007)
201
+ [2023-01-11 06:15:51,333][18976] Fps is (10 sec: 5324.8, 60 sec: 4846.9, 300 sec: 4846.9). Total num frames: 290816. Throughput: 0: 5434.8. Samples: 289688. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
202
+ [2023-01-11 06:15:51,333][18976] Avg episode reward: [(0, '-38.381')]
203
+ [2023-01-11 06:15:51,340][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000568_290816.pth...
204
+ [2023-01-11 06:15:51,347][19062] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000248_126976.pth
205
+ [2023-01-11 06:15:56,333][18976] Fps is (10 sec: 5734.3, 60 sec: 5256.5, 300 sec: 4915.2). Total num frames: 319488. Throughput: 0: 5459.1. Samples: 306076. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
206
+ [2023-01-11 06:15:56,333][18976] Avg episode reward: [(0, '-20.183')]
207
+ [2023-01-11 06:15:56,334][19062] Saving new best policy, reward=-20.183!
208
+ [2023-01-11 06:15:57,590][19075] Updated weights for policy 0, policy_version 640 (0.0006)
209
+ [2023-01-11 06:16:01,333][18976] Fps is (10 sec: 5324.9, 60 sec: 5188.3, 300 sec: 4915.2). Total num frames: 344064. Throughput: 0: 5477.0. Samples: 339908. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
210
+ [2023-01-11 06:16:01,333][18976] Avg episode reward: [(0, '7.282')]
211
+ [2023-01-11 06:16:01,335][19062] Saving new best policy, reward=7.282!
212
+ [2023-01-11 06:16:06,333][18976] Fps is (10 sec: 4096.0, 60 sec: 5256.5, 300 sec: 4806.0). Total num frames: 360448. Throughput: 0: 5188.2. Samples: 359332. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
213
+ [2023-01-11 06:16:06,333][18976] Avg episode reward: [(0, '20.695')]
214
+ [2023-01-11 06:16:06,339][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000704_360448.pth...
215
+ [2023-01-11 06:16:06,346][19062] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000408_208896.pth
216
+ [2023-01-11 06:16:06,347][19062] Saving new best policy, reward=20.695!
217
+ [2023-01-11 06:16:07,412][19075] Updated weights for policy 0, policy_version 720 (0.0007)
218
+ [2023-01-11 06:16:11,332][18976] Fps is (10 sec: 4505.6, 60 sec: 5256.5, 300 sec: 4864.0). Total num frames: 389120. Throughput: 0: 5189.2. Samples: 375796. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
219
+ [2023-01-11 06:16:11,333][18976] Avg episode reward: [(0, '42.463')]
220
+ [2023-01-11 06:16:11,334][19062] Saving new best policy, reward=42.463!
221
+ [2023-01-11 06:16:14,150][18976] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 18976], exiting...
222
+ [2023-01-11 06:16:14,150][18976] Runner profile tree view:
223
+ main_loop: 94.8007
224
+ [2023-01-11 06:16:14,150][19080] Stopping RolloutWorker_w4...
225
+ [2023-01-11 06:16:14,151][19079] Stopping RolloutWorker_w2...
226
+ [2023-01-11 06:16:14,151][18976] Collected {0: 401408}, FPS: 4234.2
227
+ [2023-01-11 06:16:14,151][19082] Stopping RolloutWorker_w5...
228
+ [2023-01-11 06:16:14,151][19079] Loop rollout_proc2_evt_loop terminating...
229
+ [2023-01-11 06:16:14,151][19080] Loop rollout_proc4_evt_loop terminating...
230
+ [2023-01-11 06:16:14,152][19082] Loop rollout_proc5_evt_loop terminating...
231
+ [2023-01-11 06:16:14,151][19083] Stopping RolloutWorker_w7...
232
+ [2023-01-11 06:16:14,151][19076] Stopping RolloutWorker_w1...
233
+ [2023-01-11 06:16:14,151][19078] Stopping RolloutWorker_w3...
234
+ [2023-01-11 06:16:14,154][19083] Loop rollout_proc7_evt_loop terminating...
235
+ [2023-01-11 06:16:14,154][19078] Loop rollout_proc3_evt_loop terminating...
236
+ [2023-01-11 06:16:14,154][19076] Loop rollout_proc1_evt_loop terminating...
237
+ [2023-01-11 06:16:14,151][19062] Stopping Batcher_0...
238
+ [2023-01-11 06:16:14,151][19081] Stopping RolloutWorker_w6...
239
+ [2023-01-11 06:16:14,151][19077] Stopping RolloutWorker_w0...
240
+ [2023-01-11 06:16:14,157][19062] Loop batcher_evt_loop terminating...
241
+ [2023-01-11 06:16:14,157][19077] Loop rollout_proc0_evt_loop terminating...
242
+ [2023-01-11 06:16:14,157][19081] Loop rollout_proc6_evt_loop terminating...
243
+ [2023-01-11 06:16:14,211][19075] Weights refcount: 2 0
244
+ [2023-01-11 06:16:14,213][19075] Stopping InferenceWorker_p0-w0...
245
+ [2023-01-11 06:16:14,214][19075] Loop inference_proc0-0_evt_loop terminating...
246
+ [2023-01-11 06:16:14,224][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000792_405504.pth...
247
+ [2023-01-11 06:16:14,232][19062] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000568_290816.pth
248
+ [2023-01-11 06:16:14,233][19062] Stopping LearnerWorker_p0...
249
+ [2023-01-11 06:16:14,233][19062] Loop learner_proc0_evt_loop terminating...