chavicoski committed on
Commit
16f1b1a
1 Parent(s): b9aeb22

Upload . with huggingface_hub

Browse files
.summary/0/events.out.tfevents.1677403925.14111cc73324 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b064f2402f25d91bf4706a638e4917af6f13be9e5f4b9eff531bc3c5c376d589
3
+ size 2086
README.md ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: sample-factory
3
+ tags:
4
+ - deep-reinforcement-learning
5
+ - reinforcement-learning
6
+ - sample-factory
7
+ model-index:
8
+ - name: APPO
9
+ results:
10
+ - task:
11
+ type: reinforcement-learning
12
+ name: reinforcement-learning
13
+ dataset:
14
+ name: doom_health_gathering_supreme
15
+ type: doom_health_gathering_supreme
16
+ metrics:
17
+ - type: mean_reward
18
+ value: 4.10 +/- 0.81
19
+ name: mean_reward
20
+ verified: false
21
+ ---
22
+
23
+ An **APPO** model trained on the **doom_health_gathering_supreme** environment.
24
+
25
+ This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
26
+ Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
27
+
28
+
29
+ ## Downloading the model
30
+
31
+ After installing Sample-Factory, download the model with:
32
+ ```
33
+ python -m sample_factory.huggingface.load_from_hub -r chavicoski/vizdoom_health_gathering_supreme
34
+ ```
35
+
36
+
37
+ ## Using the model
38
+
39
+ To run the model after download, use the `enjoy` script corresponding to this environment:
40
+ ```
41
+ python -m <path.to.enjoy.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=vizdoom_health_gathering_supreme
42
+ ```
43
+
44
+
45
+ You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
46
+ See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.
47
+
48
+ ## Training with this model
49
+
50
+ To continue training with this model, use the `train` script corresponding to this environment:
51
+ ```
52
+ python -m <path.to.train.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=vizdoom_health_gathering_supreme --restart_behavior=resume --train_for_env_steps=10000000000
53
+ ```
54
+
55
+ Note: you may have to adjust `--train_for_env_steps` to a suitably high number, as the experiment will resume at the number of steps it concluded at.
56
+
checkpoint_p0/checkpoint_000000004_16384.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2784c5c0f2fa7f0f8f5596c27c93bda6dc515070626fad4b0cd067093ce1877
3
+ size 34928836
config.json ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "help": false,
3
+ "algo": "APPO",
4
+ "env": "doom_health_gathering_supreme",
5
+ "experiment": "default_experiment",
6
+ "train_dir": "/workspace/train_dir",
7
+ "restart_behavior": "resume",
8
+ "device": "gpu",
9
+ "seed": null,
10
+ "num_policies": 1,
11
+ "async_rl": true,
12
+ "serial_mode": false,
13
+ "batched_sampling": false,
14
+ "num_batches_to_accumulate": 2,
15
+ "worker_num_splits": 2,
16
+ "policy_workers_per_policy": 1,
17
+ "max_policy_lag": 1000,
18
+ "num_workers": 12,
19
+ "num_envs_per_worker": 4,
20
+ "batch_size": 1024,
21
+ "num_batches_per_epoch": 1,
22
+ "num_epochs": 1,
23
+ "rollout": 32,
24
+ "recurrence": 32,
25
+ "shuffle_minibatches": false,
26
+ "gamma": 0.99,
27
+ "reward_scale": 1.0,
28
+ "reward_clip": 1000.0,
29
+ "value_bootstrap": false,
30
+ "normalize_returns": true,
31
+ "exploration_loss_coeff": 0.001,
32
+ "value_loss_coeff": 0.5,
33
+ "kl_loss_coeff": 0.0,
34
+ "exploration_loss": "symmetric_kl",
35
+ "gae_lambda": 0.95,
36
+ "ppo_clip_ratio": 0.1,
37
+ "ppo_clip_value": 0.2,
38
+ "with_vtrace": false,
39
+ "vtrace_rho": 1.0,
40
+ "vtrace_c": 1.0,
41
+ "optimizer": "adam",
42
+ "adam_eps": 1e-06,
43
+ "adam_beta1": 0.9,
44
+ "adam_beta2": 0.999,
45
+ "max_grad_norm": 4.0,
46
+ "learning_rate": 0.0001,
47
+ "lr_schedule": "constant",
48
+ "lr_schedule_kl_threshold": 0.008,
49
+ "lr_adaptive_min": 1e-06,
50
+ "lr_adaptive_max": 0.01,
51
+ "obs_subtract_mean": 0.0,
52
+ "obs_scale": 255.0,
53
+ "normalize_input": true,
54
+ "normalize_input_keys": null,
55
+ "decorrelate_experience_max_seconds": 0,
56
+ "decorrelate_envs_on_one_worker": true,
57
+ "actor_worker_gpus": [],
58
+ "set_workers_cpu_affinity": true,
59
+ "force_envs_single_thread": false,
60
+ "default_niceness": 0,
61
+ "log_to_file": true,
62
+ "experiment_summaries_interval": 10,
63
+ "flush_summaries_interval": 30,
64
+ "stats_avg": 100,
65
+ "summaries_use_frameskip": true,
66
+ "heartbeat_interval": 20,
67
+ "heartbeat_reporting_interval": 600,
68
+ "train_for_env_steps": 10000,
69
+ "train_for_seconds": 10000000000,
70
+ "save_every_sec": 120,
71
+ "keep_checkpoints": 2,
72
+ "load_checkpoint_kind": "latest",
73
+ "save_milestones_sec": -1,
74
+ "save_best_every_sec": 5,
75
+ "save_best_metric": "reward",
76
+ "save_best_after": 100000,
77
+ "benchmark": false,
78
+ "encoder_mlp_layers": [
79
+ 512,
80
+ 512
81
+ ],
82
+ "encoder_conv_architecture": "convnet_simple",
83
+ "encoder_conv_mlp_layers": [
84
+ 512
85
+ ],
86
+ "use_rnn": true,
87
+ "rnn_size": 512,
88
+ "rnn_type": "gru",
89
+ "rnn_num_layers": 1,
90
+ "decoder_mlp_layers": [],
91
+ "nonlinearity": "elu",
92
+ "policy_initialization": "orthogonal",
93
+ "policy_init_gain": 1.0,
94
+ "actor_critic_share_weights": true,
95
+ "adaptive_stddev": true,
96
+ "continuous_tanh_scale": 0.0,
97
+ "initial_stddev": 1.0,
98
+ "use_env_info_cache": false,
99
+ "env_gpu_actions": false,
100
+ "env_gpu_observations": true,
101
+ "env_frameskip": 4,
102
+ "env_framestack": 1,
103
+ "pixel_format": "CHW",
104
+ "use_record_episode_statistics": false,
105
+ "with_wandb": false,
106
+ "wandb_user": null,
107
+ "wandb_project": "sample_factory",
108
+ "wandb_group": null,
109
+ "wandb_job_type": "SF",
110
+ "wandb_tags": [],
111
+ "with_pbt": false,
112
+ "pbt_mix_policies_in_one_env": true,
113
+ "pbt_period_env_steps": 5000000,
114
+ "pbt_start_mutation": 20000000,
115
+ "pbt_replace_fraction": 0.3,
116
+ "pbt_mutation_rate": 0.15,
117
+ "pbt_replace_reward_gap": 0.1,
118
+ "pbt_replace_reward_gap_absolute": 1e-06,
119
+ "pbt_optimize_gamma": false,
120
+ "pbt_target_objective": "true_objective",
121
+ "pbt_perturb_min": 1.1,
122
+ "pbt_perturb_max": 1.5,
123
+ "num_agents": -1,
124
+ "num_humans": 0,
125
+ "num_bots": -1,
126
+ "start_bot_difficulty": null,
127
+ "timelimit": null,
128
+ "res_w": 128,
129
+ "res_h": 72,
130
+ "wide_aspect_ratio": false,
131
+ "eval_env_frameskip": 1,
132
+ "fps": 35,
133
+ "command_line": "--env=doom_health_gathering_supreme --num_workers=12 --num_envs_per_worker=4 --train_for_env_steps=10000",
134
+ "cli_args": {
135
+ "env": "doom_health_gathering_supreme",
136
+ "num_workers": 12,
137
+ "num_envs_per_worker": 4,
138
+ "train_for_env_steps": 10000
139
+ },
140
+ "git_hash": "unknown",
141
+ "git_repo_name": "not a git repository"
142
+ }
sf_log.txt ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2023-02-26 09:32:07,567][00001] Saving configuration to /workspace/train_dir/default_experiment/config.json...
2
+ [2023-02-26 09:32:07,568][00001] Rollout worker 0 uses device cpu
3
+ [2023-02-26 09:32:07,568][00001] Rollout worker 1 uses device cpu
4
+ [2023-02-26 09:32:07,568][00001] Rollout worker 2 uses device cpu
5
+ [2023-02-26 09:32:07,568][00001] Rollout worker 3 uses device cpu
6
+ [2023-02-26 09:32:07,568][00001] Rollout worker 4 uses device cpu
7
+ [2023-02-26 09:32:07,568][00001] Rollout worker 5 uses device cpu
8
+ [2023-02-26 09:32:07,568][00001] Rollout worker 6 uses device cpu
9
+ [2023-02-26 09:32:07,568][00001] Rollout worker 7 uses device cpu
10
+ [2023-02-26 09:32:07,568][00001] Rollout worker 8 uses device cpu
11
+ [2023-02-26 09:32:07,568][00001] Rollout worker 9 uses device cpu
12
+ [2023-02-26 09:32:07,568][00001] Rollout worker 10 uses device cpu
13
+ [2023-02-26 09:32:07,568][00001] Rollout worker 11 uses device cpu
14
+ [2023-02-26 09:32:07,624][00001] Using GPUs [0] for process 0 (actually maps to GPUs [0])
15
+ [2023-02-26 09:32:07,624][00001] InferenceWorker_p0-w0: min num requests: 4
16
+ [2023-02-26 09:32:07,647][00001] Starting all processes...
17
+ [2023-02-26 09:32:07,647][00001] Starting process learner_proc0
18
+ [2023-02-26 09:32:08,374][00001] Starting all processes...
19
+ [2023-02-26 09:32:08,377][00001] Starting process inference_proc0-0
20
+ [2023-02-26 09:32:08,377][00001] Starting process rollout_proc0
21
+ [2023-02-26 09:32:08,377][00001] Starting process rollout_proc1
22
+ [2023-02-26 09:32:08,378][00141] Using GPUs [0] for process 0 (actually maps to GPUs [0])
23
+ [2023-02-26 09:32:08,378][00141] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
24
+ [2023-02-26 09:32:08,377][00001] Starting process rollout_proc2
25
+ [2023-02-26 09:32:08,377][00001] Starting process rollout_proc3
26
+ [2023-02-26 09:32:08,377][00001] Starting process rollout_proc4
27
+ [2023-02-26 09:32:08,378][00001] Starting process rollout_proc5
28
+ [2023-02-26 09:32:08,378][00001] Starting process rollout_proc6
29
+ [2023-02-26 09:32:08,387][00141] Num visible devices: 1
30
+ [2023-02-26 09:32:08,378][00001] Starting process rollout_proc7
31
+ [2023-02-26 09:32:08,379][00001] Starting process rollout_proc8
32
+ [2023-02-26 09:32:08,380][00001] Starting process rollout_proc9
33
+ [2023-02-26 09:32:08,381][00001] Starting process rollout_proc10
34
+ [2023-02-26 09:32:08,383][00001] Starting process rollout_proc11
35
+ [2023-02-26 09:32:08,422][00141] Starting seed is not provided
36
+ [2023-02-26 09:32:08,422][00141] Using GPUs [0] for process 0 (actually maps to GPUs [0])
37
+ [2023-02-26 09:32:08,422][00141] Initializing actor-critic model on device cuda:0
38
+ [2023-02-26 09:32:08,422][00141] RunningMeanStd input shape: (3, 72, 128)
39
+ [2023-02-26 09:32:08,423][00141] RunningMeanStd input shape: (1,)
40
+ [2023-02-26 09:32:08,438][00141] ConvEncoder: input_channels=3
41
+ [2023-02-26 09:32:08,565][00141] Conv encoder output size: 512
42
+ [2023-02-26 09:32:08,566][00141] Policy head output size: 512
43
+ [2023-02-26 09:32:08,579][00141] Created Actor Critic model with architecture:
44
+ [2023-02-26 09:32:08,579][00141] ActorCriticSharedWeights(
45
+ (obs_normalizer): ObservationNormalizer(
46
+ (running_mean_std): RunningMeanStdDictInPlace(
47
+ (running_mean_std): ModuleDict(
48
+ (obs): RunningMeanStdInPlace()
49
+ )
50
+ )
51
+ )
52
+ (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
53
+ (encoder): VizdoomEncoder(
54
+ (basic_encoder): ConvEncoder(
55
+ (enc): RecursiveScriptModule(
56
+ original_name=ConvEncoderImpl
57
+ (conv_head): RecursiveScriptModule(
58
+ original_name=Sequential
59
+ (0): RecursiveScriptModule(original_name=Conv2d)
60
+ (1): RecursiveScriptModule(original_name=ELU)
61
+ (2): RecursiveScriptModule(original_name=Conv2d)
62
+ (3): RecursiveScriptModule(original_name=ELU)
63
+ (4): RecursiveScriptModule(original_name=Conv2d)
64
+ (5): RecursiveScriptModule(original_name=ELU)
65
+ )
66
+ (mlp_layers): RecursiveScriptModule(
67
+ original_name=Sequential
68
+ (0): RecursiveScriptModule(original_name=Linear)
69
+ (1): RecursiveScriptModule(original_name=ELU)
70
+ )
71
+ )
72
+ )
73
+ )
74
+ (core): ModelCoreRNN(
75
+ (core): GRU(512, 512)
76
+ )
77
+ (decoder): MlpDecoder(
78
+ (mlp): Identity()
79
+ )
80
+ (critic_linear): Linear(in_features=512, out_features=1, bias=True)
81
+ (action_parameterization): ActionParameterizationDefault(
82
+ (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
83
+ )
84
+ )
85
+ [2023-02-26 09:32:09,423][00201] Worker 10 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
86
+ [2023-02-26 09:32:09,462][00197] Worker 8 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
87
+ [2023-02-26 09:32:09,464][00195] Worker 4 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
88
+ [2023-02-26 09:32:09,486][00190] Using GPUs [0] for process 0 (actually maps to GPUs [0])
89
+ [2023-02-26 09:32:09,486][00192] Worker 2 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
90
+ [2023-02-26 09:32:09,486][00190] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
91
+ [2023-02-26 09:32:09,488][00196] Worker 6 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
92
+ [2023-02-26 09:32:09,493][00189] Worker 1 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
93
+ [2023-02-26 09:32:09,497][00190] Num visible devices: 1
94
+ [2023-02-26 09:32:09,507][00191] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
95
+ [2023-02-26 09:32:09,513][00200] Worker 9 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
96
+ [2023-02-26 09:32:09,523][00194] Worker 5 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
97
+ [2023-02-26 09:32:09,534][00199] Worker 11 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
98
+ [2023-02-26 09:32:09,542][00198] Worker 7 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
99
+ [2023-02-26 09:32:09,561][00193] Worker 3 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
100
+ [2023-02-26 09:32:10,323][00141] Using optimizer <class 'torch.optim.adam.Adam'>
101
+ [2023-02-26 09:32:10,324][00141] No checkpoints found
102
+ [2023-02-26 09:32:10,324][00141] Did not load from checkpoint, starting from scratch!
103
+ [2023-02-26 09:32:10,324][00141] Initialized policy 0 weights for model version 0
104
+ [2023-02-26 09:32:10,325][00141] LearnerWorker_p0 finished initialization!
105
+ [2023-02-26 09:32:10,325][00141] Using GPUs [0] for process 0 (actually maps to GPUs [0])
106
+ [2023-02-26 09:32:10,383][00190] RunningMeanStd input shape: (3, 72, 128)
107
+ [2023-02-26 09:32:10,383][00190] RunningMeanStd input shape: (1,)
108
+ [2023-02-26 09:32:10,391][00190] ConvEncoder: input_channels=3
109
+ [2023-02-26 09:32:10,454][00190] Conv encoder output size: 512
110
+ [2023-02-26 09:32:10,454][00190] Policy head output size: 512
111
+ [2023-02-26 09:32:10,996][00001] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
112
+ [2023-02-26 09:32:11,166][00001] Inference worker 0-0 is ready!
113
+ [2023-02-26 09:32:11,166][00001] All inference workers are ready! Signal rollout workers to start!
114
+ [2023-02-26 09:32:11,194][00196] Doom resolution: 160x120, resize resolution: (128, 72)
115
+ [2023-02-26 09:32:11,199][00199] Doom resolution: 160x120, resize resolution: (128, 72)
116
+ [2023-02-26 09:32:11,206][00200] Doom resolution: 160x120, resize resolution: (128, 72)
117
+ [2023-02-26 09:32:11,206][00197] Doom resolution: 160x120, resize resolution: (128, 72)
118
+ [2023-02-26 09:32:11,214][00193] Doom resolution: 160x120, resize resolution: (128, 72)
119
+ [2023-02-26 09:32:11,214][00201] Doom resolution: 160x120, resize resolution: (128, 72)
120
+ [2023-02-26 09:32:11,220][00198] Doom resolution: 160x120, resize resolution: (128, 72)
121
+ [2023-02-26 09:32:11,227][00191] Doom resolution: 160x120, resize resolution: (128, 72)
122
+ [2023-02-26 09:32:11,233][00189] Doom resolution: 160x120, resize resolution: (128, 72)
123
+ [2023-02-26 09:32:11,234][00192] Doom resolution: 160x120, resize resolution: (128, 72)
124
+ [2023-02-26 09:32:11,235][00194] Doom resolution: 160x120, resize resolution: (128, 72)
125
+ [2023-02-26 09:32:11,235][00195] Doom resolution: 160x120, resize resolution: (128, 72)
126
+ [2023-02-26 09:32:11,359][00196] Decorrelating experience for 0 frames...
127
+ [2023-02-26 09:32:11,359][00199] Decorrelating experience for 0 frames...
128
+ [2023-02-26 09:32:11,401][00197] Decorrelating experience for 0 frames...
129
+ [2023-02-26 09:32:11,401][00193] Decorrelating experience for 0 frames...
130
+ [2023-02-26 09:32:11,401][00200] Decorrelating experience for 0 frames...
131
+ [2023-02-26 09:32:11,407][00191] Decorrelating experience for 0 frames...
132
+ [2023-02-26 09:32:11,407][00192] Decorrelating experience for 0 frames...
133
+ [2023-02-26 09:32:11,534][00201] Decorrelating experience for 0 frames...
134
+ [2023-02-26 09:32:11,578][00197] Decorrelating experience for 32 frames...
135
+ [2023-02-26 09:32:11,579][00193] Decorrelating experience for 32 frames...
136
+ [2023-02-26 09:32:11,582][00198] Decorrelating experience for 0 frames...
137
+ [2023-02-26 09:32:11,582][00194] Decorrelating experience for 0 frames...
138
+ [2023-02-26 09:32:11,586][00199] Decorrelating experience for 32 frames...
139
+ [2023-02-26 09:32:11,586][00191] Decorrelating experience for 32 frames...
140
+ [2023-02-26 09:32:11,586][00192] Decorrelating experience for 32 frames...
141
+ [2023-02-26 09:32:11,673][00201] Decorrelating experience for 32 frames...
142
+ [2023-02-26 09:32:11,691][00189] Decorrelating experience for 0 frames...
143
+ [2023-02-26 09:32:11,719][00194] Decorrelating experience for 32 frames...
144
+ [2023-02-26 09:32:11,768][00200] Decorrelating experience for 32 frames...
145
+ [2023-02-26 09:32:11,772][00197] Decorrelating experience for 64 frames...
146
+ [2023-02-26 09:32:11,776][00193] Decorrelating experience for 64 frames...
147
+ [2023-02-26 09:32:11,778][00196] Decorrelating experience for 32 frames...
148
+ [2023-02-26 09:32:11,778][00199] Decorrelating experience for 64 frames...
149
+ [2023-02-26 09:32:11,828][00198] Decorrelating experience for 32 frames...
150
+ [2023-02-26 09:32:11,860][00191] Decorrelating experience for 64 frames...
151
+ [2023-02-26 09:32:11,884][00194] Decorrelating experience for 64 frames...
152
+ [2023-02-26 09:32:11,943][00200] Decorrelating experience for 64 frames...
153
+ [2023-02-26 09:32:11,955][00196] Decorrelating experience for 64 frames...
154
+ [2023-02-26 09:32:11,962][00197] Decorrelating experience for 96 frames...
155
+ [2023-02-26 09:32:11,964][00195] Decorrelating experience for 0 frames...
156
+ [2023-02-26 09:32:11,968][00193] Decorrelating experience for 96 frames...
157
+ [2023-02-26 09:32:11,982][00189] Decorrelating experience for 32 frames...
158
+ [2023-02-26 09:32:11,992][00198] Decorrelating experience for 64 frames...
159
+ [2023-02-26 09:32:12,096][00199] Decorrelating experience for 96 frames...
160
+ [2023-02-26 09:32:12,107][00192] Decorrelating experience for 64 frames...
161
+ [2023-02-26 09:32:12,133][00191] Decorrelating experience for 96 frames...
162
+ [2023-02-26 09:32:12,140][00201] Decorrelating experience for 64 frames...
163
+ [2023-02-26 09:32:12,156][00198] Decorrelating experience for 96 frames...
164
+ [2023-02-26 09:32:12,157][00194] Decorrelating experience for 96 frames...
165
+ [2023-02-26 09:32:12,281][00196] Decorrelating experience for 96 frames...
166
+ [2023-02-26 09:32:12,305][00192] Decorrelating experience for 96 frames...
167
+ [2023-02-26 09:32:12,318][00195] Decorrelating experience for 32 frames...
168
+ [2023-02-26 09:32:12,321][00200] Decorrelating experience for 96 frames...
169
+ [2023-02-26 09:32:12,489][00201] Decorrelating experience for 96 frames...
170
+ [2023-02-26 09:32:12,502][00189] Decorrelating experience for 64 frames...
171
+ [2023-02-26 09:32:12,511][00195] Decorrelating experience for 64 frames...
172
+ [2023-02-26 09:32:12,629][00141] Signal inference workers to stop experience collection...
173
+ [2023-02-26 09:32:12,632][00190] InferenceWorker_p0-w0: stopping experience collection
174
+ [2023-02-26 09:32:12,696][00189] Decorrelating experience for 96 frames...
175
+ [2023-02-26 09:32:12,698][00195] Decorrelating experience for 96 frames...
176
+ [2023-02-26 09:32:13,348][00141] Signal inference workers to resume experience collection...
177
+ [2023-02-26 09:32:13,348][00190] InferenceWorker_p0-w0: resuming experience collection
178
+ [2023-02-26 09:32:14,002][00141] Stopping Batcher_0...
179
+ [2023-02-26 09:32:14,002][00001] Component Batcher_0 stopped!
180
+ [2023-02-26 09:32:14,002][00141] Saving /workspace/train_dir/default_experiment/checkpoint_p0/checkpoint_000000004_16384.pth...
181
+ [2023-02-26 09:32:14,010][00198] Stopping RolloutWorker_w7...
182
+ [2023-02-26 09:32:14,010][00001] Component RolloutWorker_w7 stopped!
183
+ [2023-02-26 09:32:14,011][00198] Loop rollout_proc7_evt_loop terminating...
184
+ [2023-02-26 09:32:14,011][00001] Component RolloutWorker_w1 stopped!
185
+ [2023-02-26 09:32:14,002][00141] Loop batcher_evt_loop terminating...
186
+ [2023-02-26 09:32:14,011][00189] Stopping RolloutWorker_w1...
187
+ [2023-02-26 09:32:14,011][00001] Component RolloutWorker_w10 stopped!
188
+ [2023-02-26 09:32:14,011][00201] Stopping RolloutWorker_w10...
189
+ [2023-02-26 09:32:14,011][00195] Stopping RolloutWorker_w4...
190
+ [2023-02-26 09:32:14,011][00001] Component RolloutWorker_w4 stopped!
191
+ [2023-02-26 09:32:14,011][00201] Loop rollout_proc10_evt_loop terminating...
192
+ [2023-02-26 09:32:14,011][00197] Stopping RolloutWorker_w8...
193
+ [2023-02-26 09:32:14,011][00189] Loop rollout_proc1_evt_loop terminating...
194
+ [2023-02-26 09:32:14,011][00195] Loop rollout_proc4_evt_loop terminating...
195
+ [2023-02-26 09:32:14,011][00001] Component RolloutWorker_w8 stopped!
196
+ [2023-02-26 09:32:14,011][00199] Stopping RolloutWorker_w11...
197
+ [2023-02-26 09:32:14,011][00001] Component RolloutWorker_w11 stopped!
198
+ [2023-02-26 09:32:14,011][00197] Loop rollout_proc8_evt_loop terminating...
199
+ [2023-02-26 09:32:14,011][00001] Component RolloutWorker_w2 stopped!
200
+ [2023-02-26 09:32:14,011][00191] Stopping RolloutWorker_w0...
201
+ [2023-02-26 09:32:14,011][00200] Stopping RolloutWorker_w9...
202
+ [2023-02-26 09:32:14,011][00192] Stopping RolloutWorker_w2...
203
+ [2023-02-26 09:32:14,011][00193] Stopping RolloutWorker_w3...
204
+ [2023-02-26 09:32:14,011][00199] Loop rollout_proc11_evt_loop terminating...
205
+ [2023-02-26 09:32:14,011][00196] Stopping RolloutWorker_w6...
206
+ [2023-02-26 09:32:14,012][00001] Component RolloutWorker_w9 stopped!
207
+ [2023-02-26 09:32:14,012][00191] Loop rollout_proc0_evt_loop terminating...
208
+ [2023-02-26 09:32:14,012][00001] Component RolloutWorker_w3 stopped!
209
+ [2023-02-26 09:32:14,011][00194] Stopping RolloutWorker_w5...
210
+ [2023-02-26 09:32:14,012][00200] Loop rollout_proc9_evt_loop terminating...
211
+ [2023-02-26 09:32:14,012][00193] Loop rollout_proc3_evt_loop terminating...
212
+ [2023-02-26 09:32:14,012][00001] Component RolloutWorker_w0 stopped!
213
+ [2023-02-26 09:32:14,012][00196] Loop rollout_proc6_evt_loop terminating...
214
+ [2023-02-26 09:32:14,012][00192] Loop rollout_proc2_evt_loop terminating...
215
+ [2023-02-26 09:32:14,012][00001] Component RolloutWorker_w6 stopped!
216
+ [2023-02-26 09:32:14,012][00194] Loop rollout_proc5_evt_loop terminating...
217
+ [2023-02-26 09:32:14,012][00001] Component RolloutWorker_w5 stopped!
218
+ [2023-02-26 09:32:14,018][00190] Weights refcount: 2 0
219
+ [2023-02-26 09:32:14,020][00001] Component InferenceWorker_p0-w0 stopped!
220
+ [2023-02-26 09:32:14,020][00190] Stopping InferenceWorker_p0-w0...
221
+ [2023-02-26 09:32:14,021][00190] Loop inference_proc0-0_evt_loop terminating...
222
+ [2023-02-26 09:32:14,053][00141] Saving /workspace/train_dir/default_experiment/checkpoint_p0/checkpoint_000000004_16384.pth...
223
+ [2023-02-26 09:32:14,118][00141] Stopping LearnerWorker_p0...
224
+ [2023-02-26 09:32:14,118][00001] Component LearnerWorker_p0 stopped!
225
+ [2023-02-26 09:32:14,119][00141] Loop learner_proc0_evt_loop terminating...
226
+ [2023-02-26 09:32:14,119][00001] Waiting for process learner_proc0 to stop...
227
+ [2023-02-26 09:32:14,900][00001] Waiting for process inference_proc0-0 to join...
228
+ [2023-02-26 09:32:14,901][00001] Waiting for process rollout_proc0 to join...
229
+ [2023-02-26 09:32:14,901][00001] Waiting for process rollout_proc1 to join...
230
+ [2023-02-26 09:32:14,901][00001] Waiting for process rollout_proc2 to join...
231
+ [2023-02-26 09:32:14,901][00001] Waiting for process rollout_proc3 to join...
232
+ [2023-02-26 09:32:14,902][00001] Waiting for process rollout_proc4 to join...
233
+ [2023-02-26 09:32:14,902][00001] Waiting for process rollout_proc5 to join...
234
+ [2023-02-26 09:32:14,902][00001] Waiting for process rollout_proc6 to join...
235
+ [2023-02-26 09:32:14,902][00001] Waiting for process rollout_proc7 to join...
236
+ [2023-02-26 09:32:14,903][00001] Waiting for process rollout_proc8 to join...
237
+ [2023-02-26 09:32:14,903][00001] Waiting for process rollout_proc9 to join...
238
+ [2023-02-26 09:32:14,903][00001] Waiting for process rollout_proc10 to join...
239
+ [2023-02-26 09:32:14,903][00001] Waiting for process rollout_proc11 to join...
240
+ [2023-02-26 09:32:14,904][00001] Batcher 0 profile tree view:
241
+ batching: 0.0462, releasing_batches: 0.0008
242
+ [2023-02-26 09:32:14,904][00001] InferenceWorker_p0-w0 profile tree view:
243
+ wait_policy: 0.0000
244
+ wait_policy_total: 0.8600
245
+ update_model: 0.2093
246
+ weight_update: 0.0513
247
+ one_step: 0.0016
248
+ handle_policy_step: 0.7327
249
+ deserialize: 0.0239, stack: 0.0026, obs_to_device_normalize: 0.1050, forward: 0.4757, send_messages: 0.0396
250
+ prepare_outputs: 0.0622
251
+ to_cpu: 0.0383
252
+ [2023-02-26 09:32:14,904][00001] Learner 0 profile tree view:
253
+ misc: 0.0000, prepare_batch: 1.1570
254
+ train: 0.2483
255
+ epoch_init: 0.0000, minibatch_init: 0.0000, losses_postprocess: 0.0007, kl_divergence: 0.0010, after_optimizer: 0.0080
256
+ calculate_losses: 0.0434
257
+ losses_init: 0.0000, forward_head: 0.0259, bptt_initial: 0.0108, tail: 0.0013, advantages_returns: 0.0005, losses: 0.0024
258
+ bptt: 0.0021
259
+ bptt_forward_core: 0.0020
260
+ update: 0.1943
261
+ clip: 0.0026
262
+ [2023-02-26 09:32:14,904][00001] RolloutWorker_w0 profile tree view:
263
+ wait_for_trajectories: 0.0006, enqueue_policy_requests: 0.0198, env_step: 0.3360, overhead: 0.0196, complete_rollouts: 0.0005
264
+ save_policy_outputs: 0.0217
265
+ split_output_tensors: 0.0106
266
+ [2023-02-26 09:32:14,904][00001] RolloutWorker_w11 profile tree view:
267
+ wait_for_trajectories: 0.0006, enqueue_policy_requests: 0.0212, env_step: 0.3282, overhead: 0.0214, complete_rollouts: 0.0006
268
+ save_policy_outputs: 0.0236
269
+ split_output_tensors: 0.0113
270
+ [2023-02-26 09:32:14,905][00001] Loop Runner_EvtLoop terminating...
271
+ [2023-02-26 09:32:14,905][00001] Runner profile tree view:
272
+ main_loop: 7.2583
273
+ [2023-02-26 09:32:14,905][00001] Collected {0: 16384}, FPS: 2257.3
274
+ [2023-02-26 09:32:14,921][00001] Loading existing experiment configuration from /workspace/train_dir/default_experiment/config.json
275
+ [2023-02-26 09:32:14,922][00001] Overriding arg 'num_workers' with value 1 passed from command line
276
+ [2023-02-26 09:32:14,922][00001] Adding new argument 'no_render'=True that is not in the saved config file!
277
+ [2023-02-26 09:32:14,922][00001] Adding new argument 'save_video'=True that is not in the saved config file!
278
+ [2023-02-26 09:32:14,922][00001] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
279
+ [2023-02-26 09:32:14,922][00001] Adding new argument 'video_name'=None that is not in the saved config file!
280
+ [2023-02-26 09:32:14,922][00001] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
281
+ [2023-02-26 09:32:14,922][00001] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
282
+ [2023-02-26 09:32:14,922][00001] Adding new argument 'push_to_hub'=True that is not in the saved config file!
283
+ [2023-02-26 09:32:14,923][00001] Adding new argument 'hf_repository'='chavicoski/vizdoom_health_gathering_supreme' that is not in the saved config file!
284
+ [2023-02-26 09:32:14,923][00001] Adding new argument 'policy_index'=0 that is not in the saved config file!
285
+ [2023-02-26 09:32:14,923][00001] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
286
+ [2023-02-26 09:32:14,923][00001] Adding new argument 'train_script'=None that is not in the saved config file!
287
+ [2023-02-26 09:32:14,923][00001] Adding new argument 'enjoy_script'=None that is not in the saved config file!
288
+ [2023-02-26 09:32:14,923][00001] Using frameskip 1 and render_action_repeat=4 for evaluation
289
+ [2023-02-26 09:32:14,930][00001] Doom resolution: 160x120, resize resolution: (128, 72)
290
+ [2023-02-26 09:32:14,930][00001] RunningMeanStd input shape: (3, 72, 128)
291
+ [2023-02-26 09:32:14,931][00001] RunningMeanStd input shape: (1,)
292
+ [2023-02-26 09:32:14,945][00001] ConvEncoder: input_channels=3
293
+ [2023-02-26 09:32:15,033][00001] Conv encoder output size: 512
294
+ [2023-02-26 09:32:15,034][00001] Policy head output size: 512
295
+ [2023-02-26 09:32:16,298][00001] Loading state from checkpoint /workspace/train_dir/default_experiment/checkpoint_p0/checkpoint_000000004_16384.pth...
296
+ [2023-02-26 09:32:16,922][00001] Num frames 100...
297
+ [2023-02-26 09:32:17,014][00001] Num frames 200...
298
+ [2023-02-26 09:32:17,108][00001] Num frames 300...
299
+ [2023-02-26 09:32:17,200][00001] Num frames 400...
300
+ [2023-02-26 09:32:17,293][00001] Num frames 500...
301
+ [2023-02-26 09:32:17,386][00001] Avg episode rewards: #0: 7.440, true rewards: #0: 5.440
302
+ [2023-02-26 09:32:17,387][00001] Avg episode reward: 7.440, avg true_objective: 5.440
303
+ [2023-02-26 09:32:17,463][00001] Num frames 600...
304
+ [2023-02-26 09:32:17,556][00001] Num frames 700...
305
+ [2023-02-26 09:32:17,649][00001] Num frames 800...
306
+ [2023-02-26 09:32:17,743][00001] Num frames 900...
307
+ [2023-02-26 09:32:17,837][00001] Num frames 1000...
308
+ [2023-02-26 09:32:17,972][00001] Avg episode rewards: #0: 7.940, true rewards: #0: 5.440
309
+ [2023-02-26 09:32:17,972][00001] Avg episode reward: 7.940, avg true_objective: 5.440
310
+ [2023-02-26 09:32:17,988][00001] Num frames 1100...
311
+ [2023-02-26 09:32:18,095][00001] Num frames 1200...
312
+ [2023-02-26 09:32:18,188][00001] Num frames 1300...
313
+ [2023-02-26 09:32:18,281][00001] Num frames 1400...
314
+ [2023-02-26 09:32:18,404][00001] Avg episode rewards: #0: 6.573, true rewards: #0: 4.907
315
+ [2023-02-26 09:32:18,404][00001] Avg episode reward: 6.573, avg true_objective: 4.907
316
+ [2023-02-26 09:32:18,442][00001] Num frames 1500...
317
+ [2023-02-26 09:32:18,543][00001] Num frames 1600...
318
+ [2023-02-26 09:32:18,635][00001] Num frames 1700...
319
+ [2023-02-26 09:32:18,728][00001] Num frames 1800...
320
+ [2023-02-26 09:32:18,832][00001] Avg episode rewards: #0: 5.890, true rewards: #0: 4.640
321
+ [2023-02-26 09:32:18,833][00001] Avg episode reward: 5.890, avg true_objective: 4.640
322
+ [2023-02-26 09:32:18,890][00001] Num frames 1900...
323
+ [2023-02-26 09:32:18,986][00001] Num frames 2000...
324
+ [2023-02-26 09:32:19,079][00001] Num frames 2100...
325
+ [2023-02-26 09:32:19,173][00001] Num frames 2200...
326
+ [2023-02-26 09:32:19,267][00001] Num frames 2300...
327
+ [2023-02-26 09:32:19,324][00001] Avg episode rewards: #0: 5.808, true rewards: #0: 4.608
328
+ [2023-02-26 09:32:19,324][00001] Avg episode reward: 5.808, avg true_objective: 4.608
329
+ [2023-02-26 09:32:19,440][00001] Num frames 2400...
330
+ [2023-02-26 09:32:19,532][00001] Num frames 2500...
331
+ [2023-02-26 09:32:19,627][00001] Num frames 2600...
332
+ [2023-02-26 09:32:19,762][00001] Avg episode rewards: #0: 5.480, true rewards: #0: 4.480
333
+ [2023-02-26 09:32:19,762][00001] Avg episode reward: 5.480, avg true_objective: 4.480
334
+ [2023-02-26 09:32:19,773][00001] Num frames 2700...
335
+ [2023-02-26 09:32:19,866][00001] Num frames 2800...
336
+ [2023-02-26 09:32:19,958][00001] Num frames 2900...
337
+ [2023-02-26 09:32:20,051][00001] Avg episode rewards: #0: 5.063, true rewards: #0: 4.206
338
+ [2023-02-26 09:32:20,051][00001] Avg episode reward: 5.063, avg true_objective: 4.206
339
+ [2023-02-26 09:32:20,126][00001] Num frames 3000...
340
+ [2023-02-26 09:32:20,219][00001] Num frames 3100...
341
+ [2023-02-26 09:32:20,312][00001] Num frames 3200...
342
+ [2023-02-26 09:32:20,405][00001] Num frames 3300...
343
+ [2023-02-26 09:32:20,483][00001] Avg episode rewards: #0: 4.910, true rewards: #0: 4.160
344
+ [2023-02-26 09:32:20,483][00001] Avg episode reward: 4.910, avg true_objective: 4.160
345
+ [2023-02-26 09:32:20,575][00001] Num frames 3400...
346
+ [2023-02-26 09:32:20,667][00001] Num frames 3500...
347
+ [2023-02-26 09:32:20,760][00001] Num frames 3600...
348
+ [2023-02-26 09:32:20,853][00001] Num frames 3700...
349
+ [2023-02-26 09:32:20,917][00001] Avg episode rewards: #0: 4.791, true rewards: #0: 4.124
350
+ [2023-02-26 09:32:20,917][00001] Avg episode reward: 4.791, avg true_objective: 4.124
351
+ [2023-02-26 09:32:21,019][00001] Num frames 3800...
352
+ [2023-02-26 09:32:21,112][00001] Num frames 3900...
353
+ [2023-02-26 09:32:21,204][00001] Num frames 4000...
354
+ [2023-02-26 09:32:21,346][00001] Avg episode rewards: #0: 4.696, true rewards: #0: 4.096
355
+ [2023-02-26 09:32:21,346][00001] Avg episode reward: 4.696, avg true_objective: 4.096
356
+ [2023-02-26 09:32:22,553][00001] Replay video saved to /workspace/train_dir/default_experiment/replay.mp4!