RamonAnkersmit committed on
Commit
3824c40
1 Parent(s): 93f425e

Upload . with huggingface_hub

Browse files
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ replay.mp4 filter=lfs diff=lfs merge=lfs -text
.summary/0/events.out.tfevents.1677101945.355362e7601a ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b1da7f33290987b826292b695f615948309a3a82fb5c743e63b902c5bd7ba02
3
+ size 2302815
README.md ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: sample-factory
3
+ tags:
4
+ - deep-reinforcement-learning
5
+ - reinforcement-learning
6
+ - sample-factory
7
+ model-index:
8
+ - name: APPO
9
+ results:
10
+ - task:
11
+ type: reinforcement-learning
12
+ name: reinforcement-learning
13
+ dataset:
14
+ name: doom_deadly_corridor
15
+ type: doom_deadly_corridor
16
+ metrics:
17
+ - type: mean_reward
18
+ value: 10.42 +/- 8.36
19
+ name: mean_reward
20
+ verified: false
21
+ ---
22
+
23
+ An **APPO** model trained on the **doom_deadly_corridor** environment.
24
+
25
+ This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
26
+ Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
27
+
28
+
29
+ ## Downloading the model
30
+
31
+ After installing Sample-Factory, download the model with:
32
+ ```
33
+ python -m sample_factory.huggingface.load_from_hub -r RamonAnkersmit/rl_course_doom_deadly_corridor
34
+ ```
35
+
36
+
37
+ ## Using the model
38
+
39
+ To run the model after download, use the `enjoy` script corresponding to this environment:
40
+ ```
41
+ python -m <path.to.enjoy.module> --algo=APPO --env=doom_deadly_corridor --train_dir=./train_dir --experiment=rl_course_doom_deadly_corridor
42
+ ```
43
+
44
+
45
+ You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
46
+ See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.
47
+
48
+ ## Training with this model
49
+
50
+ To continue training with this model, use the `train` script corresponding to this environment:
51
+ ```
52
+ python -m <path.to.train.module> --algo=APPO --env=doom_deadly_corridor --train_dir=./train_dir --experiment=rl_course_doom_deadly_corridor --restart_behavior=resume --train_for_env_steps=10000000000
53
+ ```
54
+
55
+ Note: you may have to set `--train_for_env_steps` to a suitably high number, because the experiment resumes from the step count at which it previously concluded.
56
+
checkpoint_p0/best_000002275_9318400_reward_17.231.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1d0346b43af0479ecef1f1dd45a431e8162972561fe0a5b0e2fc2e5c5b56a76
3
+ size 34965478
checkpoint_p0/checkpoint_000002372_9715712.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26e3561611732efd9a991633aad7d40c33142b682a8292f355e3b3f84218ea48
3
+ size 34965892
checkpoint_p0/checkpoint_000002443_10006528.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dea399edf996b131058ec439aceb24067a3f8da11e27099113f9215a0e02fa5
3
+ size 34965892
config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "help": false,
3
+ "algo": "APPO",
4
+ "env": "doom_deadly_corridor",
5
+ "experiment": "doom_deadly_corridor",
6
+ "train_dir": "/content/train_dir",
7
+ "restart_behavior": "resume",
8
+ "device": "gpu",
9
+ "seed": null,
10
+ "num_policies": 1,
11
+ "async_rl": true,
12
+ "serial_mode": false,
13
+ "batched_sampling": false,
14
+ "num_batches_to_accumulate": 2,
15
+ "worker_num_splits": 2,
16
+ "policy_workers_per_policy": 1,
17
+ "max_policy_lag": 1000,
18
+ "num_workers": 8,
19
+ "num_envs_per_worker": 4,
20
+ "batch_size": 1024,
21
+ "num_batches_per_epoch": 1,
22
+ "num_epochs": 1,
23
+ "rollout": 32,
24
+ "recurrence": 32,
25
+ "shuffle_minibatches": false,
26
+ "gamma": 0.99,
27
+ "reward_scale": 1.0,
28
+ "reward_clip": 1000.0,
29
+ "value_bootstrap": false,
30
+ "normalize_returns": true,
31
+ "exploration_loss_coeff": 0.001,
32
+ "value_loss_coeff": 0.5,
33
+ "kl_loss_coeff": 0.0,
34
+ "exploration_loss": "symmetric_kl",
35
+ "gae_lambda": 0.95,
36
+ "ppo_clip_ratio": 0.1,
37
+ "ppo_clip_value": 0.2,
38
+ "with_vtrace": false,
39
+ "vtrace_rho": 1.0,
40
+ "vtrace_c": 1.0,
41
+ "optimizer": "adam",
42
+ "adam_eps": 1e-06,
43
+ "adam_beta1": 0.9,
44
+ "adam_beta2": 0.999,
45
+ "max_grad_norm": 4.0,
46
+ "learning_rate": 0.0001,
47
+ "lr_schedule": "constant",
48
+ "lr_schedule_kl_threshold": 0.008,
49
+ "lr_adaptive_min": 1e-06,
50
+ "lr_adaptive_max": 0.01,
51
+ "obs_subtract_mean": 0.0,
52
+ "obs_scale": 255.0,
53
+ "normalize_input": true,
54
+ "normalize_input_keys": null,
55
+ "decorrelate_experience_max_seconds": 0,
56
+ "decorrelate_envs_on_one_worker": true,
57
+ "actor_worker_gpus": [],
58
+ "set_workers_cpu_affinity": true,
59
+ "force_envs_single_thread": false,
60
+ "default_niceness": 0,
61
+ "log_to_file": true,
62
+ "experiment_summaries_interval": 10,
63
+ "flush_summaries_interval": 30,
64
+ "stats_avg": 100,
65
+ "summaries_use_frameskip": true,
66
+ "heartbeat_interval": 20,
67
+ "heartbeat_reporting_interval": 600,
68
+ "train_for_env_steps": 10000000,
69
+ "train_for_seconds": 10000000000,
70
+ "save_every_sec": 120,
71
+ "keep_checkpoints": 2,
72
+ "load_checkpoint_kind": "latest",
73
+ "save_milestones_sec": -1,
74
+ "save_best_every_sec": 5,
75
+ "save_best_metric": "reward",
76
+ "save_best_after": 100000,
77
+ "benchmark": false,
78
+ "encoder_mlp_layers": [
79
+ 512,
80
+ 512
81
+ ],
82
+ "encoder_conv_architecture": "convnet_simple",
83
+ "encoder_conv_mlp_layers": [
84
+ 512
85
+ ],
86
+ "use_rnn": true,
87
+ "rnn_size": 512,
88
+ "rnn_type": "gru",
89
+ "rnn_num_layers": 1,
90
+ "decoder_mlp_layers": [],
91
+ "nonlinearity": "elu",
92
+ "policy_initialization": "orthogonal",
93
+ "policy_init_gain": 1.0,
94
+ "actor_critic_share_weights": true,
95
+ "adaptive_stddev": true,
96
+ "continuous_tanh_scale": 0.0,
97
+ "initial_stddev": 1.0,
98
+ "use_env_info_cache": false,
99
+ "env_gpu_actions": false,
100
+ "env_gpu_observations": true,
101
+ "env_frameskip": 4,
102
+ "env_framestack": 1,
103
+ "pixel_format": "CHW",
104
+ "use_record_episode_statistics": false,
105
+ "with_wandb": false,
106
+ "wandb_user": null,
107
+ "wandb_project": "sample_factory",
108
+ "wandb_group": null,
109
+ "wandb_job_type": "SF",
110
+ "wandb_tags": [],
111
+ "with_pbt": false,
112
+ "pbt_mix_policies_in_one_env": true,
113
+ "pbt_period_env_steps": 5000000,
114
+ "pbt_start_mutation": 20000000,
115
+ "pbt_replace_fraction": 0.3,
116
+ "pbt_mutation_rate": 0.15,
117
+ "pbt_replace_reward_gap": 0.1,
118
+ "pbt_replace_reward_gap_absolute": 1e-06,
119
+ "pbt_optimize_gamma": false,
120
+ "pbt_target_objective": "true_objective",
121
+ "pbt_perturb_min": 1.1,
122
+ "pbt_perturb_max": 1.5,
123
+ "num_agents": -1,
124
+ "num_humans": 0,
125
+ "num_bots": -1,
126
+ "start_bot_difficulty": null,
127
+ "timelimit": null,
128
+ "res_w": 128,
129
+ "res_h": 72,
130
+ "wide_aspect_ratio": false,
131
+ "eval_env_frameskip": 1,
132
+ "fps": 35,
133
+ "command_line": "--env=doom_deadly_corridor --experiment=doom_deadly_corridor --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=10000000",
134
+ "cli_args": {
135
+ "env": "doom_deadly_corridor",
136
+ "experiment": "doom_deadly_corridor",
137
+ "num_workers": 8,
138
+ "num_envs_per_worker": 4,
139
+ "train_for_env_steps": 10000000
140
+ },
141
+ "git_hash": "unknown",
142
+ "git_repo_name": "not a git repository"
143
+ }
replay.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12335cd0d24082e9d0eb7544fb2f9dcf472d9227bd7a464517d331d46654e4c5
3
+ size 1703429
sf_log.txt ADDED
@@ -0,0 +1,487 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2023-02-22 21:39:18,790][44343] Using GPUs [0] for process 0 (actually maps to GPUs [0])
2
+ [2023-02-22 21:39:18,795][44343] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
3
+ [2023-02-22 21:39:18,877][44343] Num visible devices: 1
4
+ [2023-02-22 21:39:18,911][44343] Starting seed is not provided
5
+ [2023-02-22 21:39:18,911][44343] Using GPUs [0] for process 0 (actually maps to GPUs [0])
6
+ [2023-02-22 21:39:18,911][44343] Initializing actor-critic model on device cuda:0
7
+ [2023-02-22 21:39:18,912][44343] RunningMeanStd input shape: (3, 72, 128)
8
+ [2023-02-22 21:39:18,916][44343] RunningMeanStd input shape: (1,)
9
+ [2023-02-22 21:39:18,966][44343] ConvEncoder: input_channels=3
10
+ [2023-02-22 21:39:19,495][44358] Worker 0 uses CPU cores [0]
11
+ [2023-02-22 21:39:19,655][44357] Using GPUs [0] for process 0 (actually maps to GPUs [0])
12
+ [2023-02-22 21:39:19,661][44357] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
13
+ [2023-02-22 21:39:19,700][44357] Num visible devices: 1
14
+ [2023-02-22 21:39:19,810][44359] Worker 1 uses CPU cores [1]
15
+ [2023-02-22 21:39:19,950][44343] Conv encoder output size: 512
16
+ [2023-02-22 21:39:19,950][44343] Policy head output size: 512
17
+ [2023-02-22 21:39:20,035][44343] Created Actor Critic model with architecture:
18
+ [2023-02-22 21:39:20,035][44343] ActorCriticSharedWeights(
19
+ (obs_normalizer): ObservationNormalizer(
20
+ (running_mean_std): RunningMeanStdDictInPlace(
21
+ (running_mean_std): ModuleDict(
22
+ (obs): RunningMeanStdInPlace()
23
+ )
24
+ )
25
+ )
26
+ (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
27
+ (encoder): VizdoomEncoder(
28
+ (basic_encoder): ConvEncoder(
29
+ (enc): RecursiveScriptModule(
30
+ original_name=ConvEncoderImpl
31
+ (conv_head): RecursiveScriptModule(
32
+ original_name=Sequential
33
+ (0): RecursiveScriptModule(original_name=Conv2d)
34
+ (1): RecursiveScriptModule(original_name=ELU)
35
+ (2): RecursiveScriptModule(original_name=Conv2d)
36
+ (3): RecursiveScriptModule(original_name=ELU)
37
+ (4): RecursiveScriptModule(original_name=Conv2d)
38
+ (5): RecursiveScriptModule(original_name=ELU)
39
+ )
40
+ (mlp_layers): RecursiveScriptModule(
41
+ original_name=Sequential
42
+ (0): RecursiveScriptModule(original_name=Linear)
43
+ (1): RecursiveScriptModule(original_name=ELU)
44
+ )
45
+ )
46
+ )
47
+ )
48
+ (core): ModelCoreRNN(
49
+ (core): GRU(512, 512)
50
+ )
51
+ (decoder): MlpDecoder(
52
+ (mlp): Identity()
53
+ )
54
+ (critic_linear): Linear(in_features=512, out_features=1, bias=True)
55
+ (action_parameterization): ActionParameterizationDefault(
56
+ (distribution_linear): Linear(in_features=512, out_features=11, bias=True)
57
+ )
58
+ )
59
+ [2023-02-22 21:39:20,560][44362] Worker 2 uses CPU cores [0]
60
+ [2023-02-22 21:39:20,816][44380] Worker 6 uses CPU cores [0]
61
+ [2023-02-22 21:39:20,864][44365] Worker 3 uses CPU cores [1]
62
+ [2023-02-22 21:39:21,072][44370] Worker 5 uses CPU cores [1]
63
+ [2023-02-22 21:39:21,112][44372] Worker 4 uses CPU cores [0]
64
+ [2023-02-22 21:39:21,139][44374] Worker 7 uses CPU cores [1]
65
+ [2023-02-22 21:39:27,142][44343] Using optimizer <class 'torch.optim.adam.Adam'>
66
+ [2023-02-22 21:39:27,144][44343] No checkpoints found
67
+ [2023-02-22 21:39:27,144][44343] Did not load from checkpoint, starting from scratch!
68
+ [2023-02-22 21:39:27,144][44343] Initialized policy 0 weights for model version 0
69
+ [2023-02-22 21:39:27,147][44343] Using GPUs [0] for process 0 (actually maps to GPUs [0])
70
+ [2023-02-22 21:39:27,154][44343] LearnerWorker_p0 finished initialization!
71
+ [2023-02-22 21:39:27,360][44357] RunningMeanStd input shape: (3, 72, 128)
72
+ [2023-02-22 21:39:27,361][44357] RunningMeanStd input shape: (1,)
73
+ [2023-02-22 21:39:27,373][44357] ConvEncoder: input_channels=3
74
+ [2023-02-22 21:39:27,471][44357] Conv encoder output size: 512
75
+ [2023-02-22 21:39:27,472][44357] Policy head output size: 512
76
+ [2023-02-22 21:39:30,323][44365] Doom resolution: 160x120, resize resolution: (128, 72)
77
+ [2023-02-22 21:39:30,344][44370] Doom resolution: 160x120, resize resolution: (128, 72)
78
+ [2023-02-22 21:39:30,348][44359] Doom resolution: 160x120, resize resolution: (128, 72)
79
+ [2023-02-22 21:39:30,354][44374] Doom resolution: 160x120, resize resolution: (128, 72)
80
+ [2023-02-22 21:39:30,502][44372] Doom resolution: 160x120, resize resolution: (128, 72)
81
+ [2023-02-22 21:39:30,507][44380] Doom resolution: 160x120, resize resolution: (128, 72)
82
+ [2023-02-22 21:39:30,535][44362] Doom resolution: 160x120, resize resolution: (128, 72)
83
+ [2023-02-22 21:39:30,603][44358] Doom resolution: 160x120, resize resolution: (128, 72)
84
+ [2023-02-22 21:39:32,430][44374] Decorrelating experience for 0 frames...
85
+ [2023-02-22 21:39:32,432][44370] Decorrelating experience for 0 frames...
86
+ [2023-02-22 21:39:32,434][44365] Decorrelating experience for 0 frames...
87
+ [2023-02-22 21:39:32,434][44359] Decorrelating experience for 0 frames...
88
+ [2023-02-22 21:39:32,682][44372] Decorrelating experience for 0 frames...
89
+ [2023-02-22 21:39:32,687][44380] Decorrelating experience for 0 frames...
90
+ [2023-02-22 21:39:32,693][44362] Decorrelating experience for 0 frames...
91
+ [2023-02-22 21:39:32,710][44358] Decorrelating experience for 0 frames...
92
+ [2023-02-22 21:39:34,060][44365] Decorrelating experience for 32 frames...
93
+ [2023-02-22 21:39:34,061][44359] Decorrelating experience for 32 frames...
94
+ [2023-02-22 21:39:34,055][44374] Decorrelating experience for 32 frames...
95
+ [2023-02-22 21:39:34,351][44370] Decorrelating experience for 32 frames...
96
+ [2023-02-22 21:39:34,371][44372] Decorrelating experience for 32 frames...
97
+ [2023-02-22 21:39:34,444][44358] Decorrelating experience for 32 frames...
98
+ [2023-02-22 21:39:34,503][44362] Decorrelating experience for 32 frames...
99
+ [2023-02-22 21:39:35,178][44380] Decorrelating experience for 32 frames...
100
+ [2023-02-22 21:39:35,471][44358] Decorrelating experience for 64 frames...
101
+ [2023-02-22 21:39:35,815][44380] Decorrelating experience for 64 frames...
102
+ [2023-02-22 21:39:35,825][44365] Decorrelating experience for 64 frames...
103
+ [2023-02-22 21:39:35,879][44359] Decorrelating experience for 64 frames...
104
+ [2023-02-22 21:39:35,882][44374] Decorrelating experience for 64 frames...
105
+ [2023-02-22 21:39:36,094][44370] Decorrelating experience for 64 frames...
106
+ [2023-02-22 21:39:36,872][44380] Decorrelating experience for 96 frames...
107
+ [2023-02-22 21:39:36,885][44358] Decorrelating experience for 96 frames...
108
+ [2023-02-22 21:39:37,033][44372] Decorrelating experience for 64 frames...
109
+ [2023-02-22 21:39:37,168][44365] Decorrelating experience for 96 frames...
110
+ [2023-02-22 21:39:37,251][44359] Decorrelating experience for 96 frames...
111
+ [2023-02-22 21:39:37,288][44374] Decorrelating experience for 96 frames...
112
+ [2023-02-22 21:39:37,810][44362] Decorrelating experience for 64 frames...
113
+ [2023-02-22 21:39:38,106][44372] Decorrelating experience for 96 frames...
114
+ [2023-02-22 21:39:38,408][44370] Decorrelating experience for 96 frames...
115
+ [2023-02-22 21:39:38,514][44362] Decorrelating experience for 96 frames...
116
+ [2023-02-22 21:39:42,397][44343] Signal inference workers to stop experience collection...
117
+ [2023-02-22 21:39:42,420][44357] InferenceWorker_p0-w0: stopping experience collection
118
+ [2023-02-22 21:39:45,642][44343] Signal inference workers to resume experience collection...
119
+ [2023-02-22 21:39:45,646][44357] InferenceWorker_p0-w0: resuming experience collection
120
+ [2023-02-22 21:39:58,407][44357] Updated weights for policy 0, policy_version 10 (0.0583)
121
+ [2023-02-22 21:40:10,911][44357] Updated weights for policy 0, policy_version 20 (0.0022)
122
+ [2023-02-22 21:40:20,180][44343] Saving new best policy, reward=1.570!
123
+ [2023-02-22 21:40:22,503][44357] Updated weights for policy 0, policy_version 30 (0.0027)
124
+ [2023-02-22 21:40:25,197][44343] Saving new best policy, reward=1.799!
125
+ [2023-02-22 21:40:30,180][44343] Saving new best policy, reward=2.195!
126
+ [2023-02-22 21:40:35,188][44343] Saving new best policy, reward=2.481!
127
+ [2023-02-22 21:40:36,833][44357] Updated weights for policy 0, policy_version 40 (0.0012)
128
+ [2023-02-22 21:40:40,177][44343] Saving new best policy, reward=3.021!
129
+ [2023-02-22 21:40:45,185][44343] Saving new best policy, reward=3.234!
130
+ [2023-02-22 21:40:50,415][44357] Updated weights for policy 0, policy_version 50 (0.0028)
131
+ [2023-02-22 21:41:00,181][44343] Saving new best policy, reward=3.364!
132
+ [2023-02-22 21:41:01,966][44357] Updated weights for policy 0, policy_version 60 (0.0016)
133
+ [2023-02-22 21:41:05,186][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000062_253952.pth...
134
+ [2023-02-22 21:41:16,110][44357] Updated weights for policy 0, policy_version 70 (0.0020)
135
+ [2023-02-22 21:41:20,174][44343] Saving new best policy, reward=3.581!
136
+ [2023-02-22 21:41:28,833][44357] Updated weights for policy 0, policy_version 80 (0.0026)
137
+ [2023-02-22 21:41:30,184][44343] Saving new best policy, reward=3.665!
138
+ [2023-02-22 21:41:40,180][44343] Saving new best policy, reward=3.834!
139
+ [2023-02-22 21:41:40,808][44357] Updated weights for policy 0, policy_version 90 (0.0020)
140
+ [2023-02-22 21:41:45,270][44343] Saving new best policy, reward=4.019!
141
+ [2023-02-22 21:41:55,240][44357] Updated weights for policy 0, policy_version 100 (0.0018)
142
+ [2023-02-22 21:41:55,243][44343] Saving new best policy, reward=4.030!
143
+ [2023-02-22 21:42:00,177][44343] Saving new best policy, reward=4.640!
144
+ [2023-02-22 21:42:08,134][44357] Updated weights for policy 0, policy_version 110 (0.0022)
145
+ [2023-02-22 21:42:19,932][44357] Updated weights for policy 0, policy_version 120 (0.0012)
146
+ [2023-02-22 21:42:33,978][44357] Updated weights for policy 0, policy_version 130 (0.0024)
147
+ [2023-02-22 21:42:45,804][44357] Updated weights for policy 0, policy_version 140 (0.0020)
148
+ [2023-02-22 21:42:58,125][44357] Updated weights for policy 0, policy_version 150 (0.0019)
149
+ [2023-02-22 21:43:05,186][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000155_634880.pth...
150
+ [2023-02-22 21:43:12,422][44357] Updated weights for policy 0, policy_version 160 (0.0020)
151
+ [2023-02-22 21:43:24,033][44357] Updated weights for policy 0, policy_version 170 (0.0018)
152
+ [2023-02-22 21:43:36,766][44357] Updated weights for policy 0, policy_version 180 (0.0028)
153
+ [2023-02-22 21:43:50,179][44343] Saving new best policy, reward=4.947!
154
+ [2023-02-22 21:43:51,259][44357] Updated weights for policy 0, policy_version 190 (0.0016)
155
+ [2023-02-22 21:44:02,262][44357] Updated weights for policy 0, policy_version 200 (0.0014)
156
+ [2023-02-22 21:44:15,599][44357] Updated weights for policy 0, policy_version 210 (0.0013)
157
+ [2023-02-22 21:44:28,836][44357] Updated weights for policy 0, policy_version 220 (0.0013)
158
+ [2023-02-22 21:44:35,184][44343] Saving new best policy, reward=5.529!
159
+ [2023-02-22 21:44:39,706][44357] Updated weights for policy 0, policy_version 230 (0.0015)
160
+ [2023-02-22 21:44:53,759][44357] Updated weights for policy 0, policy_version 240 (0.0017)
161
+ [2023-02-22 21:45:05,188][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000249_1019904.pth...
162
+ [2023-02-22 21:45:05,432][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000062_253952.pth
163
+ [2023-02-22 21:45:06,592][44357] Updated weights for policy 0, policy_version 250 (0.0024)
164
+ [2023-02-22 21:45:18,363][44357] Updated weights for policy 0, policy_version 260 (0.0036)
165
+ [2023-02-22 21:45:32,402][44357] Updated weights for policy 0, policy_version 270 (0.0014)
166
+ [2023-02-22 21:45:44,524][44357] Updated weights for policy 0, policy_version 280 (0.0018)
167
+ [2023-02-22 21:45:55,183][44343] Saving new best policy, reward=5.693!
168
+ [2023-02-22 21:45:56,854][44357] Updated weights for policy 0, policy_version 290 (0.0024)
169
+ [2023-02-22 21:46:05,299][44343] Saving new best policy, reward=5.702!
170
+ [2023-02-22 21:46:10,895][44357] Updated weights for policy 0, policy_version 300 (0.0042)
171
+ [2023-02-22 21:46:22,027][44357] Updated weights for policy 0, policy_version 310 (0.0027)
172
+ [2023-02-22 21:46:30,183][44343] Saving new best policy, reward=6.326!
173
+ [2023-02-22 21:46:34,838][44357] Updated weights for policy 0, policy_version 320 (0.0028)
174
+ [2023-02-22 21:46:48,709][44357] Updated weights for policy 0, policy_version 330 (0.0034)
175
+ [2023-02-22 21:46:55,184][44343] Saving new best policy, reward=6.680!
176
+ [2023-02-22 21:46:59,173][44357] Updated weights for policy 0, policy_version 340 (0.0012)
177
+ [2023-02-22 21:47:05,198][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000343_1404928.pth...
178
+ [2023-02-22 21:47:05,367][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000155_634880.pth
179
+ [2023-02-22 21:47:13,125][44357] Updated weights for policy 0, policy_version 350 (0.0023)
180
+ [2023-02-22 21:47:26,190][44357] Updated weights for policy 0, policy_version 360 (0.0022)
181
+ [2023-02-22 21:47:37,325][44357] Updated weights for policy 0, policy_version 370 (0.0017)
182
+ [2023-02-22 21:47:51,298][44357] Updated weights for policy 0, policy_version 380 (0.0031)
183
+ [2023-02-22 21:48:04,032][44357] Updated weights for policy 0, policy_version 390 (0.0029)
184
+ [2023-02-22 21:48:10,200][44343] Saving new best policy, reward=6.724!
185
+ [2023-02-22 21:48:15,427][44357] Updated weights for policy 0, policy_version 400 (0.0029)
186
+ [2023-02-22 21:48:25,182][44343] Saving new best policy, reward=7.131!
187
+ [2023-02-22 21:48:29,120][44357] Updated weights for policy 0, policy_version 410 (0.0030)
188
+ [2023-02-22 21:48:40,241][44357] Updated weights for policy 0, policy_version 420 (0.0019)
189
+ [2023-02-22 21:48:53,312][44357] Updated weights for policy 0, policy_version 430 (0.0020)
190
+ [2023-02-22 21:49:05,192][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000438_1794048.pth...
191
+ [2023-02-22 21:49:05,454][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000249_1019904.pth
192
+ [2023-02-22 21:49:07,053][44357] Updated weights for policy 0, policy_version 440 (0.0017)
193
+ [2023-02-22 21:49:17,206][44357] Updated weights for policy 0, policy_version 450 (0.0017)
194
+ [2023-02-22 21:49:30,957][44357] Updated weights for policy 0, policy_version 460 (0.0036)
195
+ [2023-02-22 21:49:35,185][44343] Saving new best policy, reward=7.273!
196
+ [2023-02-22 21:49:44,197][44357] Updated weights for policy 0, policy_version 470 (0.0012)
197
+ [2023-02-22 21:49:55,073][44357] Updated weights for policy 0, policy_version 480 (0.0033)
198
+ [2023-02-22 21:50:09,114][44357] Updated weights for policy 0, policy_version 490 (0.0021)
199
+ [2023-02-22 21:50:10,175][44343] Saving new best policy, reward=7.677!
200
+ [2023-02-22 21:50:21,185][44357] Updated weights for policy 0, policy_version 500 (0.0015)
201
+ [2023-02-22 21:50:33,198][44357] Updated weights for policy 0, policy_version 510 (0.0018)
202
+ [2023-02-22 21:50:46,907][44357] Updated weights for policy 0, policy_version 520 (0.0022)
203
+ [2023-02-22 21:50:55,193][44343] Saving new best policy, reward=7.977!
204
+ [2023-02-22 21:50:58,148][44357] Updated weights for policy 0, policy_version 530 (0.0014)
205
+ [2023-02-22 21:51:05,188][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000534_2187264.pth...
206
+ [2023-02-22 21:51:05,403][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000343_1404928.pth
207
+ [2023-02-22 21:51:10,973][44357] Updated weights for policy 0, policy_version 540 (0.0022)
208
+ [2023-02-22 21:51:24,871][44357] Updated weights for policy 0, policy_version 550 (0.0011)
209
+ [2023-02-22 21:51:35,244][44357] Updated weights for policy 0, policy_version 560 (0.0019)
210
+ [2023-02-22 21:51:48,935][44357] Updated weights for policy 0, policy_version 570 (0.0018)
211
+ [2023-02-22 21:52:00,180][44343] Saving new best policy, reward=8.152!
212
+ [2023-02-22 21:52:01,787][44357] Updated weights for policy 0, policy_version 580 (0.0016)
213
+ [2023-02-22 21:52:13,228][44357] Updated weights for policy 0, policy_version 590 (0.0019)
214
+ [2023-02-22 21:52:20,181][44343] Saving new best policy, reward=8.358!
215
+ [2023-02-22 21:52:27,177][44357] Updated weights for policy 0, policy_version 600 (0.0012)
216
+ [2023-02-22 21:52:39,279][44357] Updated weights for policy 0, policy_version 610 (0.0013)
217
+ [2023-02-22 21:52:51,424][44357] Updated weights for policy 0, policy_version 620 (0.0036)
218
+ [2023-02-22 21:53:00,178][44343] Saving new best policy, reward=9.253!
219
+ [2023-02-22 21:53:05,166][44357] Updated weights for policy 0, policy_version 630 (0.0018)
220
+ [2023-02-22 21:53:05,183][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000630_2580480.pth...
221
+ [2023-02-22 21:53:05,353][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000438_1794048.pth
222
+ [2023-02-22 21:53:16,437][44357] Updated weights for policy 0, policy_version 640 (0.0013)
223
+ [2023-02-22 21:53:29,099][44357] Updated weights for policy 0, policy_version 650 (0.0014)
224
+ [2023-02-22 21:53:42,787][44357] Updated weights for policy 0, policy_version 660 (0.0023)
225
+ [2023-02-22 21:53:53,088][44357] Updated weights for policy 0, policy_version 670 (0.0012)
226
+ [2023-02-22 21:54:06,818][44357] Updated weights for policy 0, policy_version 680 (0.0038)
227
+ [2023-02-22 21:54:19,375][44357] Updated weights for policy 0, policy_version 690 (0.0026)
228
+ [2023-02-22 21:54:20,180][44343] Saving new best policy, reward=9.967!
229
+ [2023-02-22 21:54:30,617][44357] Updated weights for policy 0, policy_version 700 (0.0012)
230
+ [2023-02-22 21:54:44,488][44357] Updated weights for policy 0, policy_version 710 (0.0034)
231
+ [2023-02-22 21:54:56,158][44357] Updated weights for policy 0, policy_version 720 (0.0031)
232
+ [2023-02-22 21:55:05,186][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000726_2973696.pth...
233
+ [2023-02-22 21:55:05,392][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000534_2187264.pth
234
+ [2023-02-22 21:55:08,422][44357] Updated weights for policy 0, policy_version 730 (0.0025)
235
+ [2023-02-22 21:55:22,098][44357] Updated weights for policy 0, policy_version 740 (0.0025)
236
+ [2023-02-22 21:55:32,470][44357] Updated weights for policy 0, policy_version 750 (0.0014)
237
+ [2023-02-22 21:55:46,132][44357] Updated weights for policy 0, policy_version 760 (0.0019)
238
+ [2023-02-22 21:55:59,242][44357] Updated weights for policy 0, policy_version 770 (0.0018)
239
+ [2023-02-22 21:56:09,892][44357] Updated weights for policy 0, policy_version 780 (0.0019)
240
+ [2023-02-22 21:56:23,536][44357] Updated weights for policy 0, policy_version 790 (0.0036)
241
+ [2023-02-22 21:56:35,273][44357] Updated weights for policy 0, policy_version 800 (0.0020)
242
+ [2023-02-22 21:56:47,544][44357] Updated weights for policy 0, policy_version 810 (0.0026)
243
+ [2023-02-22 21:57:01,255][44357] Updated weights for policy 0, policy_version 820 (0.0018)
244
+ [2023-02-22 21:57:05,180][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000823_3371008.pth...
245
+ [2023-02-22 21:57:05,331][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000630_2580480.pth
246
+ [2023-02-22 21:57:12,428][44357] Updated weights for policy 0, policy_version 830 (0.0022)
247
+ [2023-02-22 21:57:25,276][44357] Updated weights for policy 0, policy_version 840 (0.0030)
248
+ [2023-02-22 21:57:38,985][44357] Updated weights for policy 0, policy_version 850 (0.0037)
249
+ [2023-02-22 21:57:49,296][44357] Updated weights for policy 0, policy_version 860 (0.0013)
250
+ [2023-02-22 21:58:03,074][44357] Updated weights for policy 0, policy_version 870 (0.0027)
251
+ [2023-02-22 21:58:15,882][44357] Updated weights for policy 0, policy_version 880 (0.0013)
252
+ [2023-02-22 21:58:26,821][44357] Updated weights for policy 0, policy_version 890 (0.0020)
253
+ [2023-02-22 21:58:40,175][44343] Saving new best policy, reward=10.147!
254
+ [2023-02-22 21:58:40,744][44357] Updated weights for policy 0, policy_version 900 (0.0021)
255
+ [2023-02-22 21:58:45,189][44343] Saving new best policy, reward=10.394!
256
+ [2023-02-22 21:58:52,878][44357] Updated weights for policy 0, policy_version 910 (0.0020)
257
+ [2023-02-22 21:59:04,728][44357] Updated weights for policy 0, policy_version 920 (0.0018)
258
+ [2023-02-22 21:59:05,186][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000920_3768320.pth...
259
+ [2023-02-22 21:59:05,369][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000726_2973696.pth
260
+ [2023-02-22 21:59:18,648][44357] Updated weights for policy 0, policy_version 930 (0.0012)
261
+ [2023-02-22 21:59:29,534][44357] Updated weights for policy 0, policy_version 940 (0.0018)
262
+ [2023-02-22 21:59:42,369][44357] Updated weights for policy 0, policy_version 950 (0.0038)
263
+ [2023-02-22 21:59:50,177][44343] Saving new best policy, reward=10.410!
264
+ [2023-02-22 21:59:56,324][44357] Updated weights for policy 0, policy_version 960 (0.0018)
265
+ [2023-02-22 22:00:00,175][44343] Saving new best policy, reward=10.565!
266
+ [2023-02-22 22:00:06,253][44357] Updated weights for policy 0, policy_version 970 (0.0017)
267
+ [2023-02-22 22:00:20,191][44357] Updated weights for policy 0, policy_version 980 (0.0040)
268
+ [2023-02-22 22:00:32,711][44357] Updated weights for policy 0, policy_version 990 (0.0014)
269
+ [2023-02-22 22:00:43,741][44357] Updated weights for policy 0, policy_version 1000 (0.0022)
270
+ [2023-02-22 22:00:57,526][44357] Updated weights for policy 0, policy_version 1010 (0.0012)
271
+ [2023-02-22 22:01:05,180][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001017_4165632.pth...
272
+ [2023-02-22 22:01:05,335][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000823_3371008.pth
273
+ [2023-02-22 22:01:08,715][44357] Updated weights for policy 0, policy_version 1020 (0.0035)
274
+ [2023-02-22 22:01:21,311][44357] Updated weights for policy 0, policy_version 1030 (0.0016)
275
+ [2023-02-22 22:01:25,190][44343] Saving new best policy, reward=10.667!
276
+ [2023-02-22 22:01:30,195][44343] Saving new best policy, reward=11.531!
277
+ [2023-02-22 22:01:34,973][44357] Updated weights for policy 0, policy_version 1040 (0.0034)
278
+ [2023-02-22 22:01:45,187][44343] Saving new best policy, reward=11.983!
279
+ [2023-02-22 22:01:45,549][44357] Updated weights for policy 0, policy_version 1050 (0.0012)
280
+ [2023-02-22 22:01:59,401][44357] Updated weights for policy 0, policy_version 1060 (0.0036)
281
+ [2023-02-22 22:02:12,148][44357] Updated weights for policy 0, policy_version 1070 (0.0015)
282
+ [2023-02-22 22:02:23,117][44357] Updated weights for policy 0, policy_version 1080 (0.0012)
283
+ [2023-02-22 22:02:36,996][44357] Updated weights for policy 0, policy_version 1090 (0.0038)
284
+ [2023-02-22 22:02:48,187][44357] Updated weights for policy 0, policy_version 1100 (0.0023)
285
+ [2023-02-22 22:03:00,451][44357] Updated weights for policy 0, policy_version 1110 (0.0032)
286
+ [2023-02-22 22:03:05,188][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001114_4562944.pth...
287
+ [2023-02-22 22:03:05,334][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000920_3768320.pth
288
+ [2023-02-22 22:03:14,138][44357] Updated weights for policy 0, policy_version 1120 (0.0019)
289
+ [2023-02-22 22:03:24,170][44357] Updated weights for policy 0, policy_version 1130 (0.0013)
290
+ [2023-02-22 22:03:37,655][44357] Updated weights for policy 0, policy_version 1140 (0.0018)
291
+ [2023-02-22 22:03:50,128][44357] Updated weights for policy 0, policy_version 1150 (0.0012)
292
+ [2023-02-22 22:04:01,346][44357] Updated weights for policy 0, policy_version 1160 (0.0023)
293
+ [2023-02-22 22:04:15,080][44357] Updated weights for policy 0, policy_version 1170 (0.0013)
294
+ [2023-02-22 22:04:26,034][44357] Updated weights for policy 0, policy_version 1180 (0.0028)
295
+ [2023-02-22 22:04:38,850][44357] Updated weights for policy 0, policy_version 1190 (0.0013)
296
+ [2023-02-22 22:04:52,728][44357] Updated weights for policy 0, policy_version 1200 (0.0013)
297
+ [2023-02-22 22:05:02,699][44357] Updated weights for policy 0, policy_version 1210 (0.0028)
298
+ [2023-02-22 22:05:05,188][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001211_4960256.pth...
299
+ [2023-02-22 22:05:05,398][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001017_4165632.pth
300
+ [2023-02-22 22:05:16,469][44357] Updated weights for policy 0, policy_version 1220 (0.0012)
301
+ [2023-02-22 22:05:20,173][44343] Saving new best policy, reward=13.082!
302
+ [2023-02-22 22:05:28,886][44357] Updated weights for policy 0, policy_version 1230 (0.0016)
303
+ [2023-02-22 22:05:39,747][44357] Updated weights for policy 0, policy_version 1240 (0.0026)
304
+ [2023-02-22 22:05:53,590][44357] Updated weights for policy 0, policy_version 1250 (0.0016)
305
+ [2023-02-22 22:06:04,499][44357] Updated weights for policy 0, policy_version 1260 (0.0012)
306
+ [2023-02-22 22:06:10,181][44343] Saving new best policy, reward=13.267!
307
+ [2023-02-22 22:06:17,223][44357] Updated weights for policy 0, policy_version 1270 (0.0021)
308
+ [2023-02-22 22:06:30,868][44357] Updated weights for policy 0, policy_version 1280 (0.0027)
309
+ [2023-02-22 22:06:40,821][44357] Updated weights for policy 0, policy_version 1290 (0.0014)
310
+ [2023-02-22 22:06:54,580][44357] Updated weights for policy 0, policy_version 1300 (0.0029)
311
+ [2023-02-22 22:07:05,302][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001309_5361664.pth...
312
+ [2023-02-22 22:07:05,580][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001114_4562944.pth
313
+ [2023-02-22 22:07:07,009][44357] Updated weights for policy 0, policy_version 1310 (0.0023)
314
+ [2023-02-22 22:07:18,242][44357] Updated weights for policy 0, policy_version 1320 (0.0014)
315
+ [2023-02-22 22:07:31,652][44357] Updated weights for policy 0, policy_version 1330 (0.0013)
316
+ [2023-02-22 22:07:40,183][44343] Saving new best policy, reward=13.692!
317
+ [2023-02-22 22:07:43,091][44357] Updated weights for policy 0, policy_version 1340 (0.0015)
318
+ [2023-02-22 22:07:55,898][44357] Updated weights for policy 0, policy_version 1350 (0.0031)
319
+ [2023-02-22 22:08:09,784][44357] Updated weights for policy 0, policy_version 1360 (0.0027)
320
+ [2023-02-22 22:08:19,962][44357] Updated weights for policy 0, policy_version 1370 (0.0019)
321
+ [2023-02-22 22:08:33,221][44357] Updated weights for policy 0, policy_version 1380 (0.0028)
322
+ [2023-02-22 22:08:45,882][44357] Updated weights for policy 0, policy_version 1390 (0.0023)
323
+ [2023-02-22 22:08:56,933][44357] Updated weights for policy 0, policy_version 1400 (0.0014)
324
+ [2023-02-22 22:09:05,188][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001405_5754880.pth...
325
+ [2023-02-22 22:09:05,382][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001211_4960256.pth
326
+ [2023-02-22 22:09:10,651][44357] Updated weights for policy 0, policy_version 1410 (0.0014)
327
+ [2023-02-22 22:09:22,062][44357] Updated weights for policy 0, policy_version 1420 (0.0012)
328
+ [2023-02-22 22:09:34,396][44357] Updated weights for policy 0, policy_version 1430 (0.0024)
329
+ [2023-02-22 22:09:40,177][44343] Saving new best policy, reward=13.814!
330
+ [2023-02-22 22:09:48,019][44357] Updated weights for policy 0, policy_version 1440 (0.0012)
331
+ [2023-02-22 22:09:50,187][44343] Saving new best policy, reward=14.316!
332
+ [2023-02-22 22:09:58,219][44357] Updated weights for policy 0, policy_version 1450 (0.0024)
333
+ [2023-02-22 22:10:11,801][44357] Updated weights for policy 0, policy_version 1460 (0.0020)
334
+ [2023-02-22 22:10:24,446][44357] Updated weights for policy 0, policy_version 1470 (0.0031)
335
+ [2023-02-22 22:10:35,200][44343] Saving new best policy, reward=14.817!
336
+ [2023-02-22 22:10:35,202][44357] Updated weights for policy 0, policy_version 1480 (0.0017)
337
+ [2023-02-22 22:10:48,866][44357] Updated weights for policy 0, policy_version 1490 (0.0020)
338
+ [2023-02-22 22:11:00,119][44357] Updated weights for policy 0, policy_version 1500 (0.0017)
339
+ [2023-02-22 22:11:05,261][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001503_6156288.pth...
340
+ [2023-02-22 22:11:05,509][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001309_5361664.pth
341
+ [2023-02-22 22:11:10,178][44343] Saving new best policy, reward=14.984!
342
+ [2023-02-22 22:11:12,591][44357] Updated weights for policy 0, policy_version 1510 (0.0019)
343
+ [2023-02-22 22:11:26,261][44357] Updated weights for policy 0, policy_version 1520 (0.0027)
344
+ [2023-02-22 22:11:36,250][44357] Updated weights for policy 0, policy_version 1530 (0.0019)
345
+ [2023-02-22 22:11:49,925][44357] Updated weights for policy 0, policy_version 1540 (0.0016)
346
+ [2023-02-22 22:12:02,863][44357] Updated weights for policy 0, policy_version 1550 (0.0015)
347
+ [2023-02-22 22:12:13,806][44357] Updated weights for policy 0, policy_version 1560 (0.0012)
348
+ [2023-02-22 22:12:28,531][44357] Updated weights for policy 0, policy_version 1570 (0.0021)
349
+ [2023-02-22 22:12:42,652][44357] Updated weights for policy 0, policy_version 1580 (0.0024)
350
+ [2023-02-22 22:12:53,704][44357] Updated weights for policy 0, policy_version 1590 (0.0021)
351
+ [2023-02-22 22:13:05,187][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001597_6541312.pth...
352
+ [2023-02-22 22:13:05,492][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001405_5754880.pth
353
+ [2023-02-22 22:13:08,100][44357] Updated weights for policy 0, policy_version 1600 (0.0018)
354
+ [2023-02-22 22:13:20,543][44357] Updated weights for policy 0, policy_version 1610 (0.0017)
355
+ [2023-02-22 22:13:31,704][44357] Updated weights for policy 0, policy_version 1620 (0.0025)
356
+ [2023-02-22 22:13:45,190][44343] Saving new best policy, reward=15.358!
357
+ [2023-02-22 22:13:45,524][44357] Updated weights for policy 0, policy_version 1630 (0.0019)
358
+ [2023-02-22 22:13:56,449][44357] Updated weights for policy 0, policy_version 1640 (0.0013)
359
+ [2023-02-22 22:14:09,261][44357] Updated weights for policy 0, policy_version 1650 (0.0014)
360
+ [2023-02-22 22:14:22,595][44357] Updated weights for policy 0, policy_version 1660 (0.0027)
361
+ [2023-02-22 22:14:32,964][44357] Updated weights for policy 0, policy_version 1670 (0.0018)
362
+ [2023-02-22 22:14:40,195][44343] Saving new best policy, reward=15.815!
363
+ [2023-02-22 22:14:46,637][44357] Updated weights for policy 0, policy_version 1680 (0.0028)
364
+ [2023-02-22 22:14:58,429][44357] Updated weights for policy 0, policy_version 1690 (0.0027)
365
+ [2023-02-22 22:15:05,195][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001695_6942720.pth...
366
+ [2023-02-22 22:15:05,373][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001503_6156288.pth
367
+ [2023-02-22 22:15:10,380][44357] Updated weights for policy 0, policy_version 1700 (0.0017)
368
+ [2023-02-22 22:15:24,328][44357] Updated weights for policy 0, policy_version 1710 (0.0028)
369
+ [2023-02-22 22:15:34,965][44357] Updated weights for policy 0, policy_version 1720 (0.0031)
370
+ [2023-02-22 22:15:47,961][44357] Updated weights for policy 0, policy_version 1730 (0.0021)
371
+ [2023-02-22 22:16:01,469][44357] Updated weights for policy 0, policy_version 1740 (0.0023)
372
+ [2023-02-22 22:16:12,088][44357] Updated weights for policy 0, policy_version 1750 (0.0018)
373
+ [2023-02-22 22:16:25,789][44357] Updated weights for policy 0, policy_version 1760 (0.0027)
374
+ [2023-02-22 22:16:37,997][44357] Updated weights for policy 0, policy_version 1770 (0.0026)
375
+ [2023-02-22 22:16:49,718][44357] Updated weights for policy 0, policy_version 1780 (0.0020)
376
+ [2023-02-22 22:17:03,608][44357] Updated weights for policy 0, policy_version 1790 (0.0023)
377
+ [2023-02-22 22:17:05,187][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001791_7335936.pth...
378
+ [2023-02-22 22:17:05,337][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001597_6541312.pth
379
+ [2023-02-22 22:17:15,426][44357] Updated weights for policy 0, policy_version 1800 (0.0028)
380
+ [2023-02-22 22:17:28,352][44357] Updated weights for policy 0, policy_version 1810 (0.0015)
381
+ [2023-02-22 22:17:42,564][44357] Updated weights for policy 0, policy_version 1820 (0.0012)
382
+ [2023-02-22 22:17:53,088][44357] Updated weights for policy 0, policy_version 1830 (0.0018)
383
+ [2023-02-22 22:18:06,524][44357] Updated weights for policy 0, policy_version 1840 (0.0026)
384
+ [2023-02-22 22:18:19,831][44357] Updated weights for policy 0, policy_version 1850 (0.0026)
385
+ [2023-02-22 22:18:30,737][44357] Updated weights for policy 0, policy_version 1860 (0.0014)
386
+ [2023-02-22 22:18:44,377][44357] Updated weights for policy 0, policy_version 1870 (0.0040)
387
+ [2023-02-22 22:18:56,344][44357] Updated weights for policy 0, policy_version 1880 (0.0019)
388
+ [2023-02-22 22:19:05,184][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001887_7729152.pth...
389
+ [2023-02-22 22:19:05,374][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001695_6942720.pth
390
+ [2023-02-22 22:19:08,080][44357] Updated weights for policy 0, policy_version 1890 (0.0022)
391
+ [2023-02-22 22:19:21,773][44357] Updated weights for policy 0, policy_version 1900 (0.0021)
392
+ [2023-02-22 22:19:32,368][44357] Updated weights for policy 0, policy_version 1910 (0.0028)
393
+ [2023-02-22 22:19:45,622][44357] Updated weights for policy 0, policy_version 1920 (0.0014)
394
+ [2023-02-22 22:19:58,770][44357] Updated weights for policy 0, policy_version 1930 (0.0021)
395
+ [2023-02-22 22:20:09,516][44357] Updated weights for policy 0, policy_version 1940 (0.0013)
396
+ [2023-02-22 22:20:23,161][44357] Updated weights for policy 0, policy_version 1950 (0.0014)
397
+ [2023-02-22 22:20:35,289][44357] Updated weights for policy 0, policy_version 1960 (0.0012)
398
+ [2023-02-22 22:20:46,932][44357] Updated weights for policy 0, policy_version 1970 (0.0019)
399
+ [2023-02-22 22:21:00,847][44357] Updated weights for policy 0, policy_version 1980 (0.0020)
400
+ [2023-02-22 22:21:05,191][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001984_8126464.pth...
401
+ [2023-02-22 22:21:05,357][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001791_7335936.pth
402
+ [2023-02-22 22:21:12,097][44357] Updated weights for policy 0, policy_version 1990 (0.0012)
403
+ [2023-02-22 22:21:24,938][44357] Updated weights for policy 0, policy_version 2000 (0.0017)
404
+ [2023-02-22 22:21:38,170][44357] Updated weights for policy 0, policy_version 2010 (0.0018)
405
+ [2023-02-22 22:21:48,538][44357] Updated weights for policy 0, policy_version 2020 (0.0024)
406
+ [2023-02-22 22:21:50,174][44343] Saving new best policy, reward=15.986!
407
+ [2023-02-22 22:21:55,187][44343] Saving new best policy, reward=16.024!
408
+ [2023-02-22 22:22:02,271][44357] Updated weights for policy 0, policy_version 2030 (0.0027)
409
+ [2023-02-22 22:22:14,337][44357] Updated weights for policy 0, policy_version 2040 (0.0024)
410
+ [2023-02-22 22:22:26,220][44357] Updated weights for policy 0, policy_version 2050 (0.0022)
411
+ [2023-02-22 22:22:40,009][44357] Updated weights for policy 0, policy_version 2060 (0.0031)
412
+ [2023-02-22 22:22:50,319][44357] Updated weights for policy 0, policy_version 2070 (0.0014)
413
+ [2023-02-22 22:22:55,196][44343] Saving new best policy, reward=16.279!
414
+ [2023-02-22 22:23:03,597][44357] Updated weights for policy 0, policy_version 2080 (0.0018)
415
+ [2023-02-22 22:23:05,182][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002081_8523776.pth...
416
+ [2023-02-22 22:23:05,358][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001887_7729152.pth
417
+ [2023-02-22 22:23:05,365][44343] Saving new best policy, reward=16.580!
418
+ [2023-02-22 22:23:10,183][44343] Saving new best policy, reward=17.174!
419
+ [2023-02-22 22:23:17,504][44357] Updated weights for policy 0, policy_version 2090 (0.0042)
420
+ [2023-02-22 22:23:28,031][44357] Updated weights for policy 0, policy_version 2100 (0.0016)
421
+ [2023-02-22 22:23:41,630][44357] Updated weights for policy 0, policy_version 2110 (0.0023)
422
+ [2023-02-22 22:23:54,289][44357] Updated weights for policy 0, policy_version 2120 (0.0018)
423
+ [2023-02-22 22:24:05,664][44357] Updated weights for policy 0, policy_version 2130 (0.0017)
424
+ [2023-02-22 22:24:19,337][44357] Updated weights for policy 0, policy_version 2140 (0.0013)
425
+ [2023-02-22 22:24:30,018][44357] Updated weights for policy 0, policy_version 2150 (0.0026)
426
+ [2023-02-22 22:24:43,012][44357] Updated weights for policy 0, policy_version 2160 (0.0028)
427
+ [2023-02-22 22:24:56,533][44357] Updated weights for policy 0, policy_version 2170 (0.0014)
428
+ [2023-02-22 22:25:05,192][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002178_8921088.pth...
429
+ [2023-02-22 22:25:05,348][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001984_8126464.pth
430
+ [2023-02-22 22:25:06,980][44357] Updated weights for policy 0, policy_version 2180 (0.0020)
431
+ [2023-02-22 22:25:20,932][44357] Updated weights for policy 0, policy_version 2190 (0.0038)
432
+ [2023-02-22 22:25:33,457][44357] Updated weights for policy 0, policy_version 2200 (0.0017)
433
+ [2023-02-22 22:25:44,759][44357] Updated weights for policy 0, policy_version 2210 (0.0029)
434
+ [2023-02-22 22:25:58,677][44357] Updated weights for policy 0, policy_version 2220 (0.0012)
435
+ [2023-02-22 22:26:09,960][44357] Updated weights for policy 0, policy_version 2230 (0.0019)
436
+ [2023-02-22 22:26:22,317][44357] Updated weights for policy 0, policy_version 2240 (0.0022)
437
+ [2023-02-22 22:26:36,070][44357] Updated weights for policy 0, policy_version 2250 (0.0022)
438
+ [2023-02-22 22:26:45,762][44357] Updated weights for policy 0, policy_version 2260 (0.0020)
439
+ [2023-02-22 22:26:59,512][44357] Updated weights for policy 0, policy_version 2270 (0.0018)
440
+ [2023-02-22 22:27:05,190][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002275_9318400.pth...
441
+ [2023-02-22 22:27:05,351][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002081_8523776.pth
442
+ [2023-02-22 22:27:05,375][44343] Saving new best policy, reward=17.231!
443
+ [2023-02-22 22:27:12,097][44357] Updated weights for policy 0, policy_version 2280 (0.0022)
444
+ [2023-02-22 22:27:23,133][44357] Updated weights for policy 0, policy_version 2290 (0.0024)
445
+ [2023-02-22 22:27:36,807][44357] Updated weights for policy 0, policy_version 2300 (0.0027)
446
+ [2023-02-22 22:27:48,016][44357] Updated weights for policy 0, policy_version 2310 (0.0012)
447
+ [2023-02-22 22:28:00,458][44357] Updated weights for policy 0, policy_version 2320 (0.0020)
448
+ [2023-02-22 22:28:14,185][44357] Updated weights for policy 0, policy_version 2330 (0.0023)
449
+ [2023-02-22 22:28:24,436][44357] Updated weights for policy 0, policy_version 2340 (0.0020)
450
+ [2023-02-22 22:28:38,101][44357] Updated weights for policy 0, policy_version 2350 (0.0029)
451
+ [2023-02-22 22:28:50,500][44357] Updated weights for policy 0, policy_version 2360 (0.0011)
452
+ [2023-02-22 22:29:01,834][44357] Updated weights for policy 0, policy_version 2370 (0.0032)
453
+ [2023-02-22 22:29:05,187][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002372_9715712.pth...
454
+ [2023-02-22 22:29:05,405][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002178_8921088.pth
455
+ [2023-02-22 22:29:15,519][44357] Updated weights for policy 0, policy_version 2380 (0.0022)
456
+ [2023-02-22 22:29:26,480][44357] Updated weights for policy 0, policy_version 2390 (0.0013)
457
+ [2023-02-22 22:29:39,172][44357] Updated weights for policy 0, policy_version 2400 (0.0019)
458
+ [2023-02-22 22:29:52,732][44357] Updated weights for policy 0, policy_version 2410 (0.0016)
459
+ [2023-02-22 22:30:02,719][44357] Updated weights for policy 0, policy_version 2420 (0.0013)
460
+ [2023-02-22 22:30:16,279][44357] Updated weights for policy 0, policy_version 2430 (0.0014)
461
+ [2023-02-22 22:30:28,389][44357] Updated weights for policy 0, policy_version 2440 (0.0012)
462
+ [2023-02-22 22:30:33,118][44343] Stopping Batcher_0...
463
+ [2023-02-22 22:30:33,120][44343] Loop batcher_evt_loop terminating...
464
+ [2023-02-22 22:30:33,121][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002443_10006528.pth...
465
+ [2023-02-22 22:30:33,206][44357] Weights refcount: 2 0
466
+ [2023-02-22 22:30:33,224][44357] Stopping InferenceWorker_p0-w0...
467
+ [2023-02-22 22:30:33,225][44357] Loop inference_proc0-0_evt_loop terminating...
468
+ [2023-02-22 22:30:33,256][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002275_9318400.pth
469
+ [2023-02-22 22:30:33,264][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002443_10006528.pth...
470
+ [2023-02-22 22:30:33,385][44343] Stopping LearnerWorker_p0...
471
+ [2023-02-22 22:30:33,389][44343] Loop learner_proc0_evt_loop terminating...
472
+ [2023-02-22 22:30:33,492][44359] Stopping RolloutWorker_w1...
473
+ [2023-02-22 22:30:33,502][44359] Loop rollout_proc1_evt_loop terminating...
474
+ [2023-02-22 22:30:33,513][44365] Stopping RolloutWorker_w3...
475
+ [2023-02-22 22:30:33,522][44380] Stopping RolloutWorker_w6...
476
+ [2023-02-22 22:30:33,520][44365] Loop rollout_proc3_evt_loop terminating...
477
+ [2023-02-22 22:30:33,523][44374] Stopping RolloutWorker_w7...
478
+ [2023-02-22 22:30:33,528][44370] Stopping RolloutWorker_w5...
479
+ [2023-02-22 22:30:33,529][44370] Loop rollout_proc5_evt_loop terminating...
480
+ [2023-02-22 22:30:33,535][44374] Loop rollout_proc7_evt_loop terminating...
481
+ [2023-02-22 22:30:33,539][44380] Loop rollout_proc6_evt_loop terminating...
482
+ [2023-02-22 22:30:33,561][44372] Stopping RolloutWorker_w4...
483
+ [2023-02-22 22:30:33,575][44362] Stopping RolloutWorker_w2...
484
+ [2023-02-22 22:30:33,575][44362] Loop rollout_proc2_evt_loop terminating...
485
+ [2023-02-22 22:30:33,590][44358] Stopping RolloutWorker_w0...
486
+ [2023-02-22 22:30:33,590][44358] Loop rollout_proc0_evt_loop terminating...
487
+ [2023-02-22 22:30:33,568][44372] Loop rollout_proc4_evt_loop terminating...