Upload . with huggingface_hub

Browse files

Files changed (9) hide show

.gitattributes +1 -0
.summary/0/events.out.tfevents.1677101945.355362e7601a +3 -0
README.md +56 -0
checkpoint_p0/best_000002275_9318400_reward_17.231.pth +3 -0
checkpoint_p0/checkpoint_000002372_9715712.pth +3 -0
checkpoint_p0/checkpoint_000002443_10006528.pth +3 -0
config.json +143 -0
replay.mp4 +3 -0
sf_log.txt +487 -0

.gitattributes CHANGED Viewed

@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+replay.mp4 filter=lfs diff=lfs merge=lfs -text

.summary/0/events.out.tfevents.1677101945.355362e7601a ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7b1da7f33290987b826292b695f615948309a3a82fb5c743e63b902c5bd7ba02
+size 2302815

README.md ADDED Viewed

	@@ -0,0 +1,56 @@

+---
+library_name: sample-factory
+tags:
+- deep-reinforcement-learning
+- reinforcement-learning
+- sample-factory
+model-index:
+- name: APPO
+  results:
+  - task:
+      type: reinforcement-learning
+      name: reinforcement-learning
+    dataset:
+      name: doom_deadly_corridor
+      type: doom_deadly_corridor
+    metrics:
+    - type: mean_reward
+      value: 10.42 +/- 8.36
+      name: mean_reward
+      verified: false
+---
+An **APPO** model trained on the **doom_deadly_corridor** environment.
+This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
+Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
+## Downloading the model
+After installing Sample-Factory, download the model with:
+```
+python -m sample_factory.huggingface.load_from_hub -r RamonAnkersmit/rl_course_doom_deadly_corridor
+```
+## Using the model
+To run the model after download, use the `enjoy` script corresponding to this environment:
+```
+python -m <path.to.enjoy.module> --algo=APPO --env=doom_deadly_corridor --train_dir=./train_dir --experiment=rl_course_doom_deadly_corridor
+```
+You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
+See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.
+## Training with this model
+To continue training with this model, use the `train` script corresponding to this environment:
+```
+python -m <path.to.train.module> --algo=APPO --env=doom_deadly_corridor --train_dir=./train_dir --experiment=rl_course_doom_deadly_corridor --restart_behavior=resume --train_for_env_steps=10000000000
+```
+Note: you may have to set `--train_for_env_steps` to a suitably high number, because the experiment resumes from the step count at which it previously concluded.

checkpoint_p0/best_000002275_9318400_reward_17.231.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d1d0346b43af0479ecef1f1dd45a431e8162972561fe0a5b0e2fc2e5c5b56a76
+size 34965478

checkpoint_p0/checkpoint_000002372_9715712.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:26e3561611732efd9a991633aad7d40c33142b682a8292f355e3b3f84218ea48
+size 34965892

checkpoint_p0/checkpoint_000002443_10006528.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0dea399edf996b131058ec439aceb24067a3f8da11e27099113f9215a0e02fa5
+size 34965892

config.json ADDED Viewed

	@@ -0,0 +1,143 @@

+{
+  "help": false,
+  "algo": "APPO",
+  "env": "doom_deadly_corridor",
+  "experiment": "doom_deadly_corridor",
+  "train_dir": "/content/train_dir",
+  "restart_behavior": "resume",
+  "device": "gpu",
+  "seed": null,
+  "num_policies": 1,
+  "async_rl": true,
+  "serial_mode": false,
+  "batched_sampling": false,
+  "num_batches_to_accumulate": 2,
+  "worker_num_splits": 2,
+  "policy_workers_per_policy": 1,
+  "max_policy_lag": 1000,
+  "num_workers": 8,
+  "num_envs_per_worker": 4,
+  "batch_size": 1024,
+  "num_batches_per_epoch": 1,
+  "num_epochs": 1,
+  "rollout": 32,
+  "recurrence": 32,
+  "shuffle_minibatches": false,
+  "gamma": 0.99,
+  "reward_scale": 1.0,
+  "reward_clip": 1000.0,
+  "value_bootstrap": false,
+  "normalize_returns": true,
+  "exploration_loss_coeff": 0.001,
+  "value_loss_coeff": 0.5,
+  "kl_loss_coeff": 0.0,
+  "exploration_loss": "symmetric_kl",
+  "gae_lambda": 0.95,
+  "ppo_clip_ratio": 0.1,
+  "ppo_clip_value": 0.2,
+  "with_vtrace": false,
+  "vtrace_rho": 1.0,
+  "vtrace_c": 1.0,
+  "optimizer": "adam",
+  "adam_eps": 1e-06,
+  "adam_beta1": 0.9,
+  "adam_beta2": 0.999,
+  "max_grad_norm": 4.0,
+  "learning_rate": 0.0001,
+  "lr_schedule": "constant",
+  "lr_schedule_kl_threshold": 0.008,
+  "lr_adaptive_min": 1e-06,
+  "lr_adaptive_max": 0.01,
+  "obs_subtract_mean": 0.0,
+  "obs_scale": 255.0,
+  "normalize_input": true,
+  "normalize_input_keys": null,
+  "decorrelate_experience_max_seconds": 0,
+  "decorrelate_envs_on_one_worker": true,
+  "actor_worker_gpus": [],
+  "set_workers_cpu_affinity": true,
+  "force_envs_single_thread": false,
+  "default_niceness": 0,
+  "log_to_file": true,
+  "experiment_summaries_interval": 10,
+  "flush_summaries_interval": 30,
+  "stats_avg": 100,
+  "summaries_use_frameskip": true,
+  "heartbeat_interval": 20,
+  "heartbeat_reporting_interval": 600,
+  "train_for_env_steps": 10000000,
+  "train_for_seconds": 10000000000,
+  "save_every_sec": 120,
+  "keep_checkpoints": 2,
+  "load_checkpoint_kind": "latest",
+  "save_milestones_sec": -1,
+  "save_best_every_sec": 5,
+  "save_best_metric": "reward",
+  "save_best_after": 100000,
+  "benchmark": false,
+  "encoder_mlp_layers": [
+    512,
+    512
+  ],
+  "encoder_conv_architecture": "convnet_simple",
+  "encoder_conv_mlp_layers": [
+    512
+  ],
+  "use_rnn": true,
+  "rnn_size": 512,
+  "rnn_type": "gru",
+  "rnn_num_layers": 1,
+  "decoder_mlp_layers": [],
+  "nonlinearity": "elu",
+  "policy_initialization": "orthogonal",
+  "policy_init_gain": 1.0,
+  "actor_critic_share_weights": true,
+  "adaptive_stddev": true,
+  "continuous_tanh_scale": 0.0,
+  "initial_stddev": 1.0,
+  "use_env_info_cache": false,
+  "env_gpu_actions": false,
+  "env_gpu_observations": true,
+  "env_frameskip": 4,
+  "env_framestack": 1,
+  "pixel_format": "CHW",
+  "use_record_episode_statistics": false,
+  "with_wandb": false,
+  "wandb_user": null,
+  "wandb_project": "sample_factory",
+  "wandb_group": null,
+  "wandb_job_type": "SF",
+  "wandb_tags": [],
+  "with_pbt": false,
+  "pbt_mix_policies_in_one_env": true,
+  "pbt_period_env_steps": 5000000,
+  "pbt_start_mutation": 20000000,
+  "pbt_replace_fraction": 0.3,
+  "pbt_mutation_rate": 0.15,
+  "pbt_replace_reward_gap": 0.1,
+  "pbt_replace_reward_gap_absolute": 1e-06,
+  "pbt_optimize_gamma": false,
+  "pbt_target_objective": "true_objective",
+  "pbt_perturb_min": 1.1,
+  "pbt_perturb_max": 1.5,
+  "num_agents": -1,
+  "num_humans": 0,
+  "num_bots": -1,
+  "start_bot_difficulty": null,
+  "timelimit": null,
+  "res_w": 128,
+  "res_h": 72,
+  "wide_aspect_ratio": false,
+  "eval_env_frameskip": 1,
+  "fps": 35,
+  "command_line": "--env=doom_deadly_corridor --experiment=doom_deadly_corridor --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=10000000",
+  "cli_args": {
+    "env": "doom_deadly_corridor",
+    "experiment": "doom_deadly_corridor",
+    "num_workers": 8,
+    "num_envs_per_worker": 4,
+    "train_for_env_steps": 10000000
+  },
+  "git_hash": "unknown",
+  "git_repo_name": "not a git repository"
+}

replay.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:12335cd0d24082e9d0eb7544fb2f9dcf472d9227bd7a464517d331d46654e4c5
+size 1703429

sf_log.txt ADDED Viewed

	@@ -0,0 +1,487 @@

+[2023-02-22 21:39:18,790][44343] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-02-22 21:39:18,795][44343] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+[2023-02-22 21:39:18,877][44343] Num visible devices: 1
+[2023-02-22 21:39:18,911][44343] Starting seed is not provided
+[2023-02-22 21:39:18,911][44343] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-02-22 21:39:18,911][44343] Initializing actor-critic model on device cuda:0
+[2023-02-22 21:39:18,912][44343] RunningMeanStd input shape: (3, 72, 128)
+[2023-02-22 21:39:18,916][44343] RunningMeanStd input shape: (1,)
+[2023-02-22 21:39:18,966][44343] ConvEncoder: input_channels=3
+[2023-02-22 21:39:19,495][44358] Worker 0 uses CPU cores [0]
+[2023-02-22 21:39:19,655][44357] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-02-22 21:39:19,661][44357] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+[2023-02-22 21:39:19,700][44357] Num visible devices: 1
+[2023-02-22 21:39:19,810][44359] Worker 1 uses CPU cores [1]
+[2023-02-22 21:39:19,950][44343] Conv encoder output size: 512
+[2023-02-22 21:39:19,950][44343] Policy head output size: 512
+[2023-02-22 21:39:20,035][44343] Created Actor Critic model with architecture:
+[2023-02-22 21:39:20,035][44343] ActorCriticSharedWeights(
+  (obs_normalizer): ObservationNormalizer(
+    (running_mean_std): RunningMeanStdDictInPlace(
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): VizdoomEncoder(
+    (basic_encoder): ConvEncoder(
+      (enc): RecursiveScriptModule(
+        original_name=ConvEncoderImpl
+        (conv_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Conv2d)
+          (1): RecursiveScriptModule(original_name=ELU)
+          (2): RecursiveScriptModule(original_name=Conv2d)
+          (3): RecursiveScriptModule(original_name=ELU)
+          (4): RecursiveScriptModule(original_name=Conv2d)
+          (5): RecursiveScriptModule(original_name=ELU)
+        )
+        (mlp_layers): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ELU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreRNN(
+    (core): GRU(512, 512)
+  )
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationDefault(
+    (distribution_linear): Linear(in_features=512, out_features=11, bias=True)
+  )
+)
+[2023-02-22 21:39:20,560][44362] Worker 2 uses CPU cores [0]
+[2023-02-22 21:39:20,816][44380] Worker 6 uses CPU cores [0]
+[2023-02-22 21:39:20,864][44365] Worker 3 uses CPU cores [1]
+[2023-02-22 21:39:21,072][44370] Worker 5 uses CPU cores [1]
+[2023-02-22 21:39:21,112][44372] Worker 4 uses CPU cores [0]
+[2023-02-22 21:39:21,139][44374] Worker 7 uses CPU cores [1]
+[2023-02-22 21:39:27,142][44343] Using optimizer <class 'torch.optim.adam.Adam'>
+[2023-02-22 21:39:27,144][44343] No checkpoints found
+[2023-02-22 21:39:27,144][44343] Did not load from checkpoint, starting from scratch!
+[2023-02-22 21:39:27,144][44343] Initialized policy 0 weights for model version 0
+[2023-02-22 21:39:27,147][44343] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-02-22 21:39:27,154][44343] LearnerWorker_p0 finished initialization!
+[2023-02-22 21:39:27,360][44357] RunningMeanStd input shape: (3, 72, 128)
+[2023-02-22 21:39:27,361][44357] RunningMeanStd input shape: (1,)
+[2023-02-22 21:39:27,373][44357] ConvEncoder: input_channels=3
+[2023-02-22 21:39:27,471][44357] Conv encoder output size: 512
+[2023-02-22 21:39:27,472][44357] Policy head output size: 512
+[2023-02-22 21:39:30,323][44365] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-22 21:39:30,344][44370] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-22 21:39:30,348][44359] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-22 21:39:30,354][44374] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-22 21:39:30,502][44372] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-22 21:39:30,507][44380] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-22 21:39:30,535][44362] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-22 21:39:30,603][44358] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-22 21:39:32,430][44374] Decorrelating experience for 0 frames...
+[2023-02-22 21:39:32,432][44370] Decorrelating experience for 0 frames...
+[2023-02-22 21:39:32,434][44365] Decorrelating experience for 0 frames...
+[2023-02-22 21:39:32,434][44359] Decorrelating experience for 0 frames...
+[2023-02-22 21:39:32,682][44372] Decorrelating experience for 0 frames...
+[2023-02-22 21:39:32,687][44380] Decorrelating experience for 0 frames...
+[2023-02-22 21:39:32,693][44362] Decorrelating experience for 0 frames...
+[2023-02-22 21:39:32,710][44358] Decorrelating experience for 0 frames...
+[2023-02-22 21:39:34,060][44365] Decorrelating experience for 32 frames...
+[2023-02-22 21:39:34,061][44359] Decorrelating experience for 32 frames...
+[2023-02-22 21:39:34,055][44374] Decorrelating experience for 32 frames...
+[2023-02-22 21:39:34,351][44370] Decorrelating experience for 32 frames...
+[2023-02-22 21:39:34,371][44372] Decorrelating experience for 32 frames...
+[2023-02-22 21:39:34,444][44358] Decorrelating experience for 32 frames...
+[2023-02-22 21:39:34,503][44362] Decorrelating experience for 32 frames...
+[2023-02-22 21:39:35,178][44380] Decorrelating experience for 32 frames...
+[2023-02-22 21:39:35,471][44358] Decorrelating experience for 64 frames...
+[2023-02-22 21:39:35,815][44380] Decorrelating experience for 64 frames...
+[2023-02-22 21:39:35,825][44365] Decorrelating experience for 64 frames...
+[2023-02-22 21:39:35,879][44359] Decorrelating experience for 64 frames...
+[2023-02-22 21:39:35,882][44374] Decorrelating experience for 64 frames...
+[2023-02-22 21:39:36,094][44370] Decorrelating experience for 64 frames...
+[2023-02-22 21:39:36,872][44380] Decorrelating experience for 96 frames...
+[2023-02-22 21:39:36,885][44358] Decorrelating experience for 96 frames...
+[2023-02-22 21:39:37,033][44372] Decorrelating experience for 64 frames...
+[2023-02-22 21:39:37,168][44365] Decorrelating experience for 96 frames...
+[2023-02-22 21:39:37,251][44359] Decorrelating experience for 96 frames...
+[2023-02-22 21:39:37,288][44374] Decorrelating experience for 96 frames...
+[2023-02-22 21:39:37,810][44362] Decorrelating experience for 64 frames...
+[2023-02-22 21:39:38,106][44372] Decorrelating experience for 96 frames...
+[2023-02-22 21:39:38,408][44370] Decorrelating experience for 96 frames...
+[2023-02-22 21:39:38,514][44362] Decorrelating experience for 96 frames...
+[2023-02-22 21:39:42,397][44343] Signal inference workers to stop experience collection...
+[2023-02-22 21:39:42,420][44357] InferenceWorker_p0-w0: stopping experience collection
+[2023-02-22 21:39:45,642][44343] Signal inference workers to resume experience collection...
+[2023-02-22 21:39:45,646][44357] InferenceWorker_p0-w0: resuming experience collection
+[2023-02-22 21:39:58,407][44357] Updated weights for policy 0, policy_version 10 (0.0583)
+[2023-02-22 21:40:10,911][44357] Updated weights for policy 0, policy_version 20 (0.0022)
+[2023-02-22 21:40:20,180][44343] Saving new best policy, reward=1.570!
+[2023-02-22 21:40:22,503][44357] Updated weights for policy 0, policy_version 30 (0.0027)
+[2023-02-22 21:40:25,197][44343] Saving new best policy, reward=1.799!
+[2023-02-22 21:40:30,180][44343] Saving new best policy, reward=2.195!
+[2023-02-22 21:40:35,188][44343] Saving new best policy, reward=2.481!
+[2023-02-22 21:40:36,833][44357] Updated weights for policy 0, policy_version 40 (0.0012)
+[2023-02-22 21:40:40,177][44343] Saving new best policy, reward=3.021!
+[2023-02-22 21:40:45,185][44343] Saving new best policy, reward=3.234!
+[2023-02-22 21:40:50,415][44357] Updated weights for policy 0, policy_version 50 (0.0028)
+[2023-02-22 21:41:00,181][44343] Saving new best policy, reward=3.364!
+[2023-02-22 21:41:01,966][44357] Updated weights for policy 0, policy_version 60 (0.0016)
+[2023-02-22 21:41:05,186][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000062_253952.pth...
+[2023-02-22 21:41:16,110][44357] Updated weights for policy 0, policy_version 70 (0.0020)
+[2023-02-22 21:41:20,174][44343] Saving new best policy, reward=3.581!
+[2023-02-22 21:41:28,833][44357] Updated weights for policy 0, policy_version 80 (0.0026)
+[2023-02-22 21:41:30,184][44343] Saving new best policy, reward=3.665!
+[2023-02-22 21:41:40,180][44343] Saving new best policy, reward=3.834!
+[2023-02-22 21:41:40,808][44357] Updated weights for policy 0, policy_version 90 (0.0020)
+[2023-02-22 21:41:45,270][44343] Saving new best policy, reward=4.019!
+[2023-02-22 21:41:55,240][44357] Updated weights for policy 0, policy_version 100 (0.0018)
+[2023-02-22 21:41:55,243][44343] Saving new best policy, reward=4.030!
+[2023-02-22 21:42:00,177][44343] Saving new best policy, reward=4.640!
+[2023-02-22 21:42:08,134][44357] Updated weights for policy 0, policy_version 110 (0.0022)
+[2023-02-22 21:42:19,932][44357] Updated weights for policy 0, policy_version 120 (0.0012)
+[2023-02-22 21:42:33,978][44357] Updated weights for policy 0, policy_version 130 (0.0024)
+[2023-02-22 21:42:45,804][44357] Updated weights for policy 0, policy_version 140 (0.0020)
+[2023-02-22 21:42:58,125][44357] Updated weights for policy 0, policy_version 150 (0.0019)
+[2023-02-22 21:43:05,186][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000155_634880.pth...
+[2023-02-22 21:43:12,422][44357] Updated weights for policy 0, policy_version 160 (0.0020)
+[2023-02-22 21:43:24,033][44357] Updated weights for policy 0, policy_version 170 (0.0018)
+[2023-02-22 21:43:36,766][44357] Updated weights for policy 0, policy_version 180 (0.0028)
+[2023-02-22 21:43:50,179][44343] Saving new best policy, reward=4.947!
+[2023-02-22 21:43:51,259][44357] Updated weights for policy 0, policy_version 190 (0.0016)
+[2023-02-22 21:44:02,262][44357] Updated weights for policy 0, policy_version 200 (0.0014)
+[2023-02-22 21:44:15,599][44357] Updated weights for policy 0, policy_version 210 (0.0013)
+[2023-02-22 21:44:28,836][44357] Updated weights for policy 0, policy_version 220 (0.0013)
+[2023-02-22 21:44:35,184][44343] Saving new best policy, reward=5.529!
+[2023-02-22 21:44:39,706][44357] Updated weights for policy 0, policy_version 230 (0.0015)
+[2023-02-22 21:44:53,759][44357] Updated weights for policy 0, policy_version 240 (0.0017)
+[2023-02-22 21:45:05,188][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000249_1019904.pth...
+[2023-02-22 21:45:05,432][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000062_253952.pth
+[2023-02-22 21:45:06,592][44357] Updated weights for policy 0, policy_version 250 (0.0024)
+[2023-02-22 21:45:18,363][44357] Updated weights for policy 0, policy_version 260 (0.0036)
+[2023-02-22 21:45:32,402][44357] Updated weights for policy 0, policy_version 270 (0.0014)
+[2023-02-22 21:45:44,524][44357] Updated weights for policy 0, policy_version 280 (0.0018)
+[2023-02-22 21:45:55,183][44343] Saving new best policy, reward=5.693!
+[2023-02-22 21:45:56,854][44357] Updated weights for policy 0, policy_version 290 (0.0024)
+[2023-02-22 21:46:05,299][44343] Saving new best policy, reward=5.702!
+[2023-02-22 21:46:10,895][44357] Updated weights for policy 0, policy_version 300 (0.0042)
+[2023-02-22 21:46:22,027][44357] Updated weights for policy 0, policy_version 310 (0.0027)
+[2023-02-22 21:46:30,183][44343] Saving new best policy, reward=6.326!
+[2023-02-22 21:46:34,838][44357] Updated weights for policy 0, policy_version 320 (0.0028)
+[2023-02-22 21:46:48,709][44357] Updated weights for policy 0, policy_version 330 (0.0034)
+[2023-02-22 21:46:55,184][44343] Saving new best policy, reward=6.680!
+[2023-02-22 21:46:59,173][44357] Updated weights for policy 0, policy_version 340 (0.0012)
+[2023-02-22 21:47:05,198][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000343_1404928.pth...
+[2023-02-22 21:47:05,367][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000155_634880.pth
+[2023-02-22 21:47:13,125][44357] Updated weights for policy 0, policy_version 350 (0.0023)
+[2023-02-22 21:47:26,190][44357] Updated weights for policy 0, policy_version 360 (0.0022)
+[2023-02-22 21:47:37,325][44357] Updated weights for policy 0, policy_version 370 (0.0017)
+[2023-02-22 21:47:51,298][44357] Updated weights for policy 0, policy_version 380 (0.0031)
+[2023-02-22 21:48:04,032][44357] Updated weights for policy 0, policy_version 390 (0.0029)
+[2023-02-22 21:48:10,200][44343] Saving new best policy, reward=6.724!
+[2023-02-22 21:48:15,427][44357] Updated weights for policy 0, policy_version 400 (0.0029)
+[2023-02-22 21:48:25,182][44343] Saving new best policy, reward=7.131!
+[2023-02-22 21:48:29,120][44357] Updated weights for policy 0, policy_version 410 (0.0030)
+[2023-02-22 21:48:40,241][44357] Updated weights for policy 0, policy_version 420 (0.0019)
+[2023-02-22 21:48:53,312][44357] Updated weights for policy 0, policy_version 430 (0.0020)
+[2023-02-22 21:49:05,192][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000438_1794048.pth...
+[2023-02-22 21:49:05,454][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000249_1019904.pth
+[2023-02-22 21:49:07,053][44357] Updated weights for policy 0, policy_version 440 (0.0017)
+[2023-02-22 21:49:17,206][44357] Updated weights for policy 0, policy_version 450 (0.0017)
+[2023-02-22 21:49:30,957][44357] Updated weights for policy 0, policy_version 460 (0.0036)
+[2023-02-22 21:49:35,185][44343] Saving new best policy, reward=7.273!
+[2023-02-22 21:49:44,197][44357] Updated weights for policy 0, policy_version 470 (0.0012)
+[2023-02-22 21:49:55,073][44357] Updated weights for policy 0, policy_version 480 (0.0033)
+[2023-02-22 21:50:09,114][44357] Updated weights for policy 0, policy_version 490 (0.0021)
+[2023-02-22 21:50:10,175][44343] Saving new best policy, reward=7.677!
+[2023-02-22 21:50:21,185][44357] Updated weights for policy 0, policy_version 500 (0.0015)
+[2023-02-22 21:50:33,198][44357] Updated weights for policy 0, policy_version 510 (0.0018)
+[2023-02-22 21:50:46,907][44357] Updated weights for policy 0, policy_version 520 (0.0022)
+[2023-02-22 21:50:55,193][44343] Saving new best policy, reward=7.977!
+[2023-02-22 21:50:58,148][44357] Updated weights for policy 0, policy_version 530 (0.0014)
+[2023-02-22 21:51:05,188][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000534_2187264.pth...
+[2023-02-22 21:51:05,403][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000343_1404928.pth
+[2023-02-22 21:51:10,973][44357] Updated weights for policy 0, policy_version 540 (0.0022)
+[2023-02-22 21:51:24,871][44357] Updated weights for policy 0, policy_version 550 (0.0011)
+[2023-02-22 21:51:35,244][44357] Updated weights for policy 0, policy_version 560 (0.0019)
+[2023-02-22 21:51:48,935][44357] Updated weights for policy 0, policy_version 570 (0.0018)
+[2023-02-22 21:52:00,180][44343] Saving new best policy, reward=8.152!
+[2023-02-22 21:52:01,787][44357] Updated weights for policy 0, policy_version 580 (0.0016)
+[2023-02-22 21:52:13,228][44357] Updated weights for policy 0, policy_version 590 (0.0019)
+[2023-02-22 21:52:20,181][44343] Saving new best policy, reward=8.358!
+[2023-02-22 21:52:27,177][44357] Updated weights for policy 0, policy_version 600 (0.0012)
+[2023-02-22 21:52:39,279][44357] Updated weights for policy 0, policy_version 610 (0.0013)
+[2023-02-22 21:52:51,424][44357] Updated weights for policy 0, policy_version 620 (0.0036)
+[2023-02-22 21:53:00,178][44343] Saving new best policy, reward=9.253!
+[2023-02-22 21:53:05,166][44357] Updated weights for policy 0, policy_version 630 (0.0018)
+[2023-02-22 21:53:05,183][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000630_2580480.pth...
+[2023-02-22 21:53:05,353][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000438_1794048.pth
+[2023-02-22 21:53:16,437][44357] Updated weights for policy 0, policy_version 640 (0.0013)
+[2023-02-22 21:53:29,099][44357] Updated weights for policy 0, policy_version 650 (0.0014)
+[2023-02-22 21:53:42,787][44357] Updated weights for policy 0, policy_version 660 (0.0023)
+[2023-02-22 21:53:53,088][44357] Updated weights for policy 0, policy_version 670 (0.0012)
+[2023-02-22 21:54:06,818][44357] Updated weights for policy 0, policy_version 680 (0.0038)
+[2023-02-22 21:54:19,375][44357] Updated weights for policy 0, policy_version 690 (0.0026)
+[2023-02-22 21:54:20,180][44343] Saving new best policy, reward=9.967!
+[2023-02-22 21:54:30,617][44357] Updated weights for policy 0, policy_version 700 (0.0012)
+[2023-02-22 21:54:44,488][44357] Updated weights for policy 0, policy_version 710 (0.0034)
+[2023-02-22 21:54:56,158][44357] Updated weights for policy 0, policy_version 720 (0.0031)
+[2023-02-22 21:55:05,186][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000726_2973696.pth...
+[2023-02-22 21:55:05,392][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000534_2187264.pth
+[2023-02-22 21:55:08,422][44357] Updated weights for policy 0, policy_version 730 (0.0025)
+[2023-02-22 21:55:22,098][44357] Updated weights for policy 0, policy_version 740 (0.0025)
+[2023-02-22 21:55:32,470][44357] Updated weights for policy 0, policy_version 750 (0.0014)
+[2023-02-22 21:55:46,132][44357] Updated weights for policy 0, policy_version 760 (0.0019)
+[2023-02-22 21:55:59,242][44357] Updated weights for policy 0, policy_version 770 (0.0018)
+[2023-02-22 21:56:09,892][44357] Updated weights for policy 0, policy_version 780 (0.0019)
+[2023-02-22 21:56:23,536][44357] Updated weights for policy 0, policy_version 790 (0.0036)
+[2023-02-22 21:56:35,273][44357] Updated weights for policy 0, policy_version 800 (0.0020)
+[2023-02-22 21:56:47,544][44357] Updated weights for policy 0, policy_version 810 (0.0026)
+[2023-02-22 21:57:01,255][44357] Updated weights for policy 0, policy_version 820 (0.0018)
+[2023-02-22 21:57:05,180][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000823_3371008.pth...
+[2023-02-22 21:57:05,331][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000630_2580480.pth
+[2023-02-22 21:57:12,428][44357] Updated weights for policy 0, policy_version 830 (0.0022)
+[2023-02-22 21:57:25,276][44357] Updated weights for policy 0, policy_version 840 (0.0030)
+[2023-02-22 21:57:38,985][44357] Updated weights for policy 0, policy_version 850 (0.0037)
+[2023-02-22 21:57:49,296][44357] Updated weights for policy 0, policy_version 860 (0.0013)
+[2023-02-22 21:58:03,074][44357] Updated weights for policy 0, policy_version 870 (0.0027)
+[2023-02-22 21:58:15,882][44357] Updated weights for policy 0, policy_version 880 (0.0013)
+[2023-02-22 21:58:26,821][44357] Updated weights for policy 0, policy_version 890 (0.0020)
+[2023-02-22 21:58:40,175][44343] Saving new best policy, reward=10.147!
+[2023-02-22 21:58:40,744][44357] Updated weights for policy 0, policy_version 900 (0.0021)
+[2023-02-22 21:58:45,189][44343] Saving new best policy, reward=10.394!
+[2023-02-22 21:58:52,878][44357] Updated weights for policy 0, policy_version 910 (0.0020)
+[2023-02-22 21:59:04,728][44357] Updated weights for policy 0, policy_version 920 (0.0018)
+[2023-02-22 21:59:05,186][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000920_3768320.pth...
+[2023-02-22 21:59:05,369][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000726_2973696.pth
+[2023-02-22 21:59:18,648][44357] Updated weights for policy 0, policy_version 930 (0.0012)
+[2023-02-22 21:59:29,534][44357] Updated weights for policy 0, policy_version 940 (0.0018)
+[2023-02-22 21:59:42,369][44357] Updated weights for policy 0, policy_version 950 (0.0038)
+[2023-02-22 21:59:50,177][44343] Saving new best policy, reward=10.410!
+[2023-02-22 21:59:56,324][44357] Updated weights for policy 0, policy_version 960 (0.0018)
+[2023-02-22 22:00:00,175][44343] Saving new best policy, reward=10.565!
+[2023-02-22 22:00:06,253][44357] Updated weights for policy 0, policy_version 970 (0.0017)
+[2023-02-22 22:00:20,191][44357] Updated weights for policy 0, policy_version 980 (0.0040)
+[2023-02-22 22:00:32,711][44357] Updated weights for policy 0, policy_version 990 (0.0014)
+[2023-02-22 22:00:43,741][44357] Updated weights for policy 0, policy_version 1000 (0.0022)
+[2023-02-22 22:00:57,526][44357] Updated weights for policy 0, policy_version 1010 (0.0012)
+[2023-02-22 22:01:05,180][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001017_4165632.pth...
+[2023-02-22 22:01:05,335][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000823_3371008.pth
+[2023-02-22 22:01:08,715][44357] Updated weights for policy 0, policy_version 1020 (0.0035)
+[2023-02-22 22:01:21,311][44357] Updated weights for policy 0, policy_version 1030 (0.0016)
+[2023-02-22 22:01:25,190][44343] Saving new best policy, reward=10.667!
+[2023-02-22 22:01:30,195][44343] Saving new best policy, reward=11.531!
+[2023-02-22 22:01:34,973][44357] Updated weights for policy 0, policy_version 1040 (0.0034)
+[2023-02-22 22:01:45,187][44343] Saving new best policy, reward=11.983!
+[2023-02-22 22:01:45,549][44357] Updated weights for policy 0, policy_version 1050 (0.0012)
+[2023-02-22 22:01:59,401][44357] Updated weights for policy 0, policy_version 1060 (0.0036)
+[2023-02-22 22:02:12,148][44357] Updated weights for policy 0, policy_version 1070 (0.0015)
+[2023-02-22 22:02:23,117][44357] Updated weights for policy 0, policy_version 1080 (0.0012)
+[2023-02-22 22:02:36,996][44357] Updated weights for policy 0, policy_version 1090 (0.0038)
+[2023-02-22 22:02:48,187][44357] Updated weights for policy 0, policy_version 1100 (0.0023)
+[2023-02-22 22:03:00,451][44357] Updated weights for policy 0, policy_version 1110 (0.0032)
+[2023-02-22 22:03:05,188][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001114_4562944.pth...
+[2023-02-22 22:03:05,334][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000000920_3768320.pth
+[2023-02-22 22:03:14,138][44357] Updated weights for policy 0, policy_version 1120 (0.0019)
+[2023-02-22 22:03:24,170][44357] Updated weights for policy 0, policy_version 1130 (0.0013)
+[2023-02-22 22:03:37,655][44357] Updated weights for policy 0, policy_version 1140 (0.0018)
+[2023-02-22 22:03:50,128][44357] Updated weights for policy 0, policy_version 1150 (0.0012)
+[2023-02-22 22:04:01,346][44357] Updated weights for policy 0, policy_version 1160 (0.0023)
+[2023-02-22 22:04:15,080][44357] Updated weights for policy 0, policy_version 1170 (0.0013)
+[2023-02-22 22:04:26,034][44357] Updated weights for policy 0, policy_version 1180 (0.0028)
+[2023-02-22 22:04:38,850][44357] Updated weights for policy 0, policy_version 1190 (0.0013)
+[2023-02-22 22:04:52,728][44357] Updated weights for policy 0, policy_version 1200 (0.0013)
+[2023-02-22 22:05:02,699][44357] Updated weights for policy 0, policy_version 1210 (0.0028)
+[2023-02-22 22:05:05,188][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001211_4960256.pth...
+[2023-02-22 22:05:05,398][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001017_4165632.pth
+[2023-02-22 22:05:16,469][44357] Updated weights for policy 0, policy_version 1220 (0.0012)
+[2023-02-22 22:05:20,173][44343] Saving new best policy, reward=13.082!
+[2023-02-22 22:05:28,886][44357] Updated weights for policy 0, policy_version 1230 (0.0016)
+[2023-02-22 22:05:39,747][44357] Updated weights for policy 0, policy_version 1240 (0.0026)
+[2023-02-22 22:05:53,590][44357] Updated weights for policy 0, policy_version 1250 (0.0016)
+[2023-02-22 22:06:04,499][44357] Updated weights for policy 0, policy_version 1260 (0.0012)
+[2023-02-22 22:06:10,181][44343] Saving new best policy, reward=13.267!
+[2023-02-22 22:06:17,223][44357] Updated weights for policy 0, policy_version 1270 (0.0021)
+[2023-02-22 22:06:30,868][44357] Updated weights for policy 0, policy_version 1280 (0.0027)
+[2023-02-22 22:06:40,821][44357] Updated weights for policy 0, policy_version 1290 (0.0014)
+[2023-02-22 22:06:54,580][44357] Updated weights for policy 0, policy_version 1300 (0.0029)
+[2023-02-22 22:07:05,302][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001309_5361664.pth...
+[2023-02-22 22:07:05,580][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001114_4562944.pth
+[2023-02-22 22:07:07,009][44357] Updated weights for policy 0, policy_version 1310 (0.0023)
+[2023-02-22 22:07:18,242][44357] Updated weights for policy 0, policy_version 1320 (0.0014)
+[2023-02-22 22:07:31,652][44357] Updated weights for policy 0, policy_version 1330 (0.0013)
+[2023-02-22 22:07:40,183][44343] Saving new best policy, reward=13.692!
+[2023-02-22 22:07:43,091][44357] Updated weights for policy 0, policy_version 1340 (0.0015)
+[2023-02-22 22:07:55,898][44357] Updated weights for policy 0, policy_version 1350 (0.0031)
+[2023-02-22 22:08:09,784][44357] Updated weights for policy 0, policy_version 1360 (0.0027)
+[2023-02-22 22:08:19,962][44357] Updated weights for policy 0, policy_version 1370 (0.0019)
+[2023-02-22 22:08:33,221][44357] Updated weights for policy 0, policy_version 1380 (0.0028)
+[2023-02-22 22:08:45,882][44357] Updated weights for policy 0, policy_version 1390 (0.0023)
+[2023-02-22 22:08:56,933][44357] Updated weights for policy 0, policy_version 1400 (0.0014)
+[2023-02-22 22:09:05,188][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001405_5754880.pth...
+[2023-02-22 22:09:05,382][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001211_4960256.pth
+[2023-02-22 22:09:10,651][44357] Updated weights for policy 0, policy_version 1410 (0.0014)
+[2023-02-22 22:09:22,062][44357] Updated weights for policy 0, policy_version 1420 (0.0012)
+[2023-02-22 22:09:34,396][44357] Updated weights for policy 0, policy_version 1430 (0.0024)
+[2023-02-22 22:09:40,177][44343] Saving new best policy, reward=13.814!
+[2023-02-22 22:09:48,019][44357] Updated weights for policy 0, policy_version 1440 (0.0012)
+[2023-02-22 22:09:50,187][44343] Saving new best policy, reward=14.316!
+[2023-02-22 22:09:58,219][44357] Updated weights for policy 0, policy_version 1450 (0.0024)
+[2023-02-22 22:10:11,801][44357] Updated weights for policy 0, policy_version 1460 (0.0020)
+[2023-02-22 22:10:24,446][44357] Updated weights for policy 0, policy_version 1470 (0.0031)
+[2023-02-22 22:10:35,200][44343] Saving new best policy, reward=14.817!
+[2023-02-22 22:10:35,202][44357] Updated weights for policy 0, policy_version 1480 (0.0017)
+[2023-02-22 22:10:48,866][44357] Updated weights for policy 0, policy_version 1490 (0.0020)
+[2023-02-22 22:11:00,119][44357] Updated weights for policy 0, policy_version 1500 (0.0017)
+[2023-02-22 22:11:05,261][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001503_6156288.pth...
+[2023-02-22 22:11:05,509][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001309_5361664.pth
+[2023-02-22 22:11:10,178][44343] Saving new best policy, reward=14.984!
+[2023-02-22 22:11:12,591][44357] Updated weights for policy 0, policy_version 1510 (0.0019)
+[2023-02-22 22:11:26,261][44357] Updated weights for policy 0, policy_version 1520 (0.0027)
+[2023-02-22 22:11:36,250][44357] Updated weights for policy 0, policy_version 1530 (0.0019)
+[2023-02-22 22:11:49,925][44357] Updated weights for policy 0, policy_version 1540 (0.0016)
+[2023-02-22 22:12:02,863][44357] Updated weights for policy 0, policy_version 1550 (0.0015)
+[2023-02-22 22:12:13,806][44357] Updated weights for policy 0, policy_version 1560 (0.0012)
+[2023-02-22 22:12:28,531][44357] Updated weights for policy 0, policy_version 1570 (0.0021)
+[2023-02-22 22:12:42,652][44357] Updated weights for policy 0, policy_version 1580 (0.0024)
+[2023-02-22 22:12:53,704][44357] Updated weights for policy 0, policy_version 1590 (0.0021)
+[2023-02-22 22:13:05,187][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001597_6541312.pth...
+[2023-02-22 22:13:05,492][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001405_5754880.pth
+[2023-02-22 22:13:08,100][44357] Updated weights for policy 0, policy_version 1600 (0.0018)
+[2023-02-22 22:13:20,543][44357] Updated weights for policy 0, policy_version 1610 (0.0017)
+[2023-02-22 22:13:31,704][44357] Updated weights for policy 0, policy_version 1620 (0.0025)
+[2023-02-22 22:13:45,190][44343] Saving new best policy, reward=15.358!
+[2023-02-22 22:13:45,524][44357] Updated weights for policy 0, policy_version 1630 (0.0019)
+[2023-02-22 22:13:56,449][44357] Updated weights for policy 0, policy_version 1640 (0.0013)
+[2023-02-22 22:14:09,261][44357] Updated weights for policy 0, policy_version 1650 (0.0014)
+[2023-02-22 22:14:22,595][44357] Updated weights for policy 0, policy_version 1660 (0.0027)
+[2023-02-22 22:14:32,964][44357] Updated weights for policy 0, policy_version 1670 (0.0018)
+[2023-02-22 22:14:40,195][44343] Saving new best policy, reward=15.815!
+[2023-02-22 22:14:46,637][44357] Updated weights for policy 0, policy_version 1680 (0.0028)
+[2023-02-22 22:14:58,429][44357] Updated weights for policy 0, policy_version 1690 (0.0027)
+[2023-02-22 22:15:05,195][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001695_6942720.pth...
+[2023-02-22 22:15:05,373][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001503_6156288.pth
+[2023-02-22 22:15:10,380][44357] Updated weights for policy 0, policy_version 1700 (0.0017)
+[2023-02-22 22:15:24,328][44357] Updated weights for policy 0, policy_version 1710 (0.0028)
+[2023-02-22 22:15:34,965][44357] Updated weights for policy 0, policy_version 1720 (0.0031)
+[2023-02-22 22:15:47,961][44357] Updated weights for policy 0, policy_version 1730 (0.0021)
+[2023-02-22 22:16:01,469][44357] Updated weights for policy 0, policy_version 1740 (0.0023)
+[2023-02-22 22:16:12,088][44357] Updated weights for policy 0, policy_version 1750 (0.0018)
+[2023-02-22 22:16:25,789][44357] Updated weights for policy 0, policy_version 1760 (0.0027)
+[2023-02-22 22:16:37,997][44357] Updated weights for policy 0, policy_version 1770 (0.0026)
+[2023-02-22 22:16:49,718][44357] Updated weights for policy 0, policy_version 1780 (0.0020)
+[2023-02-22 22:17:03,608][44357] Updated weights for policy 0, policy_version 1790 (0.0023)
+[2023-02-22 22:17:05,187][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001791_7335936.pth...
+[2023-02-22 22:17:05,337][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001597_6541312.pth
+[2023-02-22 22:17:15,426][44357] Updated weights for policy 0, policy_version 1800 (0.0028)
+[2023-02-22 22:17:28,352][44357] Updated weights for policy 0, policy_version 1810 (0.0015)
+[2023-02-22 22:17:42,564][44357] Updated weights for policy 0, policy_version 1820 (0.0012)
+[2023-02-22 22:17:53,088][44357] Updated weights for policy 0, policy_version 1830 (0.0018)
+[2023-02-22 22:18:06,524][44357] Updated weights for policy 0, policy_version 1840 (0.0026)
+[2023-02-22 22:18:19,831][44357] Updated weights for policy 0, policy_version 1850 (0.0026)
+[2023-02-22 22:18:30,737][44357] Updated weights for policy 0, policy_version 1860 (0.0014)
+[2023-02-22 22:18:44,377][44357] Updated weights for policy 0, policy_version 1870 (0.0040)
+[2023-02-22 22:18:56,344][44357] Updated weights for policy 0, policy_version 1880 (0.0019)
+[2023-02-22 22:19:05,184][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001887_7729152.pth...
+[2023-02-22 22:19:05,374][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001695_6942720.pth
+[2023-02-22 22:19:08,080][44357] Updated weights for policy 0, policy_version 1890 (0.0022)
+[2023-02-22 22:19:21,773][44357] Updated weights for policy 0, policy_version 1900 (0.0021)
+[2023-02-22 22:19:32,368][44357] Updated weights for policy 0, policy_version 1910 (0.0028)
+[2023-02-22 22:19:45,622][44357] Updated weights for policy 0, policy_version 1920 (0.0014)
+[2023-02-22 22:19:58,770][44357] Updated weights for policy 0, policy_version 1930 (0.0021)
+[2023-02-22 22:20:09,516][44357] Updated weights for policy 0, policy_version 1940 (0.0013)
+[2023-02-22 22:20:23,161][44357] Updated weights for policy 0, policy_version 1950 (0.0014)
+[2023-02-22 22:20:35,289][44357] Updated weights for policy 0, policy_version 1960 (0.0012)
+[2023-02-22 22:20:46,932][44357] Updated weights for policy 0, policy_version 1970 (0.0019)
+[2023-02-22 22:21:00,847][44357] Updated weights for policy 0, policy_version 1980 (0.0020)
+[2023-02-22 22:21:05,191][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001984_8126464.pth...
+[2023-02-22 22:21:05,357][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001791_7335936.pth
+[2023-02-22 22:21:12,097][44357] Updated weights for policy 0, policy_version 1990 (0.0012)
+[2023-02-22 22:21:24,938][44357] Updated weights for policy 0, policy_version 2000 (0.0017)
+[2023-02-22 22:21:38,170][44357] Updated weights for policy 0, policy_version 2010 (0.0018)
+[2023-02-22 22:21:48,538][44357] Updated weights for policy 0, policy_version 2020 (0.0024)
+[2023-02-22 22:21:50,174][44343] Saving new best policy, reward=15.986!
+[2023-02-22 22:21:55,187][44343] Saving new best policy, reward=16.024!
+[2023-02-22 22:22:02,271][44357] Updated weights for policy 0, policy_version 2030 (0.0027)
+[2023-02-22 22:22:14,337][44357] Updated weights for policy 0, policy_version 2040 (0.0024)
+[2023-02-22 22:22:26,220][44357] Updated weights for policy 0, policy_version 2050 (0.0022)
+[2023-02-22 22:22:40,009][44357] Updated weights for policy 0, policy_version 2060 (0.0031)
+[2023-02-22 22:22:50,319][44357] Updated weights for policy 0, policy_version 2070 (0.0014)
+[2023-02-22 22:22:55,196][44343] Saving new best policy, reward=16.279!
+[2023-02-22 22:23:03,597][44357] Updated weights for policy 0, policy_version 2080 (0.0018)
+[2023-02-22 22:23:05,182][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002081_8523776.pth...
+[2023-02-22 22:23:05,358][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001887_7729152.pth
+[2023-02-22 22:23:05,365][44343] Saving new best policy, reward=16.580!
+[2023-02-22 22:23:10,183][44343] Saving new best policy, reward=17.174!
+[2023-02-22 22:23:17,504][44357] Updated weights for policy 0, policy_version 2090 (0.0042)
+[2023-02-22 22:23:28,031][44357] Updated weights for policy 0, policy_version 2100 (0.0016)
+[2023-02-22 22:23:41,630][44357] Updated weights for policy 0, policy_version 2110 (0.0023)
+[2023-02-22 22:23:54,289][44357] Updated weights for policy 0, policy_version 2120 (0.0018)
+[2023-02-22 22:24:05,664][44357] Updated weights for policy 0, policy_version 2130 (0.0017)
+[2023-02-22 22:24:19,337][44357] Updated weights for policy 0, policy_version 2140 (0.0013)
+[2023-02-22 22:24:30,018][44357] Updated weights for policy 0, policy_version 2150 (0.0026)
+[2023-02-22 22:24:43,012][44357] Updated weights for policy 0, policy_version 2160 (0.0028)
+[2023-02-22 22:24:56,533][44357] Updated weights for policy 0, policy_version 2170 (0.0014)
+[2023-02-22 22:25:05,192][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002178_8921088.pth...
+[2023-02-22 22:25:05,348][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000001984_8126464.pth
+[2023-02-22 22:25:06,980][44357] Updated weights for policy 0, policy_version 2180 (0.0020)
+[2023-02-22 22:25:20,932][44357] Updated weights for policy 0, policy_version 2190 (0.0038)
+[2023-02-22 22:25:33,457][44357] Updated weights for policy 0, policy_version 2200 (0.0017)
+[2023-02-22 22:25:44,759][44357] Updated weights for policy 0, policy_version 2210 (0.0029)
+[2023-02-22 22:25:58,677][44357] Updated weights for policy 0, policy_version 2220 (0.0012)
+[2023-02-22 22:26:09,960][44357] Updated weights for policy 0, policy_version 2230 (0.0019)
+[2023-02-22 22:26:22,317][44357] Updated weights for policy 0, policy_version 2240 (0.0022)
+[2023-02-22 22:26:36,070][44357] Updated weights for policy 0, policy_version 2250 (0.0022)
+[2023-02-22 22:26:45,762][44357] Updated weights for policy 0, policy_version 2260 (0.0020)
+[2023-02-22 22:26:59,512][44357] Updated weights for policy 0, policy_version 2270 (0.0018)
+[2023-02-22 22:27:05,190][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002275_9318400.pth...
+[2023-02-22 22:27:05,351][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002081_8523776.pth
+[2023-02-22 22:27:05,375][44343] Saving new best policy, reward=17.231!
+[2023-02-22 22:27:12,097][44357] Updated weights for policy 0, policy_version 2280 (0.0022)
+[2023-02-22 22:27:23,133][44357] Updated weights for policy 0, policy_version 2290 (0.0024)
+[2023-02-22 22:27:36,807][44357] Updated weights for policy 0, policy_version 2300 (0.0027)
+[2023-02-22 22:27:48,016][44357] Updated weights for policy 0, policy_version 2310 (0.0012)
+[2023-02-22 22:28:00,458][44357] Updated weights for policy 0, policy_version 2320 (0.0020)
+[2023-02-22 22:28:14,185][44357] Updated weights for policy 0, policy_version 2330 (0.0023)
+[2023-02-22 22:28:24,436][44357] Updated weights for policy 0, policy_version 2340 (0.0020)
+[2023-02-22 22:28:38,101][44357] Updated weights for policy 0, policy_version 2350 (0.0029)
+[2023-02-22 22:28:50,500][44357] Updated weights for policy 0, policy_version 2360 (0.0011)
+[2023-02-22 22:29:01,834][44357] Updated weights for policy 0, policy_version 2370 (0.0032)
+[2023-02-22 22:29:05,187][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002372_9715712.pth...
+[2023-02-22 22:29:05,405][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002178_8921088.pth
+[2023-02-22 22:29:15,519][44357] Updated weights for policy 0, policy_version 2380 (0.0022)
+[2023-02-22 22:29:26,480][44357] Updated weights for policy 0, policy_version 2390 (0.0013)
+[2023-02-22 22:29:39,172][44357] Updated weights for policy 0, policy_version 2400 (0.0019)
+[2023-02-22 22:29:52,732][44357] Updated weights for policy 0, policy_version 2410 (0.0016)
+[2023-02-22 22:30:02,719][44357] Updated weights for policy 0, policy_version 2420 (0.0013)
+[2023-02-22 22:30:16,279][44357] Updated weights for policy 0, policy_version 2430 (0.0014)
+[2023-02-22 22:30:28,389][44357] Updated weights for policy 0, policy_version 2440 (0.0012)
+[2023-02-22 22:30:33,118][44343] Stopping Batcher_0...
+[2023-02-22 22:30:33,120][44343] Loop batcher_evt_loop terminating...
+[2023-02-22 22:30:33,121][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002443_10006528.pth...
+[2023-02-22 22:30:33,206][44357] Weights refcount: 2 0
+[2023-02-22 22:30:33,224][44357] Stopping InferenceWorker_p0-w0...
+[2023-02-22 22:30:33,225][44357] Loop inference_proc0-0_evt_loop terminating...
+[2023-02-22 22:30:33,256][44343] Removing /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002275_9318400.pth
+[2023-02-22 22:30:33,264][44343] Saving /content/train_dir/doom_deadly_corridor/checkpoint_p0/checkpoint_000002443_10006528.pth...
+[2023-02-22 22:30:33,385][44343] Stopping LearnerWorker_p0...
+[2023-02-22 22:30:33,389][44343] Loop learner_proc0_evt_loop terminating...
+[2023-02-22 22:30:33,492][44359] Stopping RolloutWorker_w1...
+[2023-02-22 22:30:33,502][44359] Loop rollout_proc1_evt_loop terminating...
+[2023-02-22 22:30:33,513][44365] Stopping RolloutWorker_w3...
+[2023-02-22 22:30:33,522][44380] Stopping RolloutWorker_w6...
+[2023-02-22 22:30:33,520][44365] Loop rollout_proc3_evt_loop terminating...
+[2023-02-22 22:30:33,523][44374] Stopping RolloutWorker_w7...
+[2023-02-22 22:30:33,528][44370] Stopping RolloutWorker_w5...
+[2023-02-22 22:30:33,529][44370] Loop rollout_proc5_evt_loop terminating...
+[2023-02-22 22:30:33,535][44374] Loop rollout_proc7_evt_loop terminating...
+[2023-02-22 22:30:33,539][44380] Loop rollout_proc6_evt_loop terminating...
+[2023-02-22 22:30:33,561][44372] Stopping RolloutWorker_w4...
+[2023-02-22 22:30:33,575][44362] Stopping RolloutWorker_w2...
+[2023-02-22 22:30:33,575][44362] Loop rollout_proc2_evt_loop terminating...
+[2023-02-22 22:30:33,590][44358] Stopping RolloutWorker_w0...
+[2023-02-22 22:30:33,590][44358] Loop rollout_proc0_evt_loop terminating...
+[2023-02-22 22:30:33,568][44372] Loop rollout_proc4_evt_loop terminating...