chqmatteo committed
Commit 44ed953
Parent: 5dc41f0

Upload . with huggingface_hub

.summary/0/events.out.tfevents.1677095050.xps ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c330322bde110c48fd794e25ca7afd07a7146afbcd3c3538a32f9bd6d041a28a
+ size 671276
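
The `.summary/0` entry above is a Git LFS pointer to a TensorBoard event file containing the training curves. A minimal sketch for inspecting the logged scalars after downloading the repo, assuming `tensorboard` is installed:

```
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Load the event file from the directory that contains it.
acc = EventAccumulator(".summary/0")
acc.Reload()

# List the scalar tags logged during training.
print(acc.Tags()["scalars"])
```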
README.md ADDED
@@ -0,0 +1,56 @@
+ ---
+ library_name: sample-factory
+ tags:
+ - deep-reinforcement-learning
+ - reinforcement-learning
+ - sample-factory
+ model-index:
+ - name: APPO
+   results:
+   - task:
+       type: reinforcement-learning
+       name: reinforcement-learning
+     dataset:
+       name: doom_health_gathering_supreme
+       type: doom_health_gathering_supreme
+     metrics:
+     - type: mean_reward
+       value: 3.94 +/- 0.88
+       name: mean_reward
+       verified: false
+ ---
+
+ An **APPO** model trained on the **doom_health_gathering_supreme** environment.
+
+ This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
+ Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
+
+
+ ## Downloading the model
+
+ After installing Sample-Factory, download the model with:
+ ```
+ python -m sample_factory.huggingface.load_from_hub -r chqmatteo/rl_course_vizdoom_health_gathering_supreme
+ ```
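+
+ If you prefer to fetch the files directly, a minimal `huggingface_hub` sketch (assuming `huggingface_hub` is installed; the `local_dir` below is an arbitrary choice):
+ ```
+ from huggingface_hub import snapshot_download
+
+ # Download checkpoints, config.json, and logs into a local directory.
+ path = snapshot_download(
+     repo_id="chqmatteo/rl_course_vizdoom_health_gathering_supreme",
+     local_dir="./train_dir/rl_course_vizdoom_health_gathering_supreme",
+ )
+ print(path)
+ ```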
+
+
+ ## Using the model
+
+ To run the model after download, use the `enjoy` script corresponding to this environment:
+ ```
+ python -m <path.to.enjoy.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme
+ ```
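+
+ For this ViZDoom environment, the enjoy module is assumed to follow the standard `sf_examples` layout, so the command would look like:
+ ```
+ python -m sf_examples.vizdoom.enjoy_vizdoom --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme
+ ```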
+
+
+ You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
+ See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.
+
+ ## Training with this model
+
+ To continue training with this model, use the `train` script corresponding to this environment:
+ ```
+ python -m <path.to.train.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme --restart_behavior=resume --train_for_env_steps=10000000000
+ ```
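+
+ Under the same `sf_examples` assumption, the train module for ViZDoom would be:
+ ```
+ python -m sf_examples.vizdoom.train_vizdoom --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme --restart_behavior=resume --train_for_env_steps=10000000000
+ ```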
+
+ Note: you may need to adjust `--train_for_env_steps` to a suitably high number, as the experiment resumes from the step count at which it previously concluded.
+
checkpoint_p0/best_000000494_2023424_reward_4.772.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d7922c48b91278eced8427efe9b5e2c9fa5b64a2984176b863ed33c9bc5710aa
+ size 34928614
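
This checkpoint is an ordinary PyTorch file; a minimal sketch for peeking inside it once the LFS object has been pulled (newer PyTorch versions may also need `weights_only=False`):

```
import torch

# Load on CPU and list the top-level keys (model weights, optimizer state, etc.).
ckpt = torch.load(
    "checkpoint_p0/best_000000494_2023424_reward_4.772.pth", map_location="cpu"
)
print(list(ckpt.keys()))
```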
checkpoint_p0/checkpoint_000000836_3424256.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:32521bc9a3fa7390e3847c73aeca68f406c7f6e9ee600c77b516b262fae7c753
+ size 34929028
checkpoint_p0/checkpoint_000000978_4005888.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aa9c97ba1dc391ffd99a08009496bc73529021a7ffdd359bceaafd9db6628623
+ size 34929028
config.json ADDED
@@ -0,0 +1,142 @@
+ {
+   "help": false,
+   "algo": "APPO",
+   "env": "doom_health_gathering_supreme",
+   "experiment": "default_experiment",
+   "train_dir": "/mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir",
+   "restart_behavior": "resume",
+   "device": "gpu",
+   "seed": null,
+   "num_policies": 1,
+   "async_rl": true,
+   "serial_mode": false,
+   "batched_sampling": false,
+   "num_batches_to_accumulate": 2,
+   "worker_num_splits": 2,
+   "policy_workers_per_policy": 1,
+   "max_policy_lag": 1000,
+   "num_workers": 8,
+   "num_envs_per_worker": 4,
+   "batch_size": 1024,
+   "num_batches_per_epoch": 1,
+   "num_epochs": 1,
+   "rollout": 32,
+   "recurrence": 32,
+   "shuffle_minibatches": false,
+   "gamma": 0.99,
+   "reward_scale": 1.0,
+   "reward_clip": 1000.0,
+   "value_bootstrap": false,
+   "normalize_returns": true,
+   "exploration_loss_coeff": 0.001,
+   "value_loss_coeff": 0.5,
+   "kl_loss_coeff": 0.0,
+   "exploration_loss": "symmetric_kl",
+   "gae_lambda": 0.95,
+   "ppo_clip_ratio": 0.1,
+   "ppo_clip_value": 0.2,
+   "with_vtrace": false,
+   "vtrace_rho": 1.0,
+   "vtrace_c": 1.0,
+   "optimizer": "adam",
+   "adam_eps": 1e-06,
+   "adam_beta1": 0.9,
+   "adam_beta2": 0.999,
+   "max_grad_norm": 4.0,
+   "learning_rate": 0.0001,
+   "lr_schedule": "constant",
+   "lr_schedule_kl_threshold": 0.008,
+   "lr_adaptive_min": 1e-06,
+   "lr_adaptive_max": 0.01,
+   "obs_subtract_mean": 0.0,
+   "obs_scale": 255.0,
+   "normalize_input": true,
+   "normalize_input_keys": null,
+   "decorrelate_experience_max_seconds": 0,
+   "decorrelate_envs_on_one_worker": true,
+   "actor_worker_gpus": [],
+   "set_workers_cpu_affinity": true,
+   "force_envs_single_thread": false,
+   "default_niceness": 0,
+   "log_to_file": true,
+   "experiment_summaries_interval": 10,
+   "flush_summaries_interval": 30,
+   "stats_avg": 100,
+   "summaries_use_frameskip": true,
+   "heartbeat_interval": 20,
+   "heartbeat_reporting_interval": 600,
+   "train_for_env_steps": 4000000,
+   "train_for_seconds": 10000000000,
+   "save_every_sec": 120,
+   "keep_checkpoints": 2,
+   "load_checkpoint_kind": "latest",
+   "save_milestones_sec": -1,
+   "save_best_every_sec": 5,
+   "save_best_metric": "reward",
+   "save_best_after": 100000,
+   "benchmark": false,
+   "encoder_mlp_layers": [
+     512,
+     512
+   ],
+   "encoder_conv_architecture": "convnet_simple",
+   "encoder_conv_mlp_layers": [
+     512
+   ],
+   "use_rnn": true,
+   "rnn_size": 512,
+   "rnn_type": "gru",
+   "rnn_num_layers": 1,
+   "decoder_mlp_layers": [],
+   "nonlinearity": "elu",
+   "policy_initialization": "orthogonal",
+   "policy_init_gain": 1.0,
+   "actor_critic_share_weights": true,
+   "adaptive_stddev": true,
+   "continuous_tanh_scale": 0.0,
+   "initial_stddev": 1.0,
+   "use_env_info_cache": false,
+   "env_gpu_actions": false,
+   "env_gpu_observations": true,
+   "env_frameskip": 4,
+   "env_framestack": 1,
+   "pixel_format": "CHW",
+   "use_record_episode_statistics": false,
+   "with_wandb": false,
+   "wandb_user": null,
+   "wandb_project": "sample_factory",
+   "wandb_group": null,
+   "wandb_job_type": "SF",
+   "wandb_tags": [],
+   "with_pbt": false,
+   "pbt_mix_policies_in_one_env": true,
+   "pbt_period_env_steps": 5000000,
+   "pbt_start_mutation": 20000000,
+   "pbt_replace_fraction": 0.3,
+   "pbt_mutation_rate": 0.15,
+   "pbt_replace_reward_gap": 0.1,
+   "pbt_replace_reward_gap_absolute": 1e-06,
+   "pbt_optimize_gamma": false,
+   "pbt_target_objective": "true_objective",
+   "pbt_perturb_min": 1.1,
+   "pbt_perturb_max": 1.5,
+   "num_agents": -1,
+   "num_humans": 0,
+   "num_bots": -1,
+   "start_bot_difficulty": null,
+   "timelimit": null,
+   "res_w": 128,
+   "res_h": 72,
+   "wide_aspect_ratio": false,
+   "eval_env_frameskip": 1,
+   "fps": 35,
+   "command_line": "--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000",
+   "cli_args": {
+     "env": "doom_health_gathering_supreme",
+     "num_workers": 8,
+     "num_envs_per_worker": 4,
+     "train_for_env_steps": 4000000
+   },
+   "git_hash": "372eb1042c1a2a82a2684e1795d47eaa26c046f7",
+   "git_repo_name": "https://github.com/huggingface/deep-rl-class.git"
+ }
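
The `config.json` above records every hyperparameter of this run. A minimal sketch for loading it and printing a few of the settings that shaped training (the path is an assumption; adjust to wherever the repo was downloaded):

```
import json

# Read the experiment configuration saved by Sample-Factory.
with open("config.json") as f:
    cfg = json.load(f)

# A few of the key training settings.
for key in ("algo", "env", "num_workers", "batch_size", "learning_rate", "train_for_env_steps"):
    print(key, "=", cfg[key])
```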
git.diff ADDED
File without changes
sf_log.txt ADDED
@@ -0,0 +1,865 @@
+ [2023-02-22 19:44:15,206][06183] Saving configuration to /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/config.json...
+ [2023-02-22 19:44:16,254][06183] Rollout worker 0 uses device cpu
+ [2023-02-22 19:44:16,257][06183] Rollout worker 1 uses device cpu
+ [2023-02-22 19:44:16,261][06183] Rollout worker 2 uses device cpu
+ [2023-02-22 19:44:16,263][06183] Rollout worker 3 uses device cpu
+ [2023-02-22 19:44:16,266][06183] Rollout worker 4 uses device cpu
+ [2023-02-22 19:44:16,269][06183] Rollout worker 5 uses device cpu
+ [2023-02-22 19:44:16,273][06183] Rollout worker 6 uses device cpu
+ [2023-02-22 19:44:16,276][06183] Rollout worker 7 uses device cpu
+ [2023-02-22 19:44:16,339][06183] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+ [2023-02-22 19:44:16,341][06183] InferenceWorker_p0-w0: min num requests: 2
+ [2023-02-22 19:44:16,370][06183] Starting all processes...
+ [2023-02-22 19:44:16,372][06183] Starting process learner_proc0
+ [2023-02-22 19:44:16,762][06183] Starting all processes...
+ [2023-02-22 19:44:16,775][06183] Starting process inference_proc0-0
+ [2023-02-22 19:44:16,776][06183] Starting process rollout_proc0
+ [2023-02-22 19:44:16,776][06183] Starting process rollout_proc1
+ [2023-02-22 19:44:16,778][06183] Starting process rollout_proc2
+ [2023-02-22 19:44:16,779][06183] Starting process rollout_proc3
+ [2023-02-22 19:44:16,781][06183] Starting process rollout_proc4
+ [2023-02-22 19:44:16,897][06183] Starting process rollout_proc5
+ [2023-02-22 19:44:16,898][06183] Starting process rollout_proc6
+ [2023-02-22 19:44:16,900][06183] Starting process rollout_proc7
+ [2023-02-22 19:44:20,639][14984] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+ [2023-02-22 19:44:20,640][14984] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+ [2023-02-22 19:44:20,772][14984] Num visible devices: 1
+ [2023-02-22 19:44:20,793][14984] Starting seed is not provided
+ [2023-02-22 19:44:20,795][14984] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+ [2023-02-22 19:44:20,796][14984] Initializing actor-critic model on device cuda:0
+ [2023-02-22 19:44:20,799][14984] RunningMeanStd input shape: (3, 72, 128)
+ [2023-02-22 19:44:20,803][14984] RunningMeanStd input shape: (1,)
+ [2023-02-22 19:44:20,823][14984] ConvEncoder: input_channels=3
+ [2023-02-22 19:44:20,829][15003] Worker 3 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+ [2023-02-22 19:44:20,854][15000] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+ [2023-02-22 19:44:20,855][15000] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+ [2023-02-22 19:44:20,898][15001] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+ [2023-02-22 19:44:20,926][15000] Num visible devices: 1
+ [2023-02-22 19:44:20,959][15005] Worker 4 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+ [2023-02-22 19:44:20,960][15008] Worker 5 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+ [2023-02-22 19:44:20,963][15004] Worker 2 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+ [2023-02-22 19:44:21,060][15007] Worker 7 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+ [2023-02-22 19:44:21,199][15006] Worker 6 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+ [2023-02-22 19:44:21,913][15002] Worker 1 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+ [2023-02-22 19:44:23,056][14984] Conv encoder output size: 512
+ [2023-02-22 19:44:23,057][14984] Policy head output size: 512
+ [2023-02-22 19:44:23,084][14984] Created Actor Critic model with architecture:
+ [2023-02-22 19:44:23,085][14984] ActorCriticSharedWeights(
+   (obs_normalizer): ObservationNormalizer(
+     (running_mean_std): RunningMeanStdDictInPlace(
+       (running_mean_std): ModuleDict(
+         (obs): RunningMeanStdInPlace()
+       )
+     )
+   )
+   (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+   (encoder): VizdoomEncoder(
+     (basic_encoder): ConvEncoder(
+       (enc): RecursiveScriptModule(
+         original_name=ConvEncoderImpl
+         (conv_head): RecursiveScriptModule(
+           original_name=Sequential
+           (0): RecursiveScriptModule(original_name=Conv2d)
+           (1): RecursiveScriptModule(original_name=ELU)
+           (2): RecursiveScriptModule(original_name=Conv2d)
+           (3): RecursiveScriptModule(original_name=ELU)
+           (4): RecursiveScriptModule(original_name=Conv2d)
+           (5): RecursiveScriptModule(original_name=ELU)
+         )
+         (mlp_layers): RecursiveScriptModule(
+           original_name=Sequential
+           (0): RecursiveScriptModule(original_name=Linear)
+           (1): RecursiveScriptModule(original_name=ELU)
+         )
+       )
+     )
+   )
+   (core): ModelCoreRNN(
+     (core): GRU(512, 512)
+   )
+   (decoder): MlpDecoder(
+     (mlp): Identity()
+   )
+   (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+   (action_parameterization): ActionParameterizationDefault(
+     (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
+   )
+ )
+ [2023-02-22 19:44:29,155][14984] Using optimizer <class 'torch.optim.adam.Adam'>
+ [2023-02-22 19:44:29,181][14984] No checkpoints found
+ [2023-02-22 19:44:29,183][14984] Did not load from checkpoint, starting from scratch!
+ [2023-02-22 19:44:29,186][14984] Initialized policy 0 weights for model version 0
+ [2023-02-22 19:44:29,202][14984] LearnerWorker_p0 finished initialization!
+ [2023-02-22 19:44:29,203][14984] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+ [2023-02-22 19:44:29,445][15000] RunningMeanStd input shape: (3, 72, 128)
+ [2023-02-22 19:44:29,447][15000] RunningMeanStd input shape: (1,)
+ [2023-02-22 19:44:29,460][15000] ConvEncoder: input_channels=3
+ [2023-02-22 19:44:29,587][15000] Conv encoder output size: 512
+ [2023-02-22 19:44:29,589][15000] Policy head output size: 512
+ [2023-02-22 19:44:30,328][06183] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+ [2023-02-22 19:44:33,566][06183] Inference worker 0-0 is ready!
+ [2023-02-22 19:44:33,568][06183] All inference workers are ready! Signal rollout workers to start!
+ [2023-02-22 19:44:33,613][15001] Doom resolution: 160x120, resize resolution: (128, 72)
+ [2023-02-22 19:44:33,613][15008] Doom resolution: 160x120, resize resolution: (128, 72)
+ [2023-02-22 19:44:33,614][15006] Doom resolution: 160x120, resize resolution: (128, 72)
+ [2023-02-22 19:44:33,615][15002] Doom resolution: 160x120, resize resolution: (128, 72)
+ [2023-02-22 19:44:33,616][15003] Doom resolution: 160x120, resize resolution: (128, 72)
+ [2023-02-22 19:44:33,618][15007] Doom resolution: 160x120, resize resolution: (128, 72)
+ [2023-02-22 19:44:33,620][15004] Doom resolution: 160x120, resize resolution: (128, 72)
+ [2023-02-22 19:44:33,622][15005] Doom resolution: 160x120, resize resolution: (128, 72)
+ [2023-02-22 19:44:34,106][15005] VizDoom game.init() threw an exception ViZDoomUnexpectedExitException('Controlled ViZDoom instance exited unexpectedly.'). Terminate process...
+ [2023-02-22 19:44:34,108][15005] EvtLoop [rollout_proc4_evt_loop, process=rollout_proc4] unhandled exception in slot='init' connected to emitter=Emitter(object_id='Sampler', signal_name='_inference_workers_initialized'), args=()
+ Traceback (most recent call last):
+   File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 228, in _game_init
+     self.game.init()
+ vizdoom.vizdoom.ViZDoomUnexpectedExitException: Controlled ViZDoom instance exited unexpectedly.
+
+ During handling of the above exception, another exception occurred:
+
+ Traceback (most recent call last):
+   File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/signal_slot/signal_slot.py", line 355, in _process_signal
+     slot_callable(*args)
+   File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sample_factory/algo/sampling/rollout_worker.py", line 150, in init
+     env_runner.init(self.timing)
+   File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 418, in init
+     self._reset()
+   File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 430, in _reset
+     observations, info = e.reset(seed=seed) # new way of doing seeding since Gym 0.26.0
+   File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/gym/core.py", line 323, in reset
+     return self.env.reset(**kwargs)
+   File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sample_factory/algo/utils/make_env.py", line 125, in reset
+     obs, info = self.env.reset(**kwargs)
+   File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sample_factory/algo/utils/make_env.py", line 110, in reset
+     obs, info = self.env.reset(**kwargs)
+   File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 30, in reset
+     return self.env.reset(**kwargs)
+   File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/gym/core.py", line 379, in reset
+     obs, info = self.env.reset(**kwargs)
+   File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sample_factory/envs/env_wrappers.py", line 84, in reset
+     obs, info = self.env.reset(**kwargs)
+   File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/gym/core.py", line 323, in reset
+     return self.env.reset(**kwargs)
+   File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 51, in reset
+     return self.env.reset(**kwargs)
+   File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 323, in reset
+     self._ensure_initialized()
+   File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 274, in _ensure_initialized
+     self.initialize()
+   File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 269, in initialize
+     self._game_init()
+   File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 244, in _game_init
+     raise EnvCriticalError()
+ sample_factory.envs.env_utils.EnvCriticalError
+ [2023-02-22 19:44:34,110][15005] Unhandled exception in evt loop rollout_proc4_evt_loop
+ [2023-02-22 19:44:35,328][06183] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+ [2023-02-22 19:44:36,332][06183] Heartbeat connected on Batcher_0
+ [2023-02-22 19:44:36,335][06183] Heartbeat connected on LearnerWorker_p0
+ [2023-02-22 19:44:36,369][06183] Heartbeat connected on InferenceWorker_p0-w0
+ [2023-02-22 19:44:37,192][15006] Decorrelating experience for 0 frames...
+ [2023-02-22 19:44:37,192][15001] Decorrelating experience for 0 frames...
+ [2023-02-22 19:44:37,192][15002] Decorrelating experience for 0 frames...
+ [2023-02-22 19:44:37,192][15004] Decorrelating experience for 0 frames...
+ [2023-02-22 19:44:37,193][15008] Decorrelating experience for 0 frames...
+ [2023-02-22 19:44:37,921][15006] Decorrelating experience for 32 frames...
+ [2023-02-22 19:44:37,923][15008] Decorrelating experience for 32 frames...
+ [2023-02-22 19:44:37,928][15004] Decorrelating experience for 32 frames...
+ [2023-02-22 19:44:37,928][15001] Decorrelating experience for 32 frames...
+ [2023-02-22 19:44:37,929][15002] Decorrelating experience for 32 frames...
+ [2023-02-22 19:44:37,934][15007] Decorrelating experience for 0 frames...
+ [2023-02-22 19:44:38,003][15003] Decorrelating experience for 0 frames...
+ [2023-02-22 19:44:38,768][15007] Decorrelating experience for 32 frames...
+ [2023-02-22 19:44:38,809][15004] Decorrelating experience for 64 frames...
+ [2023-02-22 19:44:38,814][15008] Decorrelating experience for 64 frames...
+ [2023-02-22 19:44:38,851][15003] Decorrelating experience for 32 frames...
+ [2023-02-22 19:44:38,882][15006] Decorrelating experience for 64 frames...
+ [2023-02-22 19:44:39,585][15007] Decorrelating experience for 64 frames...
+ [2023-02-22 19:44:39,604][15002] Decorrelating experience for 64 frames...
+ [2023-02-22 19:44:39,646][15004] Decorrelating experience for 96 frames...
+ [2023-02-22 19:44:39,647][15001] Decorrelating experience for 64 frames...
+ [2023-02-22 19:44:39,675][15003] Decorrelating experience for 64 frames...
+ [2023-02-22 19:44:39,714][06183] Heartbeat connected on RolloutWorker_w2
+ [2023-02-22 19:44:40,328][06183] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+ [2023-02-22 19:44:40,419][15007] Decorrelating experience for 96 frames...
+ [2023-02-22 19:44:40,421][15008] Decorrelating experience for 96 frames...
+ [2023-02-22 19:44:40,464][15002] Decorrelating experience for 96 frames...
+ [2023-02-22 19:44:40,490][15006] Decorrelating experience for 96 frames...
+ [2023-02-22 19:44:40,516][06183] Heartbeat connected on RolloutWorker_w7
+ [2023-02-22 19:44:40,536][06183] Heartbeat connected on RolloutWorker_w5
+ [2023-02-22 19:44:40,559][06183] Heartbeat connected on RolloutWorker_w1
+ [2023-02-22 19:44:40,599][06183] Heartbeat connected on RolloutWorker_w6
+ [2023-02-22 19:44:41,183][15003] Decorrelating experience for 96 frames...
+ [2023-02-22 19:44:41,253][15001] Decorrelating experience for 96 frames...
+ [2023-02-22 19:44:41,272][06183] Heartbeat connected on RolloutWorker_w3
+ [2023-02-22 19:44:41,345][06183] Heartbeat connected on RolloutWorker_w0
+ [2023-02-22 19:44:45,328][06183] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 2.1. Samples: 32. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+ [2023-02-22 19:44:45,332][06183] Avg episode reward: [(0, '1.914')]
+ [2023-02-22 19:44:45,876][14984] Signal inference workers to stop experience collection...
+ [2023-02-22 19:44:45,884][15000] InferenceWorker_p0-w0: stopping experience collection
+ [2023-02-22 19:44:49,504][14984] Signal inference workers to resume experience collection...
+ [2023-02-22 19:44:49,506][15000] InferenceWorker_p0-w0: resuming experience collection
+ [2023-02-22 19:44:50,328][06183] Fps is (10 sec: 409.6, 60 sec: 204.8, 300 sec: 204.8). Total num frames: 4096. Throughput: 0: 147.1. Samples: 2942. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+ [2023-02-22 19:44:50,334][06183] Avg episode reward: [(0, '3.108')]
+ [2023-02-22 19:44:54,506][15000] Updated weights for policy 0, policy_version 10 (0.0364)
+ [2023-02-22 19:44:55,328][06183] Fps is (10 sec: 4505.6, 60 sec: 1802.2, 300 sec: 1802.2). Total num frames: 45056. Throughput: 0: 454.3. Samples: 11358. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+ [2023-02-22 19:44:55,330][06183] Avg episode reward: [(0, '4.406')]
+ [2023-02-22 19:44:59,586][15000] Updated weights for policy 0, policy_version 20 (0.0015)
+ [2023-02-22 19:45:00,329][06183] Fps is (10 sec: 8191.6, 60 sec: 2867.1, 300 sec: 2867.1). Total num frames: 86016. Throughput: 0: 568.7. Samples: 17060. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2023-02-22 19:45:00,332][06183] Avg episode reward: [(0, '4.412')]
+ [2023-02-22 19:45:04,441][15000] Updated weights for policy 0, policy_version 30 (0.0012)
+ [2023-02-22 19:45:05,328][06183] Fps is (10 sec: 8192.0, 60 sec: 3627.9, 300 sec: 3627.9). Total num frames: 126976. Throughput: 0: 849.7. Samples: 29740. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+ [2023-02-22 19:45:05,331][06183] Avg episode reward: [(0, '4.560')]
+ [2023-02-22 19:45:05,400][14984] Saving new best policy, reward=4.560!
+ [2023-02-22 19:45:09,187][15000] Updated weights for policy 0, policy_version 40 (0.0010)
+ [2023-02-22 19:45:10,328][06183] Fps is (10 sec: 8601.9, 60 sec: 4300.8, 300 sec: 4300.8). Total num frames: 172032. Throughput: 0: 1068.5. Samples: 42740. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+ [2023-02-22 19:45:10,332][06183] Avg episode reward: [(0, '4.466')]
+ [2023-02-22 19:45:14,107][15000] Updated weights for policy 0, policy_version 50 (0.0017)
+ [2023-02-22 19:45:15,328][06183] Fps is (10 sec: 8601.7, 60 sec: 4733.2, 300 sec: 4733.2). Total num frames: 212992. Throughput: 0: 1085.6. Samples: 48854. Policy #0 lag: (min: 0.0, avg: 0.9, max: 1.0)
+ [2023-02-22 19:45:15,330][06183] Avg episode reward: [(0, '4.413')]
+ [2023-02-22 19:45:18,942][15000] Updated weights for policy 0, policy_version 60 (0.0011)
+ [2023-02-22 19:45:20,328][06183] Fps is (10 sec: 8601.7, 60 sec: 5160.9, 300 sec: 5160.9). Total num frames: 258048. Throughput: 0: 1371.8. Samples: 61732. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+ [2023-02-22 19:45:20,331][06183] Avg episode reward: [(0, '4.563')]
+ [2023-02-22 19:45:20,341][14984] Saving new best policy, reward=4.563!
+ [2023-02-22 19:45:23,692][15000] Updated weights for policy 0, policy_version 70 (0.0013)
+ [2023-02-22 19:45:25,328][06183] Fps is (10 sec: 8601.5, 60 sec: 5436.5, 300 sec: 5436.5). Total num frames: 299008. Throughput: 0: 1648.9. Samples: 74202. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2023-02-22 19:45:25,332][06183] Avg episode reward: [(0, '4.329')]
+ [2023-02-22 19:45:28,664][15000] Updated weights for policy 0, policy_version 80 (0.0011)
+ [2023-02-22 19:45:30,328][06183] Fps is (10 sec: 8192.2, 60 sec: 5666.1, 300 sec: 5666.1). Total num frames: 339968. Throughput: 0: 1788.4. Samples: 80512. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+ [2023-02-22 19:45:30,330][06183] Avg episode reward: [(0, '4.295')]
+ [2023-02-22 19:45:33,515][15000] Updated weights for policy 0, policy_version 90 (0.0009)
+ [2023-02-22 19:45:35,328][06183] Fps is (10 sec: 8192.1, 60 sec: 6348.8, 300 sec: 5860.4). Total num frames: 380928. Throughput: 0: 2007.0. Samples: 93258. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2023-02-22 19:45:35,330][06183] Avg episode reward: [(0, '4.407')]
+ [2023-02-22 19:45:38,572][15000] Updated weights for policy 0, policy_version 100 (0.0012)
+ [2023-02-22 19:45:40,328][06183] Fps is (10 sec: 8191.9, 60 sec: 7031.5, 300 sec: 6027.0). Total num frames: 421888. Throughput: 0: 2093.5. Samples: 105566. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+ [2023-02-22 19:45:40,331][06183] Avg episode reward: [(0, '4.493')]
+ [2023-02-22 19:45:43,650][15000] Updated weights for policy 0, policy_version 110 (0.0014)
+ [2023-02-22 19:45:45,328][06183] Fps is (10 sec: 8192.0, 60 sec: 7714.1, 300 sec: 6171.3). Total num frames: 462848. Throughput: 0: 2099.2. Samples: 111522. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+ [2023-02-22 19:45:45,331][06183] Avg episode reward: [(0, '4.525')]
+ [2023-02-22 19:45:48,756][15000] Updated weights for policy 0, policy_version 120 (0.0012)
+ [2023-02-22 19:45:50,328][06183] Fps is (10 sec: 7782.4, 60 sec: 8260.3, 300 sec: 6246.4). Total num frames: 499712. Throughput: 0: 2078.4. Samples: 123266. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+ [2023-02-22 19:45:50,331][06183] Avg episode reward: [(0, '4.624')]
+ [2023-02-22 19:45:50,477][14984] Saving new best policy, reward=4.624!
+ [2023-02-22 19:45:54,234][15000] Updated weights for policy 0, policy_version 130 (0.0015)
+ [2023-02-22 19:45:55,328][06183] Fps is (10 sec: 7782.3, 60 sec: 8260.3, 300 sec: 6360.8). Total num frames: 540672. Throughput: 0: 2043.6. Samples: 134702. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
+ [2023-02-22 19:45:55,332][06183] Avg episode reward: [(0, '4.484')]
+ [2023-02-22 19:45:59,467][15000] Updated weights for policy 0, policy_version 140 (0.0014)
+ [2023-02-22 19:46:00,328][06183] Fps is (10 sec: 7782.3, 60 sec: 8192.1, 300 sec: 6417.1). Total num frames: 577536. Throughput: 0: 2037.4. Samples: 140538. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2023-02-22 19:46:00,331][06183] Avg episode reward: [(0, '4.447')]
+ [2023-02-22 19:46:04,827][15000] Updated weights for policy 0, policy_version 150 (0.0021)
+ [2023-02-22 19:46:05,328][06183] Fps is (10 sec: 7372.9, 60 sec: 8123.7, 300 sec: 6467.4). Total num frames: 614400. Throughput: 0: 2006.8. Samples: 152036. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2023-02-22 19:46:05,332][06183] Avg episode reward: [(0, '4.358')]
+ [2023-02-22 19:46:10,160][15000] Updated weights for policy 0, policy_version 160 (0.0017)
+ [2023-02-22 19:46:10,328][06183] Fps is (10 sec: 7782.4, 60 sec: 8055.5, 300 sec: 6553.6). Total num frames: 655360. Throughput: 0: 1985.4. Samples: 163544. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
+ [2023-02-22 19:46:10,331][06183] Avg episode reward: [(0, '4.399')]
+ [2023-02-22 19:46:10,354][14984] Saving /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000160_655360.pth...
+ [2023-02-22 19:46:15,328][06183] Fps is (10 sec: 7782.3, 60 sec: 7987.2, 300 sec: 6592.6). Total num frames: 692224. Throughput: 0: 1978.1. Samples: 169526. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2023-02-22 19:46:15,331][06183] Avg episode reward: [(0, '4.615')]
+ [2023-02-22 19:46:15,414][15000] Updated weights for policy 0, policy_version 170 (0.0019)
+ [2023-02-22 19:46:20,328][06183] Fps is (10 sec: 7782.4, 60 sec: 7918.9, 300 sec: 6665.3). Total num frames: 733184. Throughput: 0: 1950.5. Samples: 181032. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
+ [2023-02-22 19:46:20,331][06183] Avg episode reward: [(0, '4.568')]
+ [2023-02-22 19:46:20,725][15000] Updated weights for policy 0, policy_version 180 (0.0012)
+ [2023-02-22 19:46:25,328][06183] Fps is (10 sec: 7372.8, 60 sec: 7782.4, 300 sec: 6660.4). Total num frames: 765952. Throughput: 0: 1912.4. Samples: 191622. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
+ [2023-02-22 19:46:25,333][06183] Avg episode reward: [(0, '4.507')]
+ [2023-02-22 19:46:26,853][15000] Updated weights for policy 0, policy_version 190 (0.0017)
+ [2023-02-22 19:46:30,328][06183] Fps is (10 sec: 6553.6, 60 sec: 7645.8, 300 sec: 6656.0). Total num frames: 798720. Throughput: 0: 1887.3. Samples: 196452. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2023-02-22 19:46:30,332][06183] Avg episode reward: [(0, '4.383')]
+ [2023-02-22 19:46:33,084][15000] Updated weights for policy 0, policy_version 200 (0.0023)
+ [2023-02-22 19:46:35,328][06183] Fps is (10 sec: 6553.7, 60 sec: 7509.3, 300 sec: 6651.9). Total num frames: 831488. Throughput: 0: 1848.0. Samples: 206428. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2023-02-22 19:46:35,332][06183] Avg episode reward: [(0, '4.311')]
+ [2023-02-22 19:46:39,210][15000] Updated weights for policy 0, policy_version 210 (0.0020)
+ [2023-02-22 19:46:40,328][06183] Fps is (10 sec: 6963.3, 60 sec: 7441.1, 300 sec: 6679.6). Total num frames: 868352. Throughput: 0: 1817.5. Samples: 216490. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2023-02-22 19:46:40,332][06183] Avg episode reward: [(0, '4.257')]
+ [2023-02-22 19:46:45,328][06183] Fps is (10 sec: 6553.5, 60 sec: 7236.2, 300 sec: 6644.6). Total num frames: 897024. Throughput: 0: 1795.2. Samples: 221324. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2023-02-22 19:46:45,332][06183] Avg episode reward: [(0, '4.425')]
+ [2023-02-22 19:46:45,856][15000] Updated weights for policy 0, policy_version 220 (0.0026)
+ [2023-02-22 19:46:50,329][06183] Fps is (10 sec: 5734.2, 60 sec: 7099.7, 300 sec: 6612.1). Total num frames: 925696. Throughput: 0: 1726.7. Samples: 229738. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2023-02-22 19:46:50,334][06183] Avg episode reward: [(0, '4.426')]
+ [2023-02-22 19:46:53,358][15000] Updated weights for policy 0, policy_version 230 (0.0032)
+ [2023-02-22 19:46:55,329][06183] Fps is (10 sec: 5734.1, 60 sec: 6894.9, 300 sec: 6581.8). Total num frames: 954368. Throughput: 0: 1659.2. Samples: 238210. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2023-02-22 19:46:55,333][06183] Avg episode reward: [(0, '4.467')]
+ [2023-02-22 19:46:59,818][15000] Updated weights for policy 0, policy_version 240 (0.0017)
+ [2023-02-22 19:47:00,328][06183] Fps is (10 sec: 5734.5, 60 sec: 6758.4, 300 sec: 6553.6). Total num frames: 983040. Throughput: 0: 1633.1. Samples: 243016. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2023-02-22 19:47:00,333][06183] Avg episode reward: [(0, '4.537')]
+ [2023-02-22 19:47:05,328][06183] Fps is (10 sec: 5734.8, 60 sec: 6621.9, 300 sec: 6527.2). Total num frames: 1011712. Throughput: 0: 1578.8. Samples: 252080. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2023-02-22 19:47:05,332][06183] Avg episode reward: [(0, '4.393')]
+ [2023-02-22 19:47:06,684][15000] Updated weights for policy 0, policy_version 250 (0.0026)
+ [2023-02-22 19:47:10,328][06183] Fps is (10 sec: 6144.0, 60 sec: 6485.3, 300 sec: 6528.0). Total num frames: 1044480. Throughput: 0: 1543.8. Samples: 261092. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2023-02-22 19:47:10,332][06183] Avg episode reward: [(0, '4.500')]
+ [2023-02-22 19:47:13,877][15000] Updated weights for policy 0, policy_version 260 (0.0034)
+ [2023-02-22 19:47:15,329][06183] Fps is (10 sec: 5734.1, 60 sec: 6280.5, 300 sec: 6479.1). Total num frames: 1069056. Throughput: 0: 1528.6. Samples: 265238. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2023-02-22 19:47:15,336][06183] Avg episode reward: [(0, '4.446')]
+ [2023-02-22 19:47:20,329][06183] Fps is (10 sec: 4505.5, 60 sec: 5939.2, 300 sec: 6409.0). Total num frames: 1089536. Throughput: 0: 1465.9. Samples: 272394. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2023-02-22 19:47:20,335][06183] Avg episode reward: [(0, '4.503')]
+ [2023-02-22 19:47:22,727][15000] Updated weights for policy 0, policy_version 270 (0.0038)
+ [2023-02-22 19:47:25,329][06183] Fps is (10 sec: 4505.7, 60 sec: 5802.6, 300 sec: 6366.3). Total num frames: 1114112. Throughput: 0: 1393.6. Samples: 279204. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+ [2023-02-22 19:47:25,343][06183] Avg episode reward: [(0, '4.563')]
+ [2023-02-22 19:47:30,331][06183] Fps is (10 sec: 4095.2, 60 sec: 5529.4, 300 sec: 6280.5). Total num frames: 1130496. Throughput: 0: 1344.1. Samples: 281812. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+ [2023-02-22 19:47:30,337][06183] Avg episode reward: [(0, '4.578')]
+ [2023-02-22 19:47:34,132][15000] Updated weights for policy 0, policy_version 280 (0.0075)
+ [2023-02-22 19:47:35,329][06183] Fps is (10 sec: 3686.4, 60 sec: 5324.7, 300 sec: 6221.5). Total num frames: 1150976. Throughput: 0: 1267.7. Samples: 286786. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2023-02-22 19:47:35,332][06183] Avg episode reward: [(0, '4.419')]
+ [2023-02-22 19:47:40,329][06183] Fps is (10 sec: 4506.3, 60 sec: 5119.9, 300 sec: 6187.1). Total num frames: 1175552. Throughput: 0: 1232.6. Samples: 293676. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2023-02-22 19:47:40,334][06183] Avg episode reward: [(0, '4.364')]
+ [2023-02-22 19:47:43,155][15000] Updated weights for policy 0, policy_version 290 (0.0041)
+ [2023-02-22 19:47:45,329][06183] Fps is (10 sec: 4505.7, 60 sec: 4983.4, 300 sec: 6133.5). Total num frames: 1196032. Throughput: 0: 1201.9. Samples: 297102. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2023-02-22 19:47:45,333][06183] Avg episode reward: [(0, '4.416')]
+ [2023-02-22 19:47:50,329][06183] Fps is (10 sec: 4096.1, 60 sec: 4846.9, 300 sec: 6082.5). Total num frames: 1216512. Throughput: 0: 1132.1. Samples: 303026. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+ [2023-02-22 19:47:50,341][06183] Avg episode reward: [(0, '4.404')]
+ [2023-02-22 19:47:53,335][15000] Updated weights for policy 0, policy_version 300 (0.0062)
+ [2023-02-22 19:47:55,329][06183] Fps is (10 sec: 4096.0, 60 sec: 4710.4, 300 sec: 6034.1). Total num frames: 1236992. Throughput: 0: 1073.8. Samples: 309412. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2023-02-22 19:47:55,333][06183] Avg episode reward: [(0, '4.453')]
+ [2023-02-22 19:48:00,329][06183] Fps is (10 sec: 4915.2, 60 sec: 4710.4, 300 sec: 6027.0). Total num frames: 1265664. Throughput: 0: 1071.2. Samples: 313444. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2023-02-22 19:48:00,337][06183] Avg episode reward: [(0, '4.269')]
+ [2023-02-22 19:48:01,035][15000] Updated weights for policy 0, policy_version 310 (0.0037)
+ [2023-02-22 19:48:05,328][06183] Fps is (10 sec: 5324.8, 60 sec: 4642.1, 300 sec: 6001.1). Total num frames: 1290240. Throughput: 0: 1082.8. Samples: 321122. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2023-02-22 19:48:05,334][06183] Avg episode reward: [(0, '4.324')]
+ [2023-02-22 19:48:09,841][15000] Updated weights for policy 0, policy_version 320 (0.0035)
+ [2023-02-22 19:48:10,328][06183] Fps is (10 sec: 4505.7, 60 sec: 4437.3, 300 sec: 5957.8). Total num frames: 1310720. Throughput: 0: 1086.6. Samples: 328102. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+ [2023-02-22 19:48:10,333][06183] Avg episode reward: [(0, '4.457')]
+ [2023-02-22 19:48:10,378][14984] Saving /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000320_1310720.pth...
+ [2023-02-22 19:48:15,328][06183] Fps is (10 sec: 4505.6, 60 sec: 4437.4, 300 sec: 5934.6). Total num frames: 1335296. Throughput: 0: 1111.8. Samples: 331840. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2023-02-22 19:48:15,333][06183] Avg episode reward: [(0, '4.421')]
+ [2023-02-22 19:48:19,348][15000] Updated weights for policy 0, policy_version 330 (0.0053)
+ [2023-02-22 19:48:20,329][06183] Fps is (10 sec: 4505.3, 60 sec: 4437.3, 300 sec: 5894.7). Total num frames: 1355776. Throughput: 0: 1137.7. Samples: 337982. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2023-02-22 19:48:20,334][06183] Avg episode reward: [(0, '4.440')]
+ [2023-02-22 19:48:25,329][06183] Fps is (10 sec: 4095.8, 60 sec: 4369.0, 300 sec: 5856.4). Total num frames: 1376256. Throughput: 0: 1117.7. Samples: 343972. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+ [2023-02-22 19:48:25,343][06183] Avg episode reward: [(0, '4.406')]
+ [2023-02-22 19:48:28,754][15000] Updated weights for policy 0, policy_version 340 (0.0045)
+ [2023-02-22 19:48:30,329][06183] Fps is (10 sec: 4096.3, 60 sec: 4437.5, 300 sec: 5819.7). Total num frames: 1396736. Throughput: 0: 1115.6. Samples: 347306. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+ [2023-02-22 19:48:30,333][06183] Avg episode reward: [(0, '4.419')]
+ [2023-02-22 19:48:35,329][06183] Fps is (10 sec: 4505.8, 60 sec: 4505.6, 300 sec: 5801.3). Total num frames: 1421312. Throughput: 0: 1149.8. Samples: 354766. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
+ [2023-02-22 19:48:35,333][06183] Avg episode reward: [(0, '4.488')]
+ [2023-02-22 19:48:37,340][15000] Updated weights for policy 0, policy_version 350 (0.0039)
+ [2023-02-22 19:48:40,329][06183] Fps is (10 sec: 4505.5, 60 sec: 4437.3, 300 sec: 5767.2). Total num frames: 1441792. Throughput: 0: 1151.1. Samples: 361214. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2023-02-22 19:48:40,343][06183] Avg episode reward: [(0, '4.412')]
+ [2023-02-22 19:48:45,328][06183] Fps is (10 sec: 4096.1, 60 sec: 4437.4, 300 sec: 5734.4). Total num frames: 1462272. Throughput: 0: 1120.4. Samples: 363862. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2023-02-22 19:48:45,334][06183] Avg episode reward: [(0, '4.358')]
+ [2023-02-22 19:48:48,069][15000] Updated weights for policy 0, policy_version 360 (0.0038)
+ [2023-02-22 19:48:50,328][06183] Fps is (10 sec: 4096.2, 60 sec: 4437.3, 300 sec: 5702.9). Total num frames: 1482752. Throughput: 0: 1088.8. Samples: 370116. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+ [2023-02-22 19:48:50,336][06183] Avg episode reward: [(0, '4.525')]
+ [2023-02-22 19:48:55,329][06183] Fps is (10 sec: 4095.7, 60 sec: 4437.3, 300 sec: 5672.6). Total num frames: 1503232. Throughput: 0: 1070.1. Samples: 376256. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+ [2023-02-22 19:48:55,348][06183] Avg episode reward: [(0, '4.419')]
+ [2023-02-22 19:48:58,116][15000] Updated weights for policy 0, policy_version 370 (0.0050)
+ [2023-02-22 19:49:00,329][06183] Fps is (10 sec: 4095.8, 60 sec: 4300.8, 300 sec: 5643.4). Total num frames: 1523712. Throughput: 0: 1050.0. Samples: 379090. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+ [2023-02-22 19:49:00,340][06183] Avg episode reward: [(0, '4.281')]
+ [2023-02-22 19:49:05,329][06183] Fps is (10 sec: 4096.1, 60 sec: 4232.5, 300 sec: 5615.2). Total num frames: 1544192. Throughput: 0: 1053.6. Samples: 385394. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2023-02-22 19:49:05,339][06183] Avg episode reward: [(0, '4.351')]
+ [2023-02-22 19:49:10,328][06183] Fps is (10 sec: 2867.4, 60 sec: 4027.7, 300 sec: 5544.2). Total num frames: 1552384. Throughput: 0: 1005.1. Samples: 389200. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+ [2023-02-22 19:49:10,334][06183] Avg episode reward: [(0, '4.375')]
+ [2023-02-22 19:49:10,896][15000] Updated weights for policy 0, policy_version 380 (0.0070)
+ [2023-02-22 19:49:15,329][06183] Fps is (10 sec: 2048.0, 60 sec: 3822.9, 300 sec: 5490.1). Total num frames: 1564672. Throughput: 0: 963.8. Samples: 390676. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2023-02-22 19:49:15,339][06183] Avg episode reward: [(0, '4.489')]
+ [2023-02-22 19:49:20,329][06183] Fps is (10 sec: 2867.1, 60 sec: 3754.7, 300 sec: 5451.9). Total num frames: 1581056. Throughput: 0: 895.7. Samples: 395074. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2023-02-22 19:49:20,347][06183] Avg episode reward: [(0, '4.518')]
+ [2023-02-22 19:49:24,344][15000] Updated weights for policy 0, policy_version 390 (0.0063)
+ [2023-02-22 19:49:25,329][06183] Fps is (10 sec: 3276.7, 60 sec: 3686.4, 300 sec: 5415.0). Total num frames: 1597440. Throughput: 0: 864.5. Samples: 400116. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2023-02-22 19:49:25,341][06183] Avg episode reward: [(0, '4.529')]
+ [2023-02-22 19:49:30,329][06183] Fps is (10 sec: 3276.9, 60 sec: 3618.1, 300 sec: 5470.6). Total num frames: 1613824. Throughput: 0: 859.3. Samples: 402530. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2023-02-22 19:49:30,337][06183] Avg episode reward: [(0, '4.408')]
+ [2023-02-22 19:49:35,329][06183] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 5526.1). Total num frames: 1630208. Throughput: 0: 823.4. Samples: 407170. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2023-02-22 19:49:35,340][06183] Avg episode reward: [(0, '4.425')]
+ [2023-02-22 19:49:37,868][15000] Updated weights for policy 0, policy_version 400 (0.0061)
+ [2023-02-22 19:49:40,330][06183] Fps is (10 sec: 2866.9, 60 sec: 3345.0, 300 sec: 5567.8). Total num frames: 1642496. Throughput: 0: 784.7. Samples: 411570. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2023-02-22 19:49:40,341][06183] Avg episode reward: [(0, '4.559')]
+ [2023-02-22 19:49:45,329][06183] Fps is (10 sec: 2867.3, 60 sec: 3276.8, 300 sec: 5609.4). Total num frames: 1658880. Throughput: 0: 768.7. Samples: 413680. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+ [2023-02-22 19:49:45,359][06183] Avg episode reward: [(0, '4.448')]
+ [2023-02-22 19:49:50,330][06183] Fps is (10 sec: 2867.3, 60 sec: 3140.2, 300 sec: 5512.2). Total num frames: 1671168. Throughput: 0: 718.5. Samples: 417728. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2023-02-22 19:49:50,342][06183] Avg episode reward: [(0, '4.462')]
+ [2023-02-22 19:49:54,192][15000] Updated weights for policy 0, policy_version 410 (0.0085)
+ [2023-02-22 19:49:55,329][06183] Fps is (10 sec: 2048.1, 60 sec: 2935.5, 300 sec: 5401.2). Total num frames: 1679360. Throughput: 0: 695.7. Samples: 420506. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+ [2023-02-22 19:49:55,351][06183] Avg episode reward: [(0, '4.586')]
+ [2023-02-22 19:50:00,329][06183] Fps is (10 sec: 2048.0, 60 sec: 2798.9, 300 sec: 5304.0). Total num frames: 1691648. Throughput: 0: 698.0. Samples: 422088. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2023-02-22 19:50:00,351][06183] Avg episode reward: [(0, '4.457')]
+ [2023-02-22 19:50:05,329][06183] Fps is (10 sec: 2457.5, 60 sec: 2662.4, 300 sec: 5192.9). Total num frames: 1703936. Throughput: 0: 678.6. Samples: 425612. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+ [2023-02-22 19:50:05,337][06183] Avg episode reward: [(0, '4.308')]
+ [2023-02-22 19:50:10,329][06183] Fps is (10 sec: 2048.1, 60 sec: 2662.4, 300 sec: 5081.8). Total num frames: 1712128. Throughput: 0: 645.4. Samples: 429158. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+ [2023-02-22 19:50:10,336][06183] Avg episode reward: [(0, '4.364')]
+ [2023-02-22 19:50:10,860][14984] Saving /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000419_1716224.pth...
+ [2023-02-22 19:50:11,765][14984] Removing /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000160_655360.pth
+ [2023-02-22 19:50:12,172][15000] Updated weights for policy 0, policy_version 420 (0.0067)
+ [2023-02-22 19:50:15,329][06183] Fps is (10 sec: 2457.7, 60 sec: 2730.7, 300 sec: 4984.6). Total num frames: 1728512. Throughput: 0: 632.7. Samples: 431000. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2023-02-22 19:50:15,364][06183] Avg episode reward: [(0, '4.379')]
+ [2023-02-22 19:50:20,329][06183] Fps is (10 sec: 2867.2, 60 sec: 2662.4, 300 sec: 4887.4). Total num frames: 1740800. Throughput: 0: 619.5. Samples: 435048. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+ [2023-02-22 19:50:20,341][06183] Avg episode reward: [(0, '4.567')]
+ [2023-02-22 19:50:25,329][06183] Fps is (10 sec: 2867.2, 60 sec: 2662.4, 300 sec: 4804.1). Total num frames: 1757184. Throughput: 0: 630.9. Samples: 439958. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+ [2023-02-22 19:50:25,343][06183] Avg episode reward: [(0, '4.502')]
+ [2023-02-22 19:50:25,673][15000] Updated weights for policy 0, policy_version 430 (0.0074)
+ [2023-02-22 19:50:30,329][06183] Fps is (10 sec: 3276.7, 60 sec: 2662.4, 300 sec: 4720.8). Total num frames: 1773568. Throughput: 0: 635.1. Samples: 442260. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+ [2023-02-22 19:50:30,395][06183] Avg episode reward: [(0, '4.370')]
+ [2023-02-22 19:50:35,329][06183] Fps is (10 sec: 2867.2, 60 sec: 2594.2, 300 sec: 4623.6). Total num frames: 1785856. Throughput: 0: 630.1. Samples: 446080. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2023-02-22 19:50:35,342][06183] Avg episode reward: [(0, '4.403')]
+ [2023-02-22 19:50:40,329][06183] Fps is (10 sec: 2048.1, 60 sec: 2525.9, 300 sec: 4512.5). Total num frames: 1794048. Throughput: 0: 641.6. Samples: 449380. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+ [2023-02-22 19:50:40,377][06183] Avg episode reward: [(0, '4.452')]
+ [2023-02-22 19:50:42,687][15000] Updated weights for policy 0, policy_version 440 (0.0068)
+ [2023-02-22 19:50:45,329][06183] Fps is (10 sec: 2047.9, 60 sec: 2457.6, 300 sec: 4429.2). Total num frames: 1806336. Throughput: 0: 647.2. Samples: 451214. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2023-02-22 19:50:45,345][06183] Avg episode reward: [(0, '4.428')]
+ [2023-02-22 19:50:50,329][06183] Fps is (10 sec: 2457.5, 60 sec: 2457.6, 300 sec: 4332.0). Total num frames: 1818624. Throughput: 0: 649.9. Samples: 454858. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+ [2023-02-22 19:50:50,341][06183] Avg episode reward: [(0, '4.342')]
+ [2023-02-22 19:50:55,329][06183] Fps is (10 sec: 2457.7, 60 sec: 2525.9, 300 sec: 4248.7). Total num frames: 1830912. Throughput: 0: 657.8. Samples: 458758. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+ [2023-02-22 19:50:55,341][06183] Avg episode reward: [(0, '4.225')]
+ [2023-02-22 19:50:57,894][15000] Updated weights for policy 0, policy_version 450 (0.0094)
+ [2023-02-22 19:51:00,329][06183] Fps is (10 sec: 2867.2, 60 sec: 2594.1, 300 sec: 4179.3). Total num frames: 1847296. Throughput: 0: 668.9. Samples: 461102. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+ [2023-02-22 19:51:00,364][06183] Avg episode reward: [(0, '4.264')]
+ [2023-02-22 19:51:05,330][06183] Fps is (10 sec: 2867.1, 60 sec: 2594.1, 300 sec: 4082.1). Total num frames: 1859584. Throughput: 0: 658.2. Samples: 464666. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2023-02-22 19:51:05,347][06183] Avg episode reward: [(0, '4.306')]
+ [2023-02-22 19:51:10,329][06183] Fps is (10 sec: 3276.7, 60 sec: 2798.9, 300 sec: 4026.6). Total num frames: 1880064. Throughput: 0: 672.2. Samples: 470206. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2023-02-22 19:51:10,335][06183] Avg episode reward: [(0, '4.395')]
+ [2023-02-22 19:51:10,967][15000] Updated weights for policy 0, policy_version 460 (0.0063)
+ [2023-02-22 19:51:15,329][06183] Fps is (10 sec: 3686.5, 60 sec: 2798.9, 300 sec: 3943.3). Total num frames: 1896448. Throughput: 0: 689.0. Samples: 473264. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2023-02-22 19:51:15,336][06183] Avg episode reward: [(0, '4.431')]
+ [2023-02-22 19:51:20,329][06183] Fps is (10 sec: 3276.9, 60 sec: 2867.2, 300 sec: 3887.7). Total num frames: 1912832. Throughput: 0: 702.5. Samples: 477694. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+ [2023-02-22 19:51:20,337][06183] Avg episode reward: [(0, '4.327')]
+ [2023-02-22 19:51:22,882][15000] Updated weights for policy 0, policy_version 470 (0.0076)
+ [2023-02-22 19:51:25,329][06183] Fps is (10 sec: 3276.8, 60 sec: 2867.2, 300 sec: 3832.2). Total num frames: 1929216. Throughput: 0: 745.9. Samples: 482946. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2023-02-22 19:51:25,342][06183] Avg episode reward: [(0, '4.400')]
+ [2023-02-22 19:51:30,329][06183] Fps is (10 sec: 3276.7, 60 sec: 2867.2, 300 sec: 3776.6). Total num frames: 1945600. Throughput: 0: 757.2. Samples: 485290. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+ [2023-02-22 19:51:30,339][06183] Avg episode reward: [(0, '4.355')]
+ [2023-02-22 19:51:35,188][15000] Updated weights for policy 0, policy_version 480 (0.0058)
+ [2023-02-22 19:51:35,328][06183] Fps is (10 sec: 3686.6, 60 sec: 3003.8, 300 sec: 3721.1). Total num frames: 1966080. Throughput: 0: 794.8. Samples: 490622. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+ [2023-02-22 19:51:35,335][06183] Avg episode reward: [(0, '4.469')]
+ [2023-02-22 19:51:40,330][06183] Fps is (10 sec: 3686.2, 60 sec: 3140.2, 300 sec: 3679.4). Total num frames: 1982464. Throughput: 0: 831.6. Samples: 496182. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2023-02-22 19:51:40,352][06183] Avg episode reward: [(0, '4.530')]
+ [2023-02-22 19:51:45,329][06183] Fps is (10 sec: 3686.3, 60 sec: 3276.8, 300 sec: 3651.7). Total num frames: 2002944. Throughput: 0: 837.7. Samples: 498800. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2023-02-22 19:51:45,337][06183] Avg episode reward: [(0, '4.628')]
+ [2023-02-22 19:51:45,347][14984] Saving new best policy, reward=4.628!
+ [2023-02-22 19:51:46,467][15000] Updated weights for policy 0, policy_version 490 (0.0040)
+ [2023-02-22 19:51:50,329][06183] Fps is (10 sec: 3686.7, 60 sec: 3345.1, 300 sec: 3610.0). Total num frames: 2019328. Throughput: 0: 884.9. Samples: 504488. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2023-02-22 19:51:50,334][06183] Avg episode reward: [(0, '4.772')]
+ [2023-02-22 19:51:50,507][14984] Saving new best policy, reward=4.772!
+ [2023-02-22 19:51:55,330][06183] Fps is (10 sec: 3686.2, 60 sec: 3481.6, 300 sec: 3582.3). Total num frames: 2039808. Throughput: 0: 886.4. Samples: 510092. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2023-02-22 19:51:55,337][06183] Avg episode reward: [(0, '4.618')]
+ [2023-02-22 19:51:57,362][15000] Updated weights for policy 0, policy_version 500 (0.0057)
+ [2023-02-22 19:52:00,330][06183] Fps is (10 sec: 3686.0, 60 sec: 3481.5, 300 sec: 3540.6). Total num frames: 2056192. Throughput: 0: 879.3. Samples: 512832. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+ [2023-02-22 19:52:00,348][06183] Avg episode reward: [(0, '4.444')]
+ [2023-02-22 19:52:05,330][06183] Fps is (10 sec: 3276.7, 60 sec: 3549.8, 300 sec: 3485.1). Total num frames: 2072576. Throughput: 0: 892.3. Samples: 517850. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+ [2023-02-22 19:52:05,337][06183] Avg episode reward: [(0, '4.425')]
+ [2023-02-22 19:52:08,766][15000] Updated weights for policy 0, policy_version 510 (0.0052)
+ [2023-02-22 19:52:10,329][06183] Fps is (10 sec: 3686.8, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 2093056. Throughput: 0: 905.5. Samples: 523694. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+ [2023-02-22 19:52:10,336][06183] Avg episode reward: [(0, '4.350')]
+ [2023-02-22 19:52:10,415][14984] Saving /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000511_2093056.pth...
+ [2023-02-22 19:52:11,271][14984] Removing /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000320_1310720.pth
+ [2023-02-22 19:52:15,329][06183] Fps is (10 sec: 3686.7, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 2109440. Throughput: 0: 914.8. Samples: 526456. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+ [2023-02-22 19:52:15,345][06183] Avg episode reward: [(0, '4.429')]
+ [2023-02-22 19:52:20,329][06183] Fps is (10 sec: 3276.9, 60 sec: 3549.9, 300 sec: 3429.5). Total num frames: 2125824. Throughput: 0: 907.8. Samples: 531472. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+ [2023-02-22 19:52:20,334][06183] Avg episode reward: [(0, '4.466')]
+ [2023-02-22 19:52:20,385][15000] Updated weights for policy 0, policy_version 520 (0.0075)
+ [2023-02-22 19:52:25,329][06183] Fps is (10 sec: 3686.3, 60 sec: 3618.1, 300 sec: 3443.4). Total num frames: 2146304. Throughput: 0: 905.6. Samples: 536934. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+ [2023-02-22 19:52:25,339][06183] Avg episode reward: [(0, '4.353')]
+ [2023-02-22 19:52:30,330][06183] Fps is (10 sec: 3686.0, 60 sec: 3618.1, 300 sec: 3429.5). Total num frames: 2162688. Throughput: 0: 904.3. Samples: 539496. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+ [2023-02-22 19:52:30,337][06183] Avg episode reward: [(0, '4.417')]
+ [2023-02-22 19:52:31,552][15000] Updated weights for policy 0, policy_version 530 (0.0052)
+ [2023-02-22 19:52:35,330][06183] Fps is (10 sec: 3686.1, 60 sec: 3618.0, 300 sec: 3415.6). Total num frames: 2183168. Throughput: 0: 902.2. Samples: 545088. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+ [2023-02-22 19:52:35,340][06183] Avg episode reward: [(0, '4.525')]
+ [2023-02-22 19:52:40,329][06183] Fps is (10 sec: 3686.7, 60 sec: 3618.2, 300 sec: 3401.8). Total num frames: 2199552. Throughput: 0: 897.8. Samples: 550492. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+ [2023-02-22 19:52:40,338][06183] Avg episode reward: [(0, '4.405')]
+ [2023-02-22 19:52:43,172][15000] Updated weights for policy 0, policy_version 540 (0.0057)
+ [2023-02-22 19:52:45,329][06183] Fps is (10 sec: 3277.1, 60 sec: 3549.9, 300 sec: 3387.9). Total num frames: 2215936. Throughput: 0: 894.9. Samples: 553102. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2023-02-22 19:52:45,341][06183] Avg episode reward: [(0, '4.299')]
+ [2023-02-22 19:52:50,334][06183] Fps is (10 sec: 3684.6, 60 sec: 3617.8, 300 sec: 3387.8). Total num frames: 2236416. Throughput: 0: 904.6. Samples: 558560. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+ [2023-02-22 19:52:50,417][06183] Avg episode reward: [(0, '4.354')]
+ [2023-02-22 19:52:55,198][15000] Updated weights for policy 0, policy_version 550 (0.0059)
+ [2023-02-22 19:52:55,329][06183] Fps is (10 sec: 3686.5, 60 sec: 3549.9, 300 sec: 3346.2). Total num frames: 2252800. Throughput: 0: 877.9. Samples: 563198. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+ [2023-02-22 19:52:55,334][06183] Avg episode reward: [(0, '4.544')]
+ [2023-02-22 19:53:00,330][06183] Fps is (10 sec: 3278.2, 60 sec: 3549.9, 300 sec: 3318.4). Total num frames: 2269184. Throughput: 0: 875.5. Samples: 565856. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+ [2023-02-22 19:53:00,338][06183] Avg episode reward: [(0, '4.607')]
+ [2023-02-22 19:53:05,329][06183] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3304.6). Total num frames: 2285568. Throughput: 0: 883.9. Samples: 571248. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2023-02-22 19:53:05,336][06183] Avg episode reward: [(0, '4.433')]
+ [2023-02-22 19:53:06,783][15000] Updated weights for policy 0, policy_version 560 (0.0075)
+ [2023-02-22 19:53:10,329][06183] Fps is (10 sec: 3686.9, 60 sec: 3549.9, 300 sec: 3290.7). Total num frames: 2306048. Throughput: 0: 882.0. Samples: 576624. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0
468
+ [2023-02-22 19:53:10,337][06183] Avg episode reward: [(0, '4.360')]
469
+ [2023-02-22 19:53:15,329][06183] Fps is (10 sec: 3686.5, 60 sec: 3549.9, 300 sec: 3276.8). Total num frames: 2322432. Throughput: 0: 885.4. Samples: 579336. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
470
+ [2023-02-22 19:53:15,338][06183] Avg episode reward: [(0, '4.393')]
471
+ [2023-02-22 19:53:17,881][15000] Updated weights for policy 0, policy_version 570 (0.0045)
472
+ [2023-02-22 19:53:20,329][06183] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3276.8). Total num frames: 2342912. Throughput: 0: 883.7. Samples: 584852. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
473
+ [2023-02-22 19:53:20,335][06183] Avg episode reward: [(0, '4.461')]
474
+ [2023-02-22 19:53:25,329][06183] Fps is (10 sec: 3686.3, 60 sec: 3549.9, 300 sec: 3262.9). Total num frames: 2359296. Throughput: 0: 881.6. Samples: 590162. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
475
+ [2023-02-22 19:53:25,338][06183] Avg episode reward: [(0, '4.526')]
476
+ [2023-02-22 19:53:29,477][15000] Updated weights for policy 0, policy_version 580 (0.0044)
477
+ [2023-02-22 19:53:30,329][06183] Fps is (10 sec: 3276.7, 60 sec: 3549.9, 300 sec: 3235.1). Total num frames: 2375680. Throughput: 0: 883.1. Samples: 592842. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
478
+ [2023-02-22 19:53:30,340][06183] Avg episode reward: [(0, '4.525')]
479
+ [2023-02-22 19:53:35,329][06183] Fps is (10 sec: 3686.2, 60 sec: 3549.9, 300 sec: 3235.1). Total num frames: 2396160. Throughput: 0: 882.5. Samples: 598268. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
480
+ [2023-02-22 19:53:35,337][06183] Avg episode reward: [(0, '4.468')]
481
+ [2023-02-22 19:53:40,329][06183] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3221.3). Total num frames: 2412544. Throughput: 0: 903.2. Samples: 603844. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
482
+ [2023-02-22 19:53:40,336][06183] Avg episode reward: [(0, '4.403')]
483
+ [2023-02-22 19:53:40,527][15000] Updated weights for policy 0, policy_version 590 (0.0061)
484
+ [2023-02-22 19:53:45,329][06183] Fps is (10 sec: 3686.6, 60 sec: 3618.1, 300 sec: 3221.3). Total num frames: 2433024. Throughput: 0: 906.1. Samples: 606630. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
485
+ [2023-02-22 19:53:45,345][06183] Avg episode reward: [(0, '4.392')]
486
+ [2023-02-22 19:53:50,329][06183] Fps is (10 sec: 3686.5, 60 sec: 3550.2, 300 sec: 3207.4). Total num frames: 2449408. Throughput: 0: 912.9. Samples: 612328. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
487
+ [2023-02-22 19:53:50,337][06183] Avg episode reward: [(0, '4.578')]
488
+ [2023-02-22 19:53:52,123][15000] Updated weights for policy 0, policy_version 600 (0.0062)
489
+ [2023-02-22 19:53:55,329][06183] Fps is (10 sec: 3276.7, 60 sec: 3549.8, 300 sec: 3193.5). Total num frames: 2465792. Throughput: 0: 889.9. Samples: 616670. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
490
+ [2023-02-22 19:53:55,344][06183] Avg episode reward: [(0, '4.591')]
491
+ [2023-02-22 19:54:00,329][06183] Fps is (10 sec: 3276.7, 60 sec: 3549.9, 300 sec: 3179.6). Total num frames: 2482176. Throughput: 0: 880.1. Samples: 618942. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
492
+ [2023-02-22 19:54:00,345][06183] Avg episode reward: [(0, '4.686')]
493
+ [2023-02-22 19:54:05,329][06183] Fps is (10 sec: 2457.5, 60 sec: 3413.3, 300 sec: 3179.6). Total num frames: 2490368. Throughput: 0: 846.7. Samples: 622956. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
494
+ [2023-02-22 19:54:05,342][06183] Avg episode reward: [(0, '4.477')]
495
+ [2023-02-22 19:54:07,315][15000] Updated weights for policy 0, policy_version 610 (0.0059)
496
+ [2023-02-22 19:54:10,328][06183] Fps is (10 sec: 3686.7, 60 sec: 3549.9, 300 sec: 3235.2). Total num frames: 2519040. Throughput: 0: 876.1. Samples: 629588. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
497
+ [2023-02-22 19:54:10,330][06183] Avg episode reward: [(0, '4.442')]
498
+ [2023-02-22 19:54:10,342][06183] Components not started: RolloutWorker_w4, wait_time=600.1 seconds
499
+ [2023-02-22 19:54:10,553][14984] Saving /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000616_2523136.pth...
500
+ [2023-02-22 19:54:10,749][14984] Removing /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000419_1716224.pth
501
+ [2023-02-22 19:54:12,412][15000] Updated weights for policy 0, policy_version 620 (0.0013)
502
+ [2023-02-22 19:54:15,328][06183] Fps is (10 sec: 7373.7, 60 sec: 4027.8, 300 sec: 3332.3). Total num frames: 2564096. Throughput: 0: 965.9. Samples: 636308. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
503
+ [2023-02-22 19:54:15,330][06183] Avg episode reward: [(0, '4.427')]
504
+ [2023-02-22 19:54:17,302][15000] Updated weights for policy 0, policy_version 630 (0.0011)
505
+ [2023-02-22 19:54:20,328][06183] Fps is (10 sec: 8601.6, 60 sec: 4369.1, 300 sec: 3415.7). Total num frames: 2605056. Throughput: 0: 1122.4. Samples: 648774. Policy #0 lag: (min: 0.0, avg: 0.9, max: 1.0)
506
+ [2023-02-22 19:54:20,330][06183] Avg episode reward: [(0, '4.489')]
507
+ [2023-02-22 19:54:22,046][15000] Updated weights for policy 0, policy_version 640 (0.0014)
508
+ [2023-02-22 19:54:25,328][06183] Fps is (10 sec: 8191.8, 60 sec: 4778.7, 300 sec: 3499.0). Total num frames: 2646016. Throughput: 0: 1274.0. Samples: 661174. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
509
+ [2023-02-22 19:54:25,332][06183] Avg episode reward: [(0, '4.348')]
510
+ [2023-02-22 19:54:27,020][15000] Updated weights for policy 0, policy_version 650 (0.0015)
511
+ [2023-02-22 19:54:30,328][06183] Fps is (10 sec: 8192.0, 60 sec: 5188.3, 300 sec: 3582.3). Total num frames: 2686976. Throughput: 0: 1352.3. Samples: 667484. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
512
+ [2023-02-22 19:54:30,332][06183] Avg episode reward: [(0, '4.599')]
513
+ [2023-02-22 19:54:32,143][15000] Updated weights for policy 0, policy_version 660 (0.0013)
514
+ [2023-02-22 19:54:35,328][06183] Fps is (10 sec: 8192.2, 60 sec: 5529.7, 300 sec: 3679.5). Total num frames: 2727936. Throughput: 0: 1505.4. Samples: 680070. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
515
+ [2023-02-22 19:54:35,331][06183] Avg episode reward: [(0, '4.600')]
516
+ [2023-02-22 19:54:36,880][15000] Updated weights for policy 0, policy_version 670 (0.0015)
517
+ [2023-02-22 19:54:40,328][06183] Fps is (10 sec: 8192.1, 60 sec: 5939.3, 300 sec: 3762.8). Total num frames: 2768896. Throughput: 0: 1682.3. Samples: 692374. Policy #0 lag: (min: 0.0, avg: 0.9, max: 1.0)
518
+ [2023-02-22 19:54:40,330][06183] Avg episode reward: [(0, '4.375')]
519
+ [2023-02-22 19:54:41,917][15000] Updated weights for policy 0, policy_version 680 (0.0014)
520
+ [2023-02-22 19:54:45,328][06183] Fps is (10 sec: 8191.8, 60 sec: 6280.6, 300 sec: 3860.0). Total num frames: 2809856. Throughput: 0: 1771.9. Samples: 698674. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
521
+ [2023-02-22 19:54:45,335][06183] Avg episode reward: [(0, '4.540')]
522
+ [2023-02-22 19:54:47,030][15000] Updated weights for policy 0, policy_version 690 (0.0017)
523
+ [2023-02-22 19:54:50,329][06183] Fps is (10 sec: 8191.7, 60 sec: 6690.2, 300 sec: 3971.0). Total num frames: 2850816. Throughput: 0: 1943.1. Samples: 710392. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
524
+ [2023-02-22 19:54:50,332][06183] Avg episode reward: [(0, '4.672')]
525
+ [2023-02-22 19:54:52,306][15000] Updated weights for policy 0, policy_version 700 (0.0014)
526
+ [2023-02-22 19:54:55,328][06183] Fps is (10 sec: 8192.0, 60 sec: 7099.8, 300 sec: 4068.2). Total num frames: 2891776. Throughput: 0: 2059.7. Samples: 722274. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
527
+ [2023-02-22 19:54:55,331][06183] Avg episode reward: [(0, '4.302')]
528
+ [2023-02-22 19:54:57,507][15000] Updated weights for policy 0, policy_version 710 (0.0016)
529
+ [2023-02-22 19:55:00,329][06183] Fps is (10 sec: 7782.1, 60 sec: 7441.1, 300 sec: 4151.5). Total num frames: 2928640. Throughput: 0: 2040.9. Samples: 728152. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
530
+ [2023-02-22 19:55:00,332][06183] Avg episode reward: [(0, '4.465')]
531
+ [2023-02-22 19:55:02,833][15000] Updated weights for policy 0, policy_version 720 (0.0017)
532
+ [2023-02-22 19:55:05,328][06183] Fps is (10 sec: 7372.9, 60 sec: 7919.1, 300 sec: 4248.7). Total num frames: 2965504. Throughput: 0: 2017.5. Samples: 739562. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
533
+ [2023-02-22 19:55:05,332][06183] Avg episode reward: [(0, '4.500')]
534
+ [2023-02-22 19:55:08,366][15000] Updated weights for policy 0, policy_version 730 (0.0022)
535
+ [2023-02-22 19:55:10,328][06183] Fps is (10 sec: 7373.3, 60 sec: 8055.5, 300 sec: 4318.2). Total num frames: 3002368. Throughput: 0: 1981.9. Samples: 750360. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
536
+ [2023-02-22 19:55:10,332][06183] Avg episode reward: [(0, '4.632')]
537
+ [2023-02-22 19:55:13,945][15000] Updated weights for policy 0, policy_version 740 (0.0016)
538
+ [2023-02-22 19:55:15,328][06183] Fps is (10 sec: 7372.8, 60 sec: 7918.9, 300 sec: 4401.5). Total num frames: 3039232. Throughput: 0: 1972.0. Samples: 756222. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
539
+ [2023-02-22 19:55:15,331][06183] Avg episode reward: [(0, '4.527')]
540
+ [2023-02-22 19:55:19,232][15000] Updated weights for policy 0, policy_version 750 (0.0015)
541
+ [2023-02-22 19:55:20,328][06183] Fps is (10 sec: 7782.3, 60 sec: 7918.9, 300 sec: 4484.8). Total num frames: 3080192. Throughput: 0: 1945.2. Samples: 767604. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
542
+ [2023-02-22 19:55:20,332][06183] Avg episode reward: [(0, '4.350')]
543
+ [2023-02-22 19:55:25,240][15000] Updated weights for policy 0, policy_version 760 (0.0023)
544
+ [2023-02-22 19:55:25,329][06183] Fps is (10 sec: 7372.6, 60 sec: 7782.4, 300 sec: 4540.3). Total num frames: 3112960. Throughput: 0: 1905.4. Samples: 778120. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
545
+ [2023-02-22 19:55:25,331][06183] Avg episode reward: [(0, '4.305')]
546
+ [2023-02-22 19:55:30,328][06183] Fps is (10 sec: 6553.6, 60 sec: 7645.8, 300 sec: 4609.7). Total num frames: 3145728. Throughput: 0: 1884.2. Samples: 783462. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
547
+ [2023-02-22 19:55:30,332][06183] Avg episode reward: [(0, '4.478')]
548
+ [2023-02-22 19:55:30,929][15000] Updated weights for policy 0, policy_version 770 (0.0016)
549
+ [2023-02-22 19:55:35,328][06183] Fps is (10 sec: 6963.3, 60 sec: 7577.6, 300 sec: 4706.9). Total num frames: 3182592. Throughput: 0: 1848.1. Samples: 793558. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
550
+ [2023-02-22 19:55:35,331][06183] Avg episode reward: [(0, '4.278')]
551
+ [2023-02-22 19:55:36,915][15000] Updated weights for policy 0, policy_version 780 (0.0023)
552
+ [2023-02-22 19:55:40,328][06183] Fps is (10 sec: 7372.9, 60 sec: 7509.3, 300 sec: 4790.3). Total num frames: 3219456. Throughput: 0: 1828.2. Samples: 804542. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
553
+ [2023-02-22 19:55:40,331][06183] Avg episode reward: [(0, '4.435')]
554
+ [2023-02-22 19:55:42,396][15000] Updated weights for policy 0, policy_version 790 (0.0015)
555
+ [2023-02-22 19:55:45,328][06183] Fps is (10 sec: 7373.0, 60 sec: 7441.1, 300 sec: 4873.6). Total num frames: 3256320. Throughput: 0: 1819.0. Samples: 810008. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
556
+ [2023-02-22 19:55:45,332][06183] Avg episode reward: [(0, '4.464')]
557
+ [2023-02-22 19:55:48,358][15000] Updated weights for policy 0, policy_version 800 (0.0015)
558
+ [2023-02-22 19:55:50,328][06183] Fps is (10 sec: 6963.1, 60 sec: 7304.6, 300 sec: 4943.0). Total num frames: 3289088. Throughput: 0: 1798.2. Samples: 820480. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
559
+ [2023-02-22 19:55:50,333][06183] Avg episode reward: [(0, '4.343')]
560
+ [2023-02-22 19:55:54,485][15000] Updated weights for policy 0, policy_version 810 (0.0015)
561
+ [2023-02-22 19:55:55,328][06183] Fps is (10 sec: 6553.5, 60 sec: 7168.0, 300 sec: 4998.5). Total num frames: 3321856. Throughput: 0: 1776.1. Samples: 830284. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
562
+ [2023-02-22 19:55:55,331][06183] Avg episode reward: [(0, '4.378')]
563
+ [2023-02-22 19:56:00,328][06183] Fps is (10 sec: 6553.6, 60 sec: 7099.8, 300 sec: 5067.9). Total num frames: 3354624. Throughput: 0: 1762.8. Samples: 835550. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
564
+ [2023-02-22 19:56:00,333][06183] Avg episode reward: [(0, '4.323')]
565
+ [2023-02-22 19:56:00,418][15000] Updated weights for policy 0, policy_version 820 (0.0019)
566
+ [2023-02-22 19:56:05,329][06183] Fps is (10 sec: 6553.3, 60 sec: 7031.4, 300 sec: 5109.6). Total num frames: 3387392. Throughput: 0: 1734.9. Samples: 845674. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
567
+ [2023-02-22 19:56:05,331][06183] Avg episode reward: [(0, '4.397')]
568
+ [2023-02-22 19:56:06,625][15000] Updated weights for policy 0, policy_version 830 (0.0022)
569
+ [2023-02-22 19:56:10,328][06183] Fps is (10 sec: 6553.6, 60 sec: 6963.2, 300 sec: 5165.1). Total num frames: 3420160. Throughput: 0: 1722.4. Samples: 855626. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
570
+ [2023-02-22 19:56:10,332][06183] Avg episode reward: [(0, '4.600')]
571
+ [2023-02-22 19:56:10,485][14984] Saving /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000836_3424256.pth...
572
+ [2023-02-22 19:56:10,930][14984] Removing /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000511_2093056.pth
573
+ [2023-02-22 19:56:12,963][15000] Updated weights for policy 0, policy_version 840 (0.0019)
574
+ [2023-02-22 19:56:15,328][06183] Fps is (10 sec: 6963.5, 60 sec: 6963.2, 300 sec: 5234.6). Total num frames: 3457024. Throughput: 0: 1707.0. Samples: 860276. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
575
+ [2023-02-22 19:56:15,332][06183] Avg episode reward: [(0, '4.545')]
576
+ [2023-02-22 19:56:19,252][15000] Updated weights for policy 0, policy_version 850 (0.0022)
577
+ [2023-02-22 19:56:20,328][06183] Fps is (10 sec: 6553.6, 60 sec: 6758.4, 300 sec: 5276.2). Total num frames: 3485696. Throughput: 0: 1700.8. Samples: 870094. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
578
+ [2023-02-22 19:56:20,331][06183] Avg episode reward: [(0, '4.351')]
579
+ [2023-02-22 19:56:25,328][06183] Fps is (10 sec: 6143.9, 60 sec: 6758.4, 300 sec: 5331.8). Total num frames: 3518464. Throughput: 0: 1677.1. Samples: 880014. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
580
+ [2023-02-22 19:56:25,332][06183] Avg episode reward: [(0, '4.440')]
581
+ [2023-02-22 19:56:25,388][15000] Updated weights for policy 0, policy_version 860 (0.0021)
582
+ [2023-02-22 19:56:30,328][06183] Fps is (10 sec: 6553.6, 60 sec: 6758.4, 300 sec: 5373.4). Total num frames: 3551232. Throughput: 0: 1667.3. Samples: 885036. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
583
+ [2023-02-22 19:56:30,332][06183] Avg episode reward: [(0, '4.578')]
584
+ [2023-02-22 19:56:31,523][15000] Updated weights for policy 0, policy_version 870 (0.0021)
585
+ [2023-02-22 19:56:35,328][06183] Fps is (10 sec: 6963.2, 60 sec: 6758.4, 300 sec: 5442.8). Total num frames: 3588096. Throughput: 0: 1658.0. Samples: 895088. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
586
+ [2023-02-22 19:56:35,331][06183] Avg episode reward: [(0, '4.614')]
587
+ [2023-02-22 19:56:37,650][15000] Updated weights for policy 0, policy_version 880 (0.0024)
588
+ [2023-02-22 19:56:40,328][06183] Fps is (10 sec: 6963.3, 60 sec: 6690.1, 300 sec: 5484.5). Total num frames: 3620864. Throughput: 0: 1661.6. Samples: 905056. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
589
+ [2023-02-22 19:56:40,335][06183] Avg episode reward: [(0, '4.402')]
590
+ [2023-02-22 19:56:43,775][15000] Updated weights for policy 0, policy_version 890 (0.0017)
591
+ [2023-02-22 19:56:45,328][06183] Fps is (10 sec: 6553.6, 60 sec: 6621.8, 300 sec: 5540.0). Total num frames: 3653632. Throughput: 0: 1656.3. Samples: 910082. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
592
+ [2023-02-22 19:56:45,333][06183] Avg episode reward: [(0, '4.616')]
593
+ [2023-02-22 19:56:50,328][06183] Fps is (10 sec: 6553.5, 60 sec: 6621.9, 300 sec: 5581.7). Total num frames: 3686400. Throughput: 0: 1642.3. Samples: 919578. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
594
+ [2023-02-22 19:56:50,332][15000] Updated weights for policy 0, policy_version 900 (0.0030)
595
+ [2023-02-22 19:56:50,332][06183] Avg episode reward: [(0, '4.342')]
596
+ [2023-02-22 19:56:55,329][06183] Fps is (10 sec: 6143.7, 60 sec: 6553.5, 300 sec: 5623.3). Total num frames: 3715072. Throughput: 0: 1636.5. Samples: 929270. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
597
+ [2023-02-22 19:56:55,335][06183] Avg episode reward: [(0, '4.242')]
598
+ [2023-02-22 19:56:56,845][15000] Updated weights for policy 0, policy_version 910 (0.0034)
599
+ [2023-02-22 19:57:00,329][06183] Fps is (10 sec: 5733.8, 60 sec: 6485.2, 300 sec: 5665.0). Total num frames: 3743744. Throughput: 0: 1627.8. Samples: 933528. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
600
+ [2023-02-22 19:57:00,333][06183] Avg episode reward: [(0, '4.389')]
601
+ [2023-02-22 19:57:04,308][15000] Updated weights for policy 0, policy_version 920 (0.0029)
602
+ [2023-02-22 19:57:05,329][06183] Fps is (10 sec: 5734.5, 60 sec: 6417.1, 300 sec: 5692.7). Total num frames: 3772416. Throughput: 0: 1594.5. Samples: 941848. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
603
+ [2023-02-22 19:57:05,339][06183] Avg episode reward: [(0, '4.555')]
604
+ [2023-02-22 19:57:10,329][06183] Fps is (10 sec: 5325.2, 60 sec: 6280.5, 300 sec: 5720.5). Total num frames: 3796992. Throughput: 0: 1544.1. Samples: 949498. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
605
+ [2023-02-22 19:57:10,333][06183] Avg episode reward: [(0, '4.425')]
606
+ [2023-02-22 19:57:11,788][15000] Updated weights for policy 0, policy_version 930 (0.0033)
607
+ [2023-02-22 19:57:15,328][06183] Fps is (10 sec: 5734.5, 60 sec: 6212.2, 300 sec: 5776.1). Total num frames: 3829760. Throughput: 0: 1535.6. Samples: 954138. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
608
+ [2023-02-22 19:57:15,334][06183] Avg episode reward: [(0, '4.353')]
609
+ [2023-02-22 19:57:19,282][15000] Updated weights for policy 0, policy_version 940 (0.0031)
610
+ [2023-02-22 19:57:20,329][06183] Fps is (10 sec: 5734.4, 60 sec: 6144.0, 300 sec: 5790.0). Total num frames: 3854336. Throughput: 0: 1499.3. Samples: 962558. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
611
+ [2023-02-22 19:57:20,333][06183] Avg episode reward: [(0, '4.441')]
612
+ [2023-02-22 19:57:25,329][06183] Fps is (10 sec: 5324.7, 60 sec: 6075.7, 300 sec: 5831.6). Total num frames: 3883008. Throughput: 0: 1461.5. Samples: 970824. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
613
+ [2023-02-22 19:57:25,334][06183] Avg episode reward: [(0, '4.174')]
614
+ [2023-02-22 19:57:26,479][15000] Updated weights for policy 0, policy_version 950 (0.0030)
615
+ [2023-02-22 19:57:30,328][06183] Fps is (10 sec: 5734.4, 60 sec: 6007.5, 300 sec: 5859.4). Total num frames: 3911680. Throughput: 0: 1446.0. Samples: 975152. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
616
+ [2023-02-22 19:57:30,333][06183] Avg episode reward: [(0, '4.382')]
617
+ [2023-02-22 19:57:33,771][15000] Updated weights for policy 0, policy_version 960 (0.0027)
618
+ [2023-02-22 19:57:35,329][06183] Fps is (10 sec: 5734.1, 60 sec: 5870.9, 300 sec: 5901.0). Total num frames: 3940352. Throughput: 0: 1420.5. Samples: 983500. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
619
+ [2023-02-22 19:57:35,333][06183] Avg episode reward: [(0, '4.299')]
620
+ [2023-02-22 19:57:40,328][06183] Fps is (10 sec: 5734.4, 60 sec: 5802.6, 300 sec: 5942.7). Total num frames: 3969024. Throughput: 0: 1400.8. Samples: 992306. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
621
+ [2023-02-22 19:57:40,334][06183] Avg episode reward: [(0, '4.471')]
622
+ [2023-02-22 19:57:40,870][15000] Updated weights for policy 0, policy_version 970 (0.0023)
623
+ [2023-02-22 19:57:45,329][06183] Fps is (10 sec: 5734.8, 60 sec: 5734.4, 300 sec: 5970.6). Total num frames: 3997696. Throughput: 0: 1392.6. Samples: 996192. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
624
+ [2023-02-22 19:57:45,334][06183] Avg episode reward: [(0, '4.346')]
625
+ [2023-02-22 19:57:46,777][14984] Stopping Batcher_0...
626
+ [2023-02-22 19:57:46,781][14984] Loop batcher_evt_loop terminating...
627
+ [2023-02-22 19:57:46,787][06183] Component Batcher_0 stopped!
628
+ [2023-02-22 19:57:46,792][06183] Component RolloutWorker_w4 process died already! Don't wait for it.
629
+ [2023-02-22 19:57:46,795][14984] Saving /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
630
+ [2023-02-22 19:57:46,811][15003] Stopping RolloutWorker_w3...
631
+ [2023-02-22 19:57:46,814][15003] Loop rollout_proc3_evt_loop terminating...
632
+ [2023-02-22 19:57:46,812][06183] Component RolloutWorker_w3 stopped!
633
+ [2023-02-22 19:57:46,814][15002] Stopping RolloutWorker_w1...
634
+ [2023-02-22 19:57:46,814][15000] Weights refcount: 2 0
635
+ [2023-02-22 19:57:46,814][15004] Stopping RolloutWorker_w2...
636
+ [2023-02-22 19:57:46,815][15008] Stopping RolloutWorker_w5...
637
+ [2023-02-22 19:57:46,815][15007] Stopping RolloutWorker_w7...
638
+ [2023-02-22 19:57:46,815][15001] Stopping RolloutWorker_w0...
639
+ [2023-02-22 19:57:46,815][15006] Stopping RolloutWorker_w6...
640
+ [2023-02-22 19:57:46,816][15002] Loop rollout_proc1_evt_loop terminating...
641
+ [2023-02-22 19:57:46,817][15004] Loop rollout_proc2_evt_loop terminating...
642
+ [2023-02-22 19:57:46,818][15008] Loop rollout_proc5_evt_loop terminating...
643
+ [2023-02-22 19:57:46,819][15007] Loop rollout_proc7_evt_loop terminating...
644
+ [2023-02-22 19:57:46,819][15001] Loop rollout_proc0_evt_loop terminating...
645
+ [2023-02-22 19:57:46,816][06183] Component RolloutWorker_w1 stopped!
646
+ [2023-02-22 19:57:46,819][15006] Loop rollout_proc6_evt_loop terminating...
647
+ [2023-02-22 19:57:46,822][15000] Stopping InferenceWorker_p0-w0...
648
+ [2023-02-22 19:57:46,822][06183] Component RolloutWorker_w2 stopped!
649
+ [2023-02-22 19:57:46,831][15000] Loop inference_proc0-0_evt_loop terminating...
650
+ [2023-02-22 19:57:46,831][06183] Component RolloutWorker_w0 stopped!
651
+ [2023-02-22 19:57:46,839][06183] Component RolloutWorker_w6 stopped!
652
+ [2023-02-22 19:57:46,843][06183] Component RolloutWorker_w5 stopped!
653
+ [2023-02-22 19:57:46,851][06183] Component RolloutWorker_w7 stopped!
654
+ [2023-02-22 19:57:46,858][06183] Component InferenceWorker_p0-w0 stopped!
655
+ [2023-02-22 19:57:47,288][14984] Removing /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000616_2523136.pth
656
+ [2023-02-22 19:57:47,326][14984] Saving /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
657
+ [2023-02-22 19:57:47,774][14984] Stopping LearnerWorker_p0...
658
+ [2023-02-22 19:57:47,776][14984] Loop learner_proc0_evt_loop terminating...
659
+ [2023-02-22 19:57:47,774][06183] Component LearnerWorker_p0 stopped!
660
+ [2023-02-22 19:57:47,780][06183] Waiting for process learner_proc0 to stop...
661
+ [2023-02-22 19:57:51,038][06183] Waiting for process inference_proc0-0 to join...
662
+ [2023-02-22 19:57:51,041][06183] Waiting for process rollout_proc0 to join...
663
+ [2023-02-22 19:57:51,044][06183] Waiting for process rollout_proc1 to join...
664
+ [2023-02-22 19:57:51,048][06183] Waiting for process rollout_proc2 to join...
665
+ [2023-02-22 19:57:51,051][06183] Waiting for process rollout_proc3 to join...
666
+ [2023-02-22 19:57:51,054][06183] Waiting for process rollout_proc4 to join...
667
+ [2023-02-22 19:57:51,057][06183] Waiting for process rollout_proc5 to join...
668
+ [2023-02-22 19:57:51,061][06183] Waiting for process rollout_proc6 to join...
669
+ [2023-02-22 19:57:51,065][06183] Waiting for process rollout_proc7 to join...
670
+ [2023-02-22 19:57:51,070][06183] Batcher 0 profile tree view:
671
+ batching: 25.7881, releasing_batches: 0.0596
672
+ [2023-02-22 19:57:51,073][06183] InferenceWorker_p0-w0 profile tree view:
673
+ wait_policy: 0.0001
674
+ wait_policy_total: 12.6567
675
+ update_model: 11.5028
676
+ weight_update: 0.0038
677
+ one_step: 0.0060
678
+ handle_policy_step: 735.6968
679
+ deserialize: 18.8574, stack: 3.9381, obs_to_device_normalize: 177.8106, forward: 278.2881, send_messages: 53.0078
680
+ prepare_outputs: 178.7753
681
+ to_cpu: 152.6849
682
+ [2023-02-22 19:57:51,076][06183] Learner 0 profile tree view:
683
+ misc: 0.0097, prepare_batch: 78.0888
684
+ train: 160.3988
685
+ epoch_init: 0.0142, minibatch_init: 0.0160, losses_postprocess: 1.1488, kl_divergence: 1.2237, after_optimizer: 84.7016
686
+ calculate_losses: 46.2952
687
+ losses_init: 0.0071, forward_head: 2.9452, bptt_initial: 33.8132, tail: 1.5771, advantages_returns: 0.4771, losses: 3.8496
688
+ bptt: 3.1942
689
+ bptt_forward_core: 3.0460
690
+ update: 26.0565
691
+ clip: 3.4502
692
+ [2023-02-22 19:57:51,080][06183] RolloutWorker_w0 profile tree view:
693
+ wait_for_trajectories: 0.3167, enqueue_policy_requests: 18.2264, env_step: 361.8246, overhead: 28.6136, complete_rollouts: 0.7092
694
+ save_policy_outputs: 22.1311
695
+ split_output_tensors: 10.4875
696
+ [2023-02-22 19:57:51,083][06183] RolloutWorker_w7 profile tree view:
697
+ wait_for_trajectories: 0.3322, enqueue_policy_requests: 18.1051, env_step: 360.2381, overhead: 28.7457, complete_rollouts: 0.6980
698
+ save_policy_outputs: 22.5731
699
+ split_output_tensors: 10.6929
700
+ [2023-02-22 19:57:51,087][06183] Loop Runner_EvtLoop terminating...
701
+ [2023-02-22 19:57:51,090][06183] Runner profile tree view:
702
+ main_loop: 814.7198
703
+ [2023-02-22 19:57:51,093][06183] Collected {0: 4005888}, FPS: 4916.9
704
+ [2023-02-22 20:25:32,536][06183] Loading existing experiment configuration from /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/config.json
705
+ [2023-02-22 20:25:32,541][06183] Overriding arg 'num_workers' with value 1 passed from command line
706
+ [2023-02-22 20:25:32,544][06183] Adding new argument 'no_render'=True that is not in the saved config file!
707
+ [2023-02-22 20:25:32,547][06183] Adding new argument 'save_video'=True that is not in the saved config file!
708
+ [2023-02-22 20:25:32,550][06183] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
709
+ [2023-02-22 20:25:32,552][06183] Adding new argument 'video_name'=None that is not in the saved config file!
710
+ [2023-02-22 20:25:32,553][06183] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
711
+ [2023-02-22 20:25:32,555][06183] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
712
+ [2023-02-22 20:25:32,556][06183] Adding new argument 'push_to_hub'=False that is not in the saved config file!
713
+ [2023-02-22 20:25:32,558][06183] Adding new argument 'hf_repository'=None that is not in the saved config file!
714
+ [2023-02-22 20:25:32,560][06183] Adding new argument 'policy_index'=0 that is not in the saved config file!
715
+ [2023-02-22 20:25:32,562][06183] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
716
+ [2023-02-22 20:25:32,564][06183] Adding new argument 'train_script'=None that is not in the saved config file!
717
+ [2023-02-22 20:25:32,566][06183] Adding new argument 'enjoy_script'=None that is not in the saved config file!
718
+ [2023-02-22 20:25:32,568][06183] Using frameskip 1 and render_action_repeat=4 for evaluation
719
+ [2023-02-22 20:25:32,609][06183] Doom resolution: 160x120, resize resolution: (128, 72)
720
+ [2023-02-22 20:25:32,617][06183] RunningMeanStd input shape: (3, 72, 128)
721
+ [2023-02-22 20:25:32,631][06183] RunningMeanStd input shape: (1,)
722
+ [2023-02-22 20:25:32,717][06183] ConvEncoder: input_channels=3
723
+ [2023-02-22 20:25:33,575][06183] Conv encoder output size: 512
724
+ [2023-02-22 20:25:33,578][06183] Policy head output size: 512
725
+ [2023-02-22 20:25:38,271][06183] Loading state from checkpoint /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
726
+ [2023-02-22 20:25:42,521][06183] Num frames 100...
727
+ [2023-02-22 20:25:42,700][06183] Num frames 200...
728
+ [2023-02-22 20:25:42,879][06183] Num frames 300...
729
+ [2023-02-22 20:25:43,085][06183] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
730
+ [2023-02-22 20:25:43,087][06183] Avg episode reward: 3.840, avg true_objective: 3.840
731
+ [2023-02-22 20:25:43,117][06183] Num frames 400...
732
+ [2023-02-22 20:25:43,298][06183] Num frames 500...
733
+ [2023-02-22 20:25:43,474][06183] Num frames 600...
734
+ [2023-02-22 20:25:43,647][06183] Num frames 700...
735
+ [2023-02-22 20:25:43,836][06183] Num frames 800...
736
+ [2023-02-22 20:25:43,954][06183] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
737
+ [2023-02-22 20:25:43,959][06183] Avg episode reward: 4.660, avg true_objective: 4.160
738
+ [2023-02-22 20:25:44,104][06183] Num frames 900...
739
+ [2023-02-22 20:25:44,289][06183] Num frames 1000...
740
+ [2023-02-22 20:25:44,471][06183] Num frames 1100...
741
+ [2023-02-22 20:25:44,649][06183] Num frames 1200...
742
+ [2023-02-22 20:25:44,742][06183] Avg episode rewards: #0: 4.387, true rewards: #0: 4.053
743
+ [2023-02-22 20:25:44,745][06183] Avg episode reward: 4.387, avg true_objective: 4.053
744
+ [2023-02-22 20:25:44,898][06183] Num frames 1300...
745
+ [2023-02-22 20:25:45,081][06183] Num frames 1400...
746
+ [2023-02-22 20:25:45,262][06183] Avg episode rewards: #0: 3.930, true rewards: #0: 3.680
747
+ [2023-02-22 20:25:45,264][06183] Avg episode reward: 3.930, avg true_objective: 3.680
748
+ [2023-02-22 20:25:45,323][06183] Num frames 1500...
749
+ [2023-02-22 20:25:45,500][06183] Num frames 1600...
750
+ [2023-02-22 20:25:45,677][06183] Num frames 1700...
751
+ [2023-02-22 20:25:45,855][06183] Num frames 1800...
752
+ [2023-02-22 20:25:46,035][06183] Num frames 1900...
753
+ [2023-02-22 20:25:46,130][06183] Avg episode rewards: #0: 4.240, true rewards: #0: 3.840
754
+ [2023-02-22 20:25:46,132][06183] Avg episode reward: 4.240, avg true_objective: 3.840
755
+ [2023-02-22 20:25:46,276][06183] Num frames 2000...
756
+ [2023-02-22 20:25:46,450][06183] Num frames 2100...
757
+ [2023-02-22 20:25:46,622][06183] Num frames 2200...
758
+ [2023-02-22 20:25:46,816][06183] Num frames 2300...
759
+ [2023-02-22 20:25:46,881][06183] Avg episode rewards: #0: 4.173, true rewards: #0: 3.840
760
+ [2023-02-22 20:25:46,884][06183] Avg episode reward: 4.173, avg true_objective: 3.840
761
+ [2023-02-22 20:25:47,064][06183] Num frames 2400...
762
+ [2023-02-22 20:25:47,255][06183] Num frames 2500...
763
+ [2023-02-22 20:25:47,435][06183] Num frames 2600...
764
+ [2023-02-22 20:25:47,655][06183] Avg episode rewards: #0: 4.126, true rewards: #0: 3.840
765
+ [2023-02-22 20:25:47,658][06183] Avg episode reward: 4.126, avg true_objective: 3.840
766
+ [2023-02-22 20:25:47,686][06183] Num frames 2700...
767
+ [2023-02-22 20:25:47,876][06183] Num frames 2800...
768
+ [2023-02-22 20:25:48,054][06183] Num frames 2900...
769
+ [2023-02-22 20:25:48,238][06183] Num frames 3000...
770
+ [2023-02-22 20:25:48,411][06183] Num frames 3100...
771
+ [2023-02-22 20:25:48,479][06183] Avg episode rewards: #0: 4.255, true rewards: #0: 3.880
772
+ [2023-02-22 20:25:48,481][06183] Avg episode reward: 4.255, avg true_objective: 3.880
773
+ [2023-02-22 20:25:48,654][06183] Num frames 3200...
774
+ [2023-02-22 20:25:48,826][06183] Num frames 3300...
775
+ [2023-02-22 20:25:49,002][06183] Num frames 3400...
776
+ [2023-02-22 20:25:49,219][06183] Avg episode rewards: #0: 4.209, true rewards: #0: 3.876
777
+ [2023-02-22 20:25:49,221][06183] Avg episode reward: 4.209, avg true_objective: 3.876
778
+ [2023-02-22 20:25:49,271][06183] Num frames 3500...
779
+ [2023-02-22 20:25:49,455][06183] Num frames 3600...
780
+ [2023-02-22 20:25:49,636][06183] Num frames 3700...
781
+ [2023-02-22 20:25:49,820][06183] Num frames 3800...
782
+ [2023-02-22 20:25:49,897][06183] Avg episode rewards: #0: 4.208, true rewards: #0: 3.808
783
+ [2023-02-22 20:25:49,900][06183] Avg episode reward: 4.208, avg true_objective: 3.808
784
+ [2023-02-22 20:25:51,533][06183] Replay video saved to /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/replay.mp4!
785
+ [2023-02-22 20:28:07,610][06183] Loading existing experiment configuration from /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/config.json
786
+ [2023-02-22 20:28:07,617][06183] Overriding arg 'num_workers' with value 1 passed from command line
787
+ [2023-02-22 20:28:07,620][06183] Adding new argument 'no_render'=True that is not in the saved config file!
788
+ [2023-02-22 20:28:07,623][06183] Adding new argument 'save_video'=True that is not in the saved config file!
789
+ [2023-02-22 20:28:07,625][06183] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
790
+ [2023-02-22 20:28:07,627][06183] Adding new argument 'video_name'=None that is not in the saved config file!
791
+ [2023-02-22 20:28:07,630][06183] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
792
+ [2023-02-22 20:28:07,632][06183] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
793
+ [2023-02-22 20:28:07,634][06183] Adding new argument 'push_to_hub'=True that is not in the saved config file!
794
+ [2023-02-22 20:28:07,636][06183] Adding new argument 'hf_repository'='chqmatteo/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
795
+ [2023-02-22 20:28:07,638][06183] Adding new argument 'policy_index'=0 that is not in the saved config file!
796
+ [2023-02-22 20:28:07,639][06183] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
797
+ [2023-02-22 20:28:07,640][06183] Adding new argument 'train_script'=None that is not in the saved config file!
798
+ [2023-02-22 20:28:07,642][06183] Adding new argument 'enjoy_script'=None that is not in the saved config file!
799
+ [2023-02-22 20:28:07,643][06183] Using frameskip 1 and render_action_repeat=4 for evaluation
800
+ [2023-02-22 20:28:07,661][06183] RunningMeanStd input shape: (3, 72, 128)
801
+ [2023-02-22 20:28:07,665][06183] RunningMeanStd input shape: (1,)
802
+ [2023-02-22 20:28:07,680][06183] ConvEncoder: input_channels=3
803
+ [2023-02-22 20:28:07,712][06183] Conv encoder output size: 512
804
+ [2023-02-22 20:28:07,714][06183] Policy head output size: 512
805
+ [2023-02-22 20:28:07,762][06183] Loading state from checkpoint /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
806
+ [2023-02-22 20:28:09,216][06183] Num frames 100...
807
+ [2023-02-22 20:28:09,390][06183] Num frames 200...
808
+ [2023-02-22 20:28:09,564][06183] Num frames 300...
809
+ [2023-02-22 20:28:09,775][06183] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
810
+ [2023-02-22 20:28:09,777][06183] Avg episode reward: 3.840, avg true_objective: 3.840
811
+ [2023-02-22 20:28:09,833][06183] Num frames 400...
812
+ [2023-02-22 20:28:10,015][06183] Num frames 500...
813
+ [2023-02-22 20:28:10,196][06183] Num frames 600...
814
+ [2023-02-22 20:28:10,374][06183] Num frames 700...
815
+ [2023-02-22 20:28:10,551][06183] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
816
+ [2023-02-22 20:28:10,555][06183] Avg episode reward: 3.840, avg true_objective: 3.840
817
+ [2023-02-22 20:28:10,614][06183] Num frames 800...
818
+ [2023-02-22 20:28:10,799][06183] Num frames 900...
819
+ [2023-02-22 20:28:10,978][06183] Num frames 1000...
820
+ [2023-02-22 20:28:11,080][06183] Avg episode rewards: #0: 3.413, true rewards: #0: 3.413
821
+ [2023-02-22 20:28:11,082][06183] Avg episode reward: 3.413, avg true_objective: 3.413
822
+ [2023-02-22 20:28:11,220][06183] Num frames 1100...
823
+ [2023-02-22 20:28:11,403][06183] Num frames 1200...
824
+ [2023-02-22 20:28:11,589][06183] Num frames 1300...
825
+ [2023-02-22 20:28:11,801][06183] Num frames 1400...
826
+ [2023-02-22 20:28:11,880][06183] Avg episode rewards: #0: 3.520, true rewards: #0: 3.520
827
+ [2023-02-22 20:28:11,883][06183] Avg episode reward: 3.520, avg true_objective: 3.520
828
+ [2023-02-22 20:28:12,065][06183] Num frames 1500...
829
+ [2023-02-22 20:28:12,242][06183] Num frames 1600...
830
+ [2023-02-22 20:28:12,420][06183] Num frames 1700...
831
+ [2023-02-22 20:28:12,635][06183] Avg episode rewards: #0: 3.584, true rewards: #0: 3.584
832
+ [2023-02-22 20:28:12,639][06183] Avg episode reward: 3.584, avg true_objective: 3.584
833
+ [2023-02-22 20:28:12,659][06183] Num frames 1800...
834
+ [2023-02-22 20:28:12,838][06183] Num frames 1900...
835
+ [2023-02-22 20:28:13,023][06183] Num frames 2000...
836
+ [2023-02-22 20:28:13,210][06183] Num frames 2100...
837
+ [2023-02-22 20:28:13,401][06183] Num frames 2200...
838
+ [2023-02-22 20:28:13,475][06183] Avg episode rewards: #0: 3.847, true rewards: #0: 3.680
839
+ [2023-02-22 20:28:13,477][06183] Avg episode reward: 3.847, avg true_objective: 3.680
840
+ [2023-02-22 20:28:13,644][06183] Num frames 2300...
841
+ [2023-02-22 20:28:13,814][06183] Num frames 2400...
842
+ [2023-02-22 20:28:13,990][06183] Num frames 2500...
843
+ [2023-02-22 20:28:14,187][06183] Num frames 2600...
844
+ [2023-02-22 20:28:14,364][06183] Num frames 2700...
845
+ [2023-02-22 20:28:14,572][06183] Avg episode rewards: #0: 4.549, true rewards: #0: 3.977
846
+ [2023-02-22 20:28:14,575][06183] Avg episode reward: 4.549, avg true_objective: 3.977
847
+ [2023-02-22 20:28:14,610][06183] Num frames 2800...
848
+ [2023-02-22 20:28:14,800][06183] Num frames 2900...
849
+ [2023-02-22 20:28:14,990][06183] Num frames 3000...
850
+ [2023-02-22 20:28:15,181][06183] Num frames 3100...
851
+ [2023-02-22 20:28:15,363][06183] Num frames 3200...
852
+ [2023-02-22 20:28:15,486][06183] Avg episode rewards: #0: 4.665, true rewards: #0: 4.040
853
+ [2023-02-22 20:28:15,488][06183] Avg episode reward: 4.665, avg true_objective: 4.040
854
+ [2023-02-22 20:28:15,635][06183] Num frames 3300...
855
+ [2023-02-22 20:28:15,828][06183] Num frames 3400...
856
+ [2023-02-22 20:28:16,009][06183] Num frames 3500...
857
+ [2023-02-22 20:28:16,196][06183] Num frames 3600...
858
+ [2023-02-22 20:28:16,399][06183] Avg episode rewards: #0: 4.756, true rewards: #0: 4.089
859
+ [2023-02-22 20:28:16,401][06183] Avg episode reward: 4.756, avg true_objective: 4.089
860
+ [2023-02-22 20:28:16,444][06183] Num frames 3700...
861
+ [2023-02-22 20:28:16,645][06183] Num frames 3800...
862
+ [2023-02-22 20:28:16,836][06183] Num frames 3900...
863
+ [2023-02-22 20:28:16,960][06183] Avg episode rewards: #0: 4.536, true rewards: #0: 3.936
864
+ [2023-02-22 20:28:16,964][06183] Avg episode reward: 4.536, avg true_objective: 3.936
865
+ [2023-02-22 20:28:18,569][06183] Replay video saved to /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/replay.mp4!