diff --git "a/sf_log.txt" "b/sf_log.txt"
new file mode 100644
--- /dev/null
+++ "b/sf_log.txt"
@@ -0,0 +1,1117 @@
+[2023-02-25 14:10:34,782][00869] Saving configuration to /content/train_dir/default_experiment/config.json...
+[2023-02-25 14:10:34,787][00869] Rollout worker 0 uses device cpu
+[2023-02-25 14:10:34,791][00869] Rollout worker 1 uses device cpu
+[2023-02-25 14:10:34,795][00869] Rollout worker 2 uses device cpu
+[2023-02-25 14:10:34,797][00869] Rollout worker 3 uses device cpu
+[2023-02-25 14:10:34,798][00869] Rollout worker 4 uses device cpu
+[2023-02-25 14:10:34,799][00869] Rollout worker 5 uses device cpu
+[2023-02-25 14:10:34,800][00869] Rollout worker 6 uses device cpu
+[2023-02-25 14:10:34,801][00869] Rollout worker 7 uses device cpu
+[2023-02-25 14:10:35,068][00869] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-02-25 14:10:35,071][00869] InferenceWorker_p0-w0: min num requests: 2
+[2023-02-25 14:10:35,125][00869] Starting all processes...
+[2023-02-25 14:10:35,127][00869] Starting process learner_proc0
+[2023-02-25 14:10:35,219][00869] Starting all processes...
+[2023-02-25 14:10:35,298][00869] Starting process inference_proc0-0
+[2023-02-25 14:10:35,306][00869] Starting process rollout_proc0
+[2023-02-25 14:10:35,306][00869] Starting process rollout_proc1
+[2023-02-25 14:10:35,306][00869] Starting process rollout_proc2
+[2023-02-25 14:10:35,309][00869] Starting process rollout_proc3
+[2023-02-25 14:10:35,309][00869] Starting process rollout_proc4
+[2023-02-25 14:10:35,311][00869] Starting process rollout_proc5
+[2023-02-25 14:10:35,311][00869] Starting process rollout_proc6
+[2023-02-25 14:10:35,311][00869] Starting process rollout_proc7
+[2023-02-25 14:10:45,716][10866] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-02-25 14:10:45,720][10866] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+[2023-02-25 14:10:48,473][10884] Worker 3 uses CPU cores [1]
+[2023-02-25 14:10:48,869][10882] Worker 1 uses CPU cores [1]
+[2023-02-25 14:10:48,875][10887] Worker 6 uses CPU cores [0]
+[2023-02-25 14:10:49,038][10866] Num visible devices: 1
+[2023-02-25 14:10:49,067][10866] Starting seed is not provided
+[2023-02-25 14:10:49,068][10866] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-02-25 14:10:49,069][10866] Initializing actor-critic model on device cuda:0
+[2023-02-25 14:10:49,070][10866] RunningMeanStd input shape: (3, 72, 128)
+[2023-02-25 14:10:49,073][10866] RunningMeanStd input shape: (1,)
+[2023-02-25 14:10:49,109][10886] Worker 5 uses CPU cores [1]
+[2023-02-25 14:10:49,116][10883] Worker 2 uses CPU cores [0]
+[2023-02-25 14:10:49,165][10881] Worker 0 uses CPU cores [0]
+[2023-02-25 14:10:49,197][10866] ConvEncoder: input_channels=3
+[2023-02-25 14:10:49,283][10880] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-02-25 14:10:49,285][10880] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+[2023-02-25 14:10:49,294][10885] Worker 4 uses CPU cores [0]
+[2023-02-25 14:10:49,322][10888] Worker 7 uses CPU cores [1]
+[2023-02-25 14:10:49,326][10880] Num visible devices: 1
+[2023-02-25 14:10:49,760][10866] Conv encoder output size: 512
+[2023-02-25 14:10:49,761][10866] Policy head output size: 512
+[2023-02-25 14:10:49,834][10866] Created Actor Critic model with architecture:
+[2023-02-25 14:10:49,835][10866] ActorCriticSharedWeights(
+  (obs_normalizer): ObservationNormalizer(
+    (running_mean_std): RunningMeanStdDictInPlace(
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): VizdoomEncoder(
+    (basic_encoder): ConvEncoder(
+      (enc): RecursiveScriptModule(
+        original_name=ConvEncoderImpl
+        (conv_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Conv2d)
+          (1): RecursiveScriptModule(original_name=ELU)
+          (2): RecursiveScriptModule(original_name=Conv2d)
+          (3): RecursiveScriptModule(original_name=ELU)
+          (4): RecursiveScriptModule(original_name=Conv2d)
+          (5): RecursiveScriptModule(original_name=ELU)
+        )
+        (mlp_layers): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ELU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreRNN(
+    (core): GRU(512, 512)
+  )
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationDefault(
+    (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
+  )
+)
+[2023-02-25 14:10:55,058][00869] Heartbeat connected on Batcher_0
+[2023-02-25 14:10:55,069][00869] Heartbeat connected on InferenceWorker_p0-w0
+[2023-02-25 14:10:55,079][00869] Heartbeat connected on RolloutWorker_w0
+[2023-02-25 14:10:55,083][00869] Heartbeat connected on RolloutWorker_w1
+[2023-02-25 14:10:55,088][00869] Heartbeat connected on RolloutWorker_w2
+[2023-02-25 14:10:55,092][00869] Heartbeat connected on RolloutWorker_w3
+[2023-02-25 14:10:55,096][00869] Heartbeat connected on RolloutWorker_w4
+[2023-02-25 14:10:55,104][00869] Heartbeat connected on RolloutWorker_w5
+[2023-02-25 14:10:55,118][00869] Heartbeat connected on RolloutWorker_w6
+[2023-02-25 14:10:55,124][00869] Heartbeat connected on RolloutWorker_w7
+[2023-02-25 14:10:57,017][10866] Using optimizer
+[2023-02-25 14:10:57,018][10866] No checkpoints found
+[2023-02-25 14:10:57,018][10866] Did not load from checkpoint, starting from scratch!
+[2023-02-25 14:10:57,019][10866] Initialized policy 0 weights for model version 0
+[2023-02-25 14:10:57,021][10866] LearnerWorker_p0 finished initialization!
+[2023-02-25 14:10:57,025][10866] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-02-25 14:10:57,022][00869] Heartbeat connected on LearnerWorker_p0
+[2023-02-25 14:10:57,224][10880] RunningMeanStd input shape: (3, 72, 128)
+[2023-02-25 14:10:57,226][10880] RunningMeanStd input shape: (1,)
+[2023-02-25 14:10:57,240][10880] ConvEncoder: input_channels=3
+[2023-02-25 14:10:57,337][10880] Conv encoder output size: 512
+[2023-02-25 14:10:57,338][10880] Policy head output size: 512
+[2023-02-25 14:10:57,797][00869] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-02-25 14:10:59,581][00869] Inference worker 0-0 is ready!
+[2023-02-25 14:10:59,583][00869] All inference workers are ready! Signal rollout workers to start!
+[2023-02-25 14:10:59,699][10887] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-25 14:10:59,715][10881] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-25 14:10:59,723][10883] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-25 14:10:59,736][10884] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-25 14:10:59,735][10888] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-25 14:10:59,747][10882] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-25 14:10:59,753][10885] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-25 14:10:59,764][10886] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-25 14:11:00,667][10884] Decorrelating experience for 0 frames...
+[2023-02-25 14:11:00,669][10886] Decorrelating experience for 0 frames...
+[2023-02-25 14:11:01,537][10887] Decorrelating experience for 0 frames...
+[2023-02-25 14:11:01,550][10883] Decorrelating experience for 0 frames...
+[2023-02-25 14:11:01,568][10881] Decorrelating experience for 0 frames...
+[2023-02-25 14:11:01,593][10885] Decorrelating experience for 0 frames...
+[2023-02-25 14:11:01,668][10884] Decorrelating experience for 32 frames...
+[2023-02-25 14:11:02,654][10885] Decorrelating experience for 32 frames...
+[2023-02-25 14:11:02,749][10883] Decorrelating experience for 32 frames...
+[2023-02-25 14:11:02,797][00869] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-02-25 14:11:03,188][10882] Decorrelating experience for 0 frames...
+[2023-02-25 14:11:03,301][10888] Decorrelating experience for 0 frames...
+[2023-02-25 14:11:03,428][10886] Decorrelating experience for 32 frames...
+[2023-02-25 14:11:03,663][10884] Decorrelating experience for 64 frames...
+[2023-02-25 14:11:04,748][10883] Decorrelating experience for 64 frames...
+[2023-02-25 14:11:05,009][10882] Decorrelating experience for 32 frames...
+[2023-02-25 14:11:05,007][10887] Decorrelating experience for 32 frames...
+[2023-02-25 14:11:05,146][10888] Decorrelating experience for 32 frames...
+[2023-02-25 14:11:05,499][10885] Decorrelating experience for 64 frames...
+[2023-02-25 14:11:05,515][10886] Decorrelating experience for 64 frames...
+[2023-02-25 14:11:05,656][10884] Decorrelating experience for 96 frames...
+[2023-02-25 14:11:06,613][10882] Decorrelating experience for 64 frames...
+[2023-02-25 14:11:06,750][10888] Decorrelating experience for 64 frames...
+[2023-02-25 14:11:07,378][10881] Decorrelating experience for 32 frames...
+[2023-02-25 14:11:07,485][10882] Decorrelating experience for 96 frames...
+[2023-02-25 14:11:07,801][00869] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-02-25 14:11:07,878][10886] Decorrelating experience for 96 frames...
+[2023-02-25 14:11:08,120][10887] Decorrelating experience for 64 frames...
+[2023-02-25 14:11:08,302][10888] Decorrelating experience for 96 frames...
+[2023-02-25 14:11:08,307][10883] Decorrelating experience for 96 frames...
+[2023-02-25 14:11:08,444][10885] Decorrelating experience for 96 frames...
+[2023-02-25 14:11:08,961][10881] Decorrelating experience for 64 frames...
+[2023-02-25 14:11:09,065][10887] Decorrelating experience for 96 frames...
+[2023-02-25 14:11:09,394][10881] Decorrelating experience for 96 frames...
+[2023-02-25 14:11:12,605][10866] Signal inference workers to stop experience collection...
+[2023-02-25 14:11:12,613][10880] InferenceWorker_p0-w0: stopping experience collection
+[2023-02-25 14:11:12,797][00869] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 105.9. Samples: 1588. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-02-25 14:11:12,799][00869] Avg episode reward: [(0, '1.901')]
+[2023-02-25 14:11:14,944][10866] Signal inference workers to resume experience collection...
+[2023-02-25 14:11:14,945][10880] InferenceWorker_p0-w0: resuming experience collection
+[2023-02-25 14:11:17,797][00869] Fps is (10 sec: 1639.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 16384. Throughput: 0: 222.5. Samples: 4450. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:11:17,802][00869] Avg episode reward: [(0, '3.277')]
+[2023-02-25 14:11:22,797][00869] Fps is (10 sec: 2867.2, 60 sec: 1146.9, 300 sec: 1146.9). Total num frames: 28672. Throughput: 0: 267.4. Samples: 6684. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0)
+[2023-02-25 14:11:22,803][00869] Avg episode reward: [(0, '3.820')]
+[2023-02-25 14:11:25,946][10880] Updated weights for policy 0, policy_version 10 (0.0018)
+[2023-02-25 14:11:27,797][00869] Fps is (10 sec: 2867.2, 60 sec: 1501.9, 300 sec: 1501.9). Total num frames: 45056. Throughput: 0: 372.9. Samples: 11186. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2023-02-25 14:11:27,799][00869] Avg episode reward: [(0, '4.325')]
+[2023-02-25 14:11:32,797][00869] Fps is (10 sec: 4096.0, 60 sec: 1989.5, 300 sec: 1989.5). Total num frames: 69632. Throughput: 0: 516.2. Samples: 18066. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:11:32,798][00869] Avg episode reward: [(0, '4.403')]
+[2023-02-25 14:11:35,049][10880] Updated weights for policy 0, policy_version 20 (0.0016)
+[2023-02-25 14:11:37,797][00869] Fps is (10 sec: 4505.4, 60 sec: 2252.8, 300 sec: 2252.8). Total num frames: 90112. Throughput: 0: 540.9. Samples: 21638. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-25 14:11:37,801][00869] Avg episode reward: [(0, '4.304')]
+[2023-02-25 14:11:42,797][00869] Fps is (10 sec: 3686.4, 60 sec: 2366.6, 300 sec: 2366.6). Total num frames: 106496. Throughput: 0: 596.7. Samples: 26852. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-25 14:11:42,799][00869] Avg episode reward: [(0, '4.234')]
+[2023-02-25 14:11:42,801][10866] Saving new best policy, reward=4.234!
+[2023-02-25 14:11:47,369][10880] Updated weights for policy 0, policy_version 30 (0.0042)
+[2023-02-25 14:11:47,797][00869] Fps is (10 sec: 3276.9, 60 sec: 2457.6, 300 sec: 2457.6). Total num frames: 122880. Throughput: 0: 705.0. Samples: 31726. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-25 14:11:47,804][00869] Avg episode reward: [(0, '4.236')]
+[2023-02-25 14:11:47,814][10866] Saving new best policy, reward=4.236!
+[2023-02-25 14:11:52,797][00869] Fps is (10 sec: 4096.0, 60 sec: 2681.0, 300 sec: 2681.0). Total num frames: 147456. Throughput: 0: 784.7. Samples: 35310. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+[2023-02-25 14:11:52,799][00869] Avg episode reward: [(0, '4.222')]
+[2023-02-25 14:11:55,740][10880] Updated weights for policy 0, policy_version 40 (0.0014)
+[2023-02-25 14:11:57,797][00869] Fps is (10 sec: 4505.6, 60 sec: 2798.9, 300 sec: 2798.9). Total num frames: 167936. Throughput: 0: 907.8. Samples: 42440. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-25 14:11:57,806][00869] Avg episode reward: [(0, '4.356')]
+[2023-02-25 14:11:57,908][10866] Saving new best policy, reward=4.356!
+[2023-02-25 14:12:02,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3003.7, 300 sec: 2772.7). Total num frames: 180224. Throughput: 0: 929.6. Samples: 46282. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-25 14:12:02,802][00869] Avg episode reward: [(0, '4.235')]
+[2023-02-25 14:12:07,797][00869] Fps is (10 sec: 2457.6, 60 sec: 3208.8, 300 sec: 2750.2). Total num frames: 192512. Throughput: 0: 909.3. Samples: 47602. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-25 14:12:07,799][00869] Avg episode reward: [(0, '4.222')]
+[2023-02-25 14:12:12,142][10880] Updated weights for policy 0, policy_version 50 (0.0027)
+[2023-02-25 14:12:12,797][00869] Fps is (10 sec: 2457.6, 60 sec: 3413.3, 300 sec: 2730.7). Total num frames: 204800. Throughput: 0: 889.6. Samples: 51216. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:12:12,803][00869] Avg episode reward: [(0, '4.205')]
+[2023-02-25 14:12:17,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 2867.2). Total num frames: 229376. Throughput: 0: 890.4. Samples: 58134. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-25 14:12:17,799][00869] Avg episode reward: [(0, '4.345')]
+[2023-02-25 14:12:21,221][10880] Updated weights for policy 0, policy_version 60 (0.0024)
+[2023-02-25 14:12:22,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 2939.5). Total num frames: 249856. Throughput: 0: 890.9. Samples: 61730. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-25 14:12:22,803][00869] Avg episode reward: [(0, '4.548')]
+[2023-02-25 14:12:22,806][10866] Saving new best policy, reward=4.548!
+[2023-02-25 14:12:27,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 2912.7). Total num frames: 262144. Throughput: 0: 871.7. Samples: 66078. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:12:27,800][00869] Avg episode reward: [(0, '4.497')]
+[2023-02-25 14:12:27,817][10866] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000064_262144.pth...
+[2023-02-25 14:12:32,797][00869] Fps is (10 sec: 3276.7, 60 sec: 3549.9, 300 sec: 2975.0). Total num frames: 282624. Throughput: 0: 885.2. Samples: 71560. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:12:32,801][00869] Avg episode reward: [(0, '4.391')]
+[2023-02-25 14:12:33,190][10880] Updated weights for policy 0, policy_version 70 (0.0036)
+[2023-02-25 14:12:37,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3618.1, 300 sec: 3072.0). Total num frames: 307200. Throughput: 0: 880.2. Samples: 74918. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-25 14:12:37,799][00869] Avg episode reward: [(0, '4.391')]
+[2023-02-25 14:12:42,797][00869] Fps is (10 sec: 4096.1, 60 sec: 3618.1, 300 sec: 3081.8). Total num frames: 323584. Throughput: 0: 865.1. Samples: 81368. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:12:42,799][00869] Avg episode reward: [(0, '4.431')]
+[2023-02-25 14:12:43,232][10880] Updated weights for policy 0, policy_version 80 (0.0014)
+[2023-02-25 14:12:47,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3090.6). Total num frames: 339968. Throughput: 0: 873.7. Samples: 85598. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:12:47,800][00869] Avg episode reward: [(0, '4.305')]
+[2023-02-25 14:12:52,799][00869] Fps is (10 sec: 3685.6, 60 sec: 3549.7, 300 sec: 3134.3). Total num frames: 360448. Throughput: 0: 900.8. Samples: 88142. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:12:52,802][00869] Avg episode reward: [(0, '4.326')]
+[2023-02-25 14:12:54,581][10880] Updated weights for policy 0, policy_version 90 (0.0036)
+[2023-02-25 14:12:57,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3174.4). Total num frames: 380928. Throughput: 0: 972.9. Samples: 94998. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:12:57,805][00869] Avg episode reward: [(0, '4.393')]
+[2023-02-25 14:13:02,797][00869] Fps is (10 sec: 3687.2, 60 sec: 3618.1, 300 sec: 3178.5). Total num frames: 397312. Throughput: 0: 939.2. Samples: 100398. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:13:02,801][00869] Avg episode reward: [(0, '4.378')]
+[2023-02-25 14:13:06,177][10880] Updated weights for policy 0, policy_version 100 (0.0011)
+[2023-02-25 14:13:07,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3182.3). Total num frames: 413696. Throughput: 0: 906.0. Samples: 102500. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-25 14:13:07,803][00869] Avg episode reward: [(0, '4.464')]
+[2023-02-25 14:13:12,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3216.1). Total num frames: 434176. Throughput: 0: 929.8. Samples: 107920. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:13:12,803][00869] Avg episode reward: [(0, '4.392')]
+[2023-02-25 14:13:16,035][10880] Updated weights for policy 0, policy_version 110 (0.0021)
+[2023-02-25 14:13:17,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3276.8). Total num frames: 458752. Throughput: 0: 963.6. Samples: 114922. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-25 14:13:17,799][00869] Avg episode reward: [(0, '4.398')]
+[2023-02-25 14:13:22,797][00869] Fps is (10 sec: 4095.8, 60 sec: 3754.6, 300 sec: 3276.8). Total num frames: 475136. Throughput: 0: 955.8. Samples: 117930. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-25 14:13:22,803][00869] Avg episode reward: [(0, '4.502')]
+[2023-02-25 14:13:27,797][00869] Fps is (10 sec: 2867.2, 60 sec: 3754.7, 300 sec: 3249.5). Total num frames: 487424. Throughput: 0: 910.0. Samples: 122316. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-25 14:13:27,802][00869] Avg episode reward: [(0, '4.451')]
+[2023-02-25 14:13:27,950][10880] Updated weights for policy 0, policy_version 120 (0.0023)
+[2023-02-25 14:13:32,800][00869] Fps is (10 sec: 3685.3, 60 sec: 3822.7, 300 sec: 3303.2). Total num frames: 512000. Throughput: 0: 949.1. Samples: 128310. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-25 14:13:32,803][00869] Avg episode reward: [(0, '4.635')]
+[2023-02-25 14:13:32,813][10866] Saving new best policy, reward=4.635!
+[2023-02-25 14:13:37,224][10880] Updated weights for policy 0, policy_version 130 (0.0023)
+[2023-02-25 14:13:37,802][00869] Fps is (10 sec: 4503.4, 60 sec: 3754.4, 300 sec: 3327.9). Total num frames: 532480. Throughput: 0: 966.7. Samples: 131644. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-25 14:13:37,804][00869] Avg episode reward: [(0, '4.633')]
+[2023-02-25 14:13:42,803][00869] Fps is (10 sec: 3685.3, 60 sec: 3754.3, 300 sec: 3326.3). Total num frames: 548864. Throughput: 0: 943.8. Samples: 137476. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:13:42,806][00869] Avg episode reward: [(0, '4.481')]
+[2023-02-25 14:13:47,797][00869] Fps is (10 sec: 3278.2, 60 sec: 3754.6, 300 sec: 3325.0). Total num frames: 565248. Throughput: 0: 920.8. Samples: 141836. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-25 14:13:47,801][00869] Avg episode reward: [(0, '4.402')]
+[2023-02-25 14:13:49,764][10880] Updated weights for policy 0, policy_version 140 (0.0013)
+[2023-02-25 14:13:52,797][00869] Fps is (10 sec: 3688.8, 60 sec: 3754.8, 300 sec: 3347.0). Total num frames: 585728. Throughput: 0: 940.1. Samples: 144806. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-25 14:13:52,806][00869] Avg episode reward: [(0, '4.349')]
+[2023-02-25 14:13:57,797][00869] Fps is (10 sec: 4505.8, 60 sec: 3822.9, 300 sec: 3390.6). Total num frames: 610304. Throughput: 0: 974.4. Samples: 151770. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-25 14:13:57,799][00869] Avg episode reward: [(0, '4.285')]
+[2023-02-25 14:13:58,669][10880] Updated weights for policy 0, policy_version 150 (0.0016)
+[2023-02-25 14:14:02,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3387.5). Total num frames: 626688. Throughput: 0: 938.1. Samples: 157138. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:14:02,804][00869] Avg episode reward: [(0, '4.453')]
+[2023-02-25 14:14:07,797][00869] Fps is (10 sec: 2867.2, 60 sec: 3754.7, 300 sec: 3363.0). Total num frames: 638976. Throughput: 0: 919.5. Samples: 159306. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-25 14:14:07,803][00869] Avg episode reward: [(0, '4.510')]
+[2023-02-25 14:14:11,129][10880] Updated weights for policy 0, policy_version 160 (0.0020)
+[2023-02-25 14:14:12,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3381.8). Total num frames: 659456. Throughput: 0: 948.9. Samples: 165018. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:14:12,800][00869] Avg episode reward: [(0, '4.592')]
+[2023-02-25 14:14:17,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3420.2). Total num frames: 684032. Throughput: 0: 968.9. Samples: 171908. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-25 14:14:17,802][00869] Avg episode reward: [(0, '4.643')]
+[2023-02-25 14:14:17,813][10866] Saving new best policy, reward=4.643!
+[2023-02-25 14:14:21,010][10880] Updated weights for policy 0, policy_version 170 (0.0012)
+[2023-02-25 14:14:22,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3416.7). Total num frames: 700416. Throughput: 0: 951.6. Samples: 174462. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:14:22,800][00869] Avg episode reward: [(0, '4.356')]
+[2023-02-25 14:14:27,797][00869] Fps is (10 sec: 2867.2, 60 sec: 3754.7, 300 sec: 3393.8). Total num frames: 712704. Throughput: 0: 917.3. Samples: 178748. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:14:27,802][00869] Avg episode reward: [(0, '4.419')]
+[2023-02-25 14:14:27,821][10866] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000174_712704.pth...
+[2023-02-25 14:14:32,599][10880] Updated weights for policy 0, policy_version 180 (0.0016)
+[2023-02-25 14:14:32,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3754.9, 300 sec: 3429.2). Total num frames: 737280. Throughput: 0: 957.2. Samples: 184908. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:14:32,799][00869] Avg episode reward: [(0, '4.470')]
+[2023-02-25 14:14:37,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3755.0, 300 sec: 3444.4). Total num frames: 757760. Throughput: 0: 967.9. Samples: 188362. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:14:37,801][00869] Avg episode reward: [(0, '4.714')]
+[2023-02-25 14:14:37,819][10866] Saving new best policy, reward=4.714!
+[2023-02-25 14:14:42,799][00869] Fps is (10 sec: 3685.4, 60 sec: 3754.9, 300 sec: 3440.6). Total num frames: 774144. Throughput: 0: 934.2. Samples: 193810. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:14:42,803][00869] Avg episode reward: [(0, '4.617')]
+[2023-02-25 14:14:43,443][10880] Updated weights for policy 0, policy_version 190 (0.0013)
+[2023-02-25 14:14:47,797][00869] Fps is (10 sec: 3276.7, 60 sec: 3754.7, 300 sec: 3437.1). Total num frames: 790528. Throughput: 0: 912.4. Samples: 198194. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:14:47,806][00869] Avg episode reward: [(0, '4.775')]
+[2023-02-25 14:14:47,817][10866] Saving new best policy, reward=4.775!
+[2023-02-25 14:14:52,797][00869] Fps is (10 sec: 3687.4, 60 sec: 3754.7, 300 sec: 3451.1). Total num frames: 811008. Throughput: 0: 934.5. Samples: 201358. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:14:52,805][00869] Avg episode reward: [(0, '4.456')]
+[2023-02-25 14:14:54,218][10880] Updated weights for policy 0, policy_version 200 (0.0014)
+[2023-02-25 14:14:57,799][00869] Fps is (10 sec: 4504.5, 60 sec: 3754.5, 300 sec: 3481.6). Total num frames: 835584. Throughput: 0: 962.6. Samples: 208336. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:14:57,804][00869] Avg episode reward: [(0, '4.336')]
+[2023-02-25 14:15:02,799][00869] Fps is (10 sec: 3685.5, 60 sec: 3686.3, 300 sec: 3460.7). Total num frames: 847872. Throughput: 0: 919.2. Samples: 213272. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:15:02,804][00869] Avg episode reward: [(0, '4.364')]
+[2023-02-25 14:15:05,787][10880] Updated weights for policy 0, policy_version 210 (0.0012)
+[2023-02-25 14:15:07,797][00869] Fps is (10 sec: 2867.9, 60 sec: 3754.7, 300 sec: 3457.0). Total num frames: 864256. Throughput: 0: 910.9. Samples: 215454. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:15:07,802][00869] Avg episode reward: [(0, '4.440')]
+[2023-02-25 14:15:12,797][00869] Fps is (10 sec: 4096.8, 60 sec: 3822.9, 300 sec: 3485.6). Total num frames: 888832. Throughput: 0: 952.5. Samples: 221612. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:15:12,803][00869] Avg episode reward: [(0, '4.897')]
+[2023-02-25 14:15:12,806][10866] Saving new best policy, reward=4.897!
+[2023-02-25 14:15:15,420][10880] Updated weights for policy 0, policy_version 220 (0.0016)
+[2023-02-25 14:15:17,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3497.4). Total num frames: 909312. Throughput: 0: 967.2. Samples: 228430. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-25 14:15:17,801][00869] Avg episode reward: [(0, '4.827')]
+[2023-02-25 14:15:22,797][00869] Fps is (10 sec: 3686.5, 60 sec: 3754.7, 300 sec: 3493.2). Total num frames: 925696. Throughput: 0: 941.4. Samples: 230726. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:15:22,804][00869] Avg episode reward: [(0, '4.535')]
+[2023-02-25 14:15:27,677][10880] Updated weights for policy 0, policy_version 230 (0.0041)
+[2023-02-25 14:15:27,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3489.2). Total num frames: 942080. Throughput: 0: 918.9. Samples: 235156. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:15:27,799][00869] Avg episode reward: [(0, '4.378')]
+[2023-02-25 14:15:32,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3500.2). Total num frames: 962560. Throughput: 0: 966.5. Samples: 241686. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-25 14:15:32,799][00869] Avg episode reward: [(0, '4.495')]
+[2023-02-25 14:15:37,798][00869] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3496.2). Total num frames: 978944. Throughput: 0: 961.4. Samples: 244620. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:15:37,804][00869] Avg episode reward: [(0, '4.352')]
+[2023-02-25 14:15:37,981][10880] Updated weights for policy 0, policy_version 240 (0.0025)
+[2023-02-25 14:15:42,802][00869] Fps is (10 sec: 2865.6, 60 sec: 3618.0, 300 sec: 3477.9). Total num frames: 991232. Throughput: 0: 888.2. Samples: 248308. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-25 14:15:42,804][00869] Avg episode reward: [(0, '4.413')]
+[2023-02-25 14:15:47,797][00869] Fps is (10 sec: 2457.5, 60 sec: 3549.9, 300 sec: 3460.4). Total num frames: 1003520. Throughput: 0: 853.4. Samples: 251672. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-25 14:15:47,804][00869] Avg episode reward: [(0, '4.375')]
+[2023-02-25 14:15:52,797][00869] Fps is (10 sec: 2868.8, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 1019904. Throughput: 0: 851.6. Samples: 253778. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-25 14:15:52,799][00869] Avg episode reward: [(0, '4.556')]
+[2023-02-25 14:15:53,006][10880] Updated weights for policy 0, policy_version 250 (0.0012)
+[2023-02-25 14:15:57,797][00869] Fps is (10 sec: 4096.1, 60 sec: 3481.7, 300 sec: 3540.6). Total num frames: 1044480. Throughput: 0: 865.9. Samples: 260578. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-25 14:15:57,799][00869] Avg episode reward: [(0, '4.631')]
+[2023-02-25 14:16:02,700][10880] Updated weights for policy 0, policy_version 260 (0.0011)
+[2023-02-25 14:16:02,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3618.3, 300 sec: 3610.1). Total num frames: 1064960. Throughput: 0: 849.4. Samples: 266652. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:16:02,801][00869] Avg episode reward: [(0, '4.712')]
+[2023-02-25 14:16:07,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 1077248. Throughput: 0: 846.3. Samples: 268808. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:16:07,799][00869] Avg episode reward: [(0, '4.650')]
+[2023-02-25 14:16:12,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3665.6). Total num frames: 1097728. Throughput: 0: 857.3. Samples: 273736. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:16:12,799][00869] Avg episode reward: [(0, '4.730')]
+[2023-02-25 14:16:14,595][10880] Updated weights for policy 0, policy_version 270 (0.0013)
+[2023-02-25 14:16:17,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3693.3). Total num frames: 1118208. Throughput: 0: 865.6. Samples: 280640. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-25 14:16:17,799][00869] Avg episode reward: [(0, '4.560')]
+[2023-02-25 14:16:22,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3707.2). Total num frames: 1138688. Throughput: 0: 875.3. Samples: 284010. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-25 14:16:22,801][00869] Avg episode reward: [(0, '4.659')]
+[2023-02-25 14:16:25,133][10880] Updated weights for policy 0, policy_version 280 (0.0024)
+[2023-02-25 14:16:27,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3665.6). Total num frames: 1150976. Throughput: 0: 889.8. Samples: 288344. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-25 14:16:27,804][00869] Avg episode reward: [(0, '4.722')]
+[2023-02-25 14:16:27,851][10866] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000282_1155072.pth...
+[2023-02-25 14:16:27,987][10866] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000064_262144.pth
+[2023-02-25 14:16:32,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3665.6). Total num frames: 1171456. Throughput: 0: 933.9. Samples: 293696. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:16:32,805][00869] Avg episode reward: [(0, '4.911')]
+[2023-02-25 14:16:32,810][10866] Saving new best policy, reward=4.911!
+[2023-02-25 14:16:35,929][10880] Updated weights for policy 0, policy_version 290 (0.0019)
+[2023-02-25 14:16:37,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3618.1, 300 sec: 3693.3). Total num frames: 1196032. Throughput: 0: 960.9. Samples: 297020. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2023-02-25 14:16:37,799][00869] Avg episode reward: [(0, '4.733')]
+[2023-02-25 14:16:42,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3686.7, 300 sec: 3693.3). Total num frames: 1212416. Throughput: 0: 948.3. Samples: 303250. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:16:42,803][00869] Avg episode reward: [(0, '4.732')]
+[2023-02-25 14:16:47,802][00869] Fps is (10 sec: 2865.8, 60 sec: 3686.1, 300 sec: 3651.6). Total num frames: 1224704. Throughput: 0: 908.8. Samples: 307552. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:16:47,810][00869] Avg episode reward: [(0, '4.777')]
+[2023-02-25 14:16:48,052][10880] Updated weights for policy 0, policy_version 300 (0.0022)
+[2023-02-25 14:16:52,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 1245184. Throughput: 0: 916.9. Samples: 310068. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:16:52,802][00869] Avg episode reward: [(0, '4.865')]
+[2023-02-25 14:16:57,797][00869] Fps is (10 sec: 4098.1, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 1265664. Throughput: 0: 954.1. Samples: 316670. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2023-02-25 14:16:57,804][00869] Avg episode reward: [(0, '4.829')]
+[2023-02-25 14:16:57,837][10880] Updated weights for policy 0, policy_version 310 (0.0030)
+[2023-02-25 14:17:02,798][00869] Fps is (10 sec: 4095.4, 60 sec: 3686.3, 300 sec: 3707.2). Total num frames: 1286144. Throughput: 0: 921.3. Samples: 322098. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:17:02,804][00869] Avg episode reward: [(0, '5.038')]
+[2023-02-25 14:17:02,808][10866] Saving new best policy, reward=5.038!
+[2023-02-25 14:17:07,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3707.2). Total num frames: 1298432. Throughput: 0: 892.2. Samples: 324158. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:17:07,803][00869] Avg episode reward: [(0, '5.227')]
+[2023-02-25 14:17:07,821][10866] Saving new best policy, reward=5.227!
+[2023-02-25 14:17:10,602][10880] Updated weights for policy 0, policy_version 320 (0.0024)
+[2023-02-25 14:17:12,797][00869] Fps is (10 sec: 3277.3, 60 sec: 3686.4, 300 sec: 3693.3). Total num frames: 1318912. Throughput: 0: 914.2. Samples: 329482. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:17:12,799][00869] Avg episode reward: [(0, '5.404')]
+[2023-02-25 14:17:12,807][10866] Saving new best policy, reward=5.404!
+[2023-02-25 14:17:17,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 1343488. Throughput: 0: 952.8. Samples: 336572. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:17:17,802][00869] Avg episode reward: [(0, '5.184')]
+[2023-02-25 14:17:19,289][10880] Updated weights for policy 0, policy_version 330 (0.0012)
+[2023-02-25 14:17:22,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 1359872. Throughput: 0: 946.4. Samples: 339610. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:17:22,804][00869] Avg episode reward: [(0, '5.135')]
+[2023-02-25 14:17:27,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 1376256. Throughput: 0: 909.1. Samples: 344160. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-25 14:17:27,800][00869] Avg episode reward: [(0, '5.558')]
+[2023-02-25 14:17:27,813][10866] Saving new best policy, reward=5.558!
+[2023-02-25 14:17:31,330][10880] Updated weights for policy 0, policy_version 340 (0.0020)
+[2023-02-25 14:17:32,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 1396736. Throughput: 0: 947.2. Samples: 350170. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-25 14:17:32,799][00869] Avg episode reward: [(0, '5.400')]
+[2023-02-25 14:17:37,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 1421312. Throughput: 0: 968.2. Samples: 353638. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-25 14:17:37,799][00869] Avg episode reward: [(0, '5.048')]
+[2023-02-25 14:17:40,452][10880] Updated weights for policy 0, policy_version 350 (0.0017)
+[2023-02-25 14:17:42,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 1437696. Throughput: 0: 957.3. Samples: 359748. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-25 14:17:42,800][00869] Avg episode reward: [(0, '4.961')]
+[2023-02-25 14:17:47,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3823.3, 300 sec: 3707.3). Total num frames: 1454080. Throughput: 0: 936.5. Samples: 364238. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:17:47,803][00869] Avg episode reward: [(0, '5.058')]
+[2023-02-25 14:17:52,234][10880] Updated weights for policy 0, policy_version 360 (0.0021)
+[2023-02-25 14:17:52,799][00869] Fps is (10 sec: 3685.6, 60 sec: 3822.8, 300 sec: 3707.2). Total num frames: 1474560. Throughput: 0: 955.2. Samples: 367142. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-25 14:17:52,807][00869] Avg episode reward: [(0, '5.536')]
+[2023-02-25 14:17:57,797][00869] Fps is (10 sec: 4505.5, 60 sec: 3891.2, 300 sec: 3735.0). Total num frames: 1499136. Throughput: 0: 992.4. Samples: 374138. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-25 14:17:57,804][00869] Avg episode reward: [(0, '5.489')]
+[2023-02-25 14:18:02,607][10880] Updated weights for policy 0, policy_version 370 (0.0013)
+[2023-02-25 14:18:02,799][00869] Fps is (10 sec: 4095.9, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 1515520. Throughput: 0: 954.7. Samples: 379538. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-25 14:18:02,808][00869] Avg episode reward: [(0, '5.563')]
+[2023-02-25 14:18:02,815][10866] Saving new best policy, reward=5.563!
+[2023-02-25 14:18:07,797][00869] Fps is (10 sec: 2867.3, 60 sec: 3822.9, 300 sec: 3707.2). Total num frames: 1527808. Throughput: 0: 934.8. Samples: 381678. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-25 14:18:07,806][00869] Avg episode reward: [(0, '5.742')]
+[2023-02-25 14:18:07,820][10866] Saving new best policy, reward=5.742!
+[2023-02-25 14:18:12,797][00869] Fps is (10 sec: 3687.3, 60 sec: 3891.2, 300 sec: 3707.2). Total num frames: 1552384. Throughput: 0: 958.6. Samples: 387296. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-25 14:18:12,804][00869] Avg episode reward: [(0, '6.126')]
+[2023-02-25 14:18:12,808][10866] Saving new best policy, reward=6.126!
+[2023-02-25 14:18:13,784][10880] Updated weights for policy 0, policy_version 380 (0.0012)
+[2023-02-25 14:18:17,800][00869] Fps is (10 sec: 4504.2, 60 sec: 3822.7, 300 sec: 3721.1). Total num frames: 1572864. Throughput: 0: 978.8. Samples: 394220. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:18:17,802][00869] Avg episode reward: [(0, '6.725')]
+[2023-02-25 14:18:17,819][10866] Saving new best policy, reward=6.725!
+[2023-02-25 14:18:22,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 1589248. Throughput: 0: 961.6. Samples: 396908. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-25 14:18:22,801][00869] Avg episode reward: [(0, '6.577')]
+[2023-02-25 14:18:24,729][10880] Updated weights for policy 0, policy_version 390 (0.0016)
+[2023-02-25 14:18:27,797][00869] Fps is (10 sec: 3277.9, 60 sec: 3822.9, 300 sec: 3707.3). Total num frames: 1605632. Throughput: 0: 923.8. Samples: 401318. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-25 14:18:27,799][00869] Avg episode reward: [(0, '6.363')]
+[2023-02-25 14:18:27,807][10866] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000392_1605632.pth...
+[2023-02-25 14:18:27,973][10866] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000174_712704.pth
+[2023-02-25 14:18:32,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3707.3). Total num frames: 1626112. Throughput: 0: 959.1. Samples: 407396. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:18:32,802][00869] Avg episode reward: [(0, '6.888')]
+[2023-02-25 14:18:32,808][10866] Saving new best policy, reward=6.888!
+[2023-02-25 14:18:34,875][10880] Updated weights for policy 0, policy_version 400 (0.0021)
+[2023-02-25 14:18:37,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3735.1). Total num frames: 1650688. Throughput: 0: 969.5. Samples: 410766. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2023-02-25 14:18:37,802][00869] Avg episode reward: [(0, '7.576')]
+[2023-02-25 14:18:37,814][10866] Saving new best policy, reward=7.576!
+[2023-02-25 14:18:42,800][00869] Fps is (10 sec: 4094.5, 60 sec: 3822.7, 300 sec: 3735.0). Total num frames: 1667072. Throughput: 0: 936.6. Samples: 416290. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-25 14:18:42,806][00869] Avg episode reward: [(0, '7.583')]
+[2023-02-25 14:18:42,809][10866] Saving new best policy, reward=7.583!
+[2023-02-25 14:18:47,224][10880] Updated weights for policy 0, policy_version 410 (0.0026)
+[2023-02-25 14:18:47,798][00869] Fps is (10 sec: 2866.8, 60 sec: 3754.6, 300 sec: 3707.2). Total num frames: 1679360. Throughput: 0: 913.0. Samples: 420622. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-25 14:18:47,805][00869] Avg episode reward: [(0, '7.331')]
+[2023-02-25 14:18:52,797][00869] Fps is (10 sec: 3277.9, 60 sec: 3754.8, 300 sec: 3693.3). Total num frames: 1699840. Throughput: 0: 934.8. Samples: 423746. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:18:52,804][00869] Avg episode reward: [(0, '7.185')]
+[2023-02-25 14:18:56,510][10880] Updated weights for policy 0, policy_version 420 (0.0022)
+[2023-02-25 14:18:57,797][00869] Fps is (10 sec: 4506.2, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 1724416. Throughput: 0: 965.1. Samples: 430724. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:18:57,799][00869] Avg episode reward: [(0, '7.023')]
+[2023-02-25 14:19:02,797][00869] Fps is (10 sec: 4095.8, 60 sec: 3754.8, 300 sec: 3735.0). Total num frames: 1740800. Throughput: 0: 925.4. Samples: 435862. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-25 14:19:02,802][00869] Avg episode reward: [(0, '7.321')]
+[2023-02-25 14:19:07,797][00869] Fps is (10 sec: 2867.2, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 1753088. Throughput: 0: 913.7. Samples: 438026. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:19:07,810][00869] Avg episode reward: [(0, '7.775')]
+[2023-02-25 14:19:07,821][10866] Saving new best policy, reward=7.775!
+[2023-02-25 14:19:09,021][10880] Updated weights for policy 0, policy_version 430 (0.0044)
+[2023-02-25 14:19:12,797][00869] Fps is (10 sec: 3686.6, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 1777664. Throughput: 0: 946.3. Samples: 443902. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:19:12,806][00869] Avg episode reward: [(0, '8.114')]
+[2023-02-25 14:19:12,810][10866] Saving new best policy, reward=8.114!
+[2023-02-25 14:19:17,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3754.9, 300 sec: 3721.1). Total num frames: 1798144. Throughput: 0: 964.1. Samples: 450780. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-25 14:19:17,804][00869] Avg episode reward: [(0, '9.286')]
+[2023-02-25 14:19:17,820][10866] Saving new best policy, reward=9.286!
+[2023-02-25 14:19:18,138][10880] Updated weights for policy 0, policy_version 440 (0.0022)
+[2023-02-25 14:19:22,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 1814528. Throughput: 0: 940.7. Samples: 453098. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+[2023-02-25 14:19:22,807][00869] Avg episode reward: [(0, '9.391')]
+[2023-02-25 14:19:22,814][10866] Saving new best policy, reward=9.391!
+[2023-02-25 14:19:27,797][00869] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3693.3). Total num frames: 1826816. Throughput: 0: 916.7. Samples: 457538. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+[2023-02-25 14:19:27,801][00869] Avg episode reward: [(0, '9.945')]
+[2023-02-25 14:19:27,909][10866] Saving new best policy, reward=9.945!
+[2023-02-25 14:19:30,560][10880] Updated weights for policy 0, policy_version 450 (0.0025)
+[2023-02-25 14:19:32,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 1851392. Throughput: 0: 953.3. Samples: 463518. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:19:32,799][00869] Avg episode reward: [(0, '9.842')]
+[2023-02-25 14:19:37,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3693.4). Total num frames: 1863680. Throughput: 0: 929.8. Samples: 465586. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:19:37,799][00869] Avg episode reward: [(0, '9.690')]
+[2023-02-25 14:19:42,800][00869] Fps is (10 sec: 2456.7, 60 sec: 3481.6, 300 sec: 3679.4). Total num frames: 1875968. Throughput: 0: 860.8. Samples: 469462. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:19:42,804][00869] Avg episode reward: [(0, '9.212')]
+[2023-02-25 14:19:44,785][10880] Updated weights for policy 0, policy_version 460 (0.0020)
+[2023-02-25 14:19:47,797][00869] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3665.6). Total num frames: 1892352. Throughput: 0: 837.7. Samples: 473560. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:19:47,800][00869] Avg episode reward: [(0, '9.005')]
+[2023-02-25 14:19:52,797][00869] Fps is (10 sec: 3278.0, 60 sec: 3481.6, 300 sec: 3637.8). Total num frames: 1908736. Throughput: 0: 849.3. Samples: 476244. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:19:52,799][00869] Avg episode reward: [(0, '8.747')]
+[2023-02-25 14:19:55,334][10880] Updated weights for policy 0, policy_version 470 (0.0019)
+[2023-02-25 14:19:57,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3679.5). Total num frames: 1933312. Throughput: 0: 877.9. Samples: 483408. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:19:57,802][00869] Avg episode reward: [(0, '9.162')]
+[2023-02-25 14:20:02,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3549.9, 300 sec: 3693.3). Total num frames: 1953792. Throughput: 0: 849.3. Samples: 488998. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:20:02,802][00869] Avg episode reward: [(0, '9.239')]
+[2023-02-25 14:20:06,973][10880] Updated weights for policy 0, policy_version 480 (0.0013)
+[2023-02-25 14:20:07,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 1966080. Throughput: 0: 845.2. Samples: 491132. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:20:07,803][00869] Avg episode reward: [(0, '9.224')]
+[2023-02-25 14:20:12,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3651.7). Total num frames: 1986560. Throughput: 0: 870.4. Samples: 496708. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:20:12,800][00869] Avg episode reward: [(0, '9.736')]
+[2023-02-25 14:20:16,279][10880] Updated weights for policy 0, policy_version 490 (0.0024)
+[2023-02-25 14:20:17,797][00869] Fps is (10 sec: 4505.5, 60 sec: 3549.9, 300 sec: 3679.5). Total num frames: 2011136. Throughput: 0: 893.0. Samples: 503702. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-25 14:20:17,799][00869] Avg episode reward: [(0, '10.480')]
+[2023-02-25 14:20:17,816][10866] Saving new best policy, reward=10.480!
+[2023-02-25 14:20:22,798][00869] Fps is (10 sec: 4095.7, 60 sec: 3549.8, 300 sec: 3679.4). Total num frames: 2027520. Throughput: 0: 907.3. Samples: 506416. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:20:22,799][00869] Avg episode reward: [(0, '10.989')]
+[2023-02-25 14:20:22,803][10866] Saving new best policy, reward=10.989!
+[2023-02-25 14:20:27,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 2043904. Throughput: 0: 918.9. Samples: 510808. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-25 14:20:27,800][00869] Avg episode reward: [(0, '10.774')]
+[2023-02-25 14:20:27,808][10866] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000499_2043904.pth...
+[2023-02-25 14:20:28,014][10866] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000282_1155072.pth
+[2023-02-25 14:20:28,852][10880] Updated weights for policy 0, policy_version 500 (0.0021)
+[2023-02-25 14:20:32,797][00869] Fps is (10 sec: 3686.7, 60 sec: 3549.9, 300 sec: 3679.5). Total num frames: 2064384. Throughput: 0: 961.1. Samples: 516810. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-25 14:20:32,800][00869] Avg episode reward: [(0, '10.623')]
+[2023-02-25 14:20:37,783][10880] Updated weights for policy 0, policy_version 510 (0.0017)
+[2023-02-25 14:20:37,797][00869] Fps is (10 sec: 4505.7, 60 sec: 3754.7, 300 sec: 3721.2). Total num frames: 2088960. Throughput: 0: 978.3. Samples: 520266. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-25 14:20:37,799][00869] Avg episode reward: [(0, '11.155')]
+[2023-02-25 14:20:37,808][10866] Saving new best policy, reward=11.155!
+[2023-02-25 14:20:42,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3823.2, 300 sec: 3735.0). Total num frames: 2105344. Throughput: 0: 945.4. Samples: 525950. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:20:42,803][00869] Avg episode reward: [(0, '12.215')]
+[2023-02-25 14:20:42,805][10866] Saving new best policy, reward=12.215!
+[2023-02-25 14:20:47,797][00869] Fps is (10 sec: 2867.1, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 2117632. Throughput: 0: 920.5. Samples: 530422. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:20:47,801][00869] Avg episode reward: [(0, '12.576')]
+[2023-02-25 14:20:47,813][10866] Saving new best policy, reward=12.576!
+[2023-02-25 14:20:50,292][10880] Updated weights for policy 0, policy_version 520 (0.0013)
+[2023-02-25 14:20:52,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3707.2). Total num frames: 2138112. Throughput: 0: 939.1. Samples: 533392. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-25 14:20:52,803][00869] Avg episode reward: [(0, '11.429')]
+[2023-02-25 14:20:57,797][00869] Fps is (10 sec: 4505.7, 60 sec: 3822.9, 300 sec: 3721.1). Total num frames: 2162688. Throughput: 0: 971.2. Samples: 540414. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-25 14:20:57,801][00869] Avg episode reward: [(0, '10.668')]
+[2023-02-25 14:20:59,263][10880] Updated weights for policy 0, policy_version 530 (0.0012)
+[2023-02-25 14:21:02,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 2179072. Throughput: 0: 930.4. Samples: 545572. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-25 14:21:02,799][00869] Avg episode reward: [(0, '11.095')]
+[2023-02-25 14:21:07,797][00869] Fps is (10 sec: 2867.0, 60 sec: 3754.6, 300 sec: 3707.2). Total num frames: 2191360. Throughput: 0: 918.4. Samples: 547742. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:21:07,807][00869] Avg episode reward: [(0, '10.997')]
+[2023-02-25 14:21:11,523][10880] Updated weights for policy 0, policy_version 540 (0.0027)
+[2023-02-25 14:21:12,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3721.1). Total num frames: 2215936. Throughput: 0: 950.1. Samples: 553562. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:21:12,804][00869] Avg episode reward: [(0, '11.660')]
+[2023-02-25 14:21:17,797][00869] Fps is (10 sec: 4915.5, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 2240512. Throughput: 0: 976.8. Samples: 560766. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-25 14:21:17,799][00869] Avg episode reward: [(0, '12.335')]
+[2023-02-25 14:21:21,362][10880] Updated weights for policy 0, policy_version 550 (0.0026)
+[2023-02-25 14:21:22,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3823.0, 300 sec: 3748.9). Total num frames: 2256896. Throughput: 0: 953.8. Samples: 563188. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-25 14:21:22,800][00869] Avg episode reward: [(0, '11.839')]
+[2023-02-25 14:21:27,797][00869] Fps is (10 sec: 2867.1, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 2269184. Throughput: 0: 925.4. Samples: 567592. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:21:27,799][00869] Avg episode reward: [(0, '12.035')]
+[2023-02-25 14:21:32,654][10880] Updated weights for policy 0, policy_version 560 (0.0013)
+[2023-02-25 14:21:32,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3721.1). Total num frames: 2293760. Throughput: 0: 967.7. Samples: 573968. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-25 14:21:32,800][00869] Avg episode reward: [(0, '13.546')]
+[2023-02-25 14:21:32,801][10866] Saving new best policy, reward=13.546!
+[2023-02-25 14:21:37,797][00869] Fps is (10 sec: 4505.7, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 2314240. Throughput: 0: 976.3. Samples: 577324. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:21:37,802][00869] Avg episode reward: [(0, '13.323')]
+[2023-02-25 14:21:42,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 2330624. Throughput: 0: 942.2. Samples: 582812. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-25 14:21:42,801][00869] Avg episode reward: [(0, '14.215')]
+[2023-02-25 14:21:42,806][10866] Saving new best policy, reward=14.215!
+[2023-02-25 14:21:43,623][10880] Updated weights for policy 0, policy_version 570 (0.0013)
+[2023-02-25 14:21:47,800][00869] Fps is (10 sec: 3275.7, 60 sec: 3822.7, 300 sec: 3735.0). Total num frames: 2347008. Throughput: 0: 924.0. Samples: 587154. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-25 14:21:47,805][00869] Avg episode reward: [(0, '16.039')]
+[2023-02-25 14:21:47,819][10866] Saving new best policy, reward=16.039!
+[2023-02-25 14:21:52,797][00869] Fps is (10 sec: 3686.3, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 2367488. Throughput: 0: 948.7. Samples: 590434. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:21:52,806][00869] Avg episode reward: [(0, '16.511')]
+[2023-02-25 14:21:52,809][10866] Saving new best policy, reward=16.511!
+[2023-02-25 14:21:54,135][10880] Updated weights for policy 0, policy_version 580 (0.0016)
+[2023-02-25 14:21:57,797][00869] Fps is (10 sec: 4507.1, 60 sec: 3822.9, 300 sec: 3748.9). Total num frames: 2392064. Throughput: 0: 971.8. Samples: 597292. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:21:57,802][00869] Avg episode reward: [(0, '17.123')]
+[2023-02-25 14:21:57,813][10866] Saving new best policy, reward=17.123!
+[2023-02-25 14:22:02,797][00869] Fps is (10 sec: 3686.5, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 2404352. Throughput: 0: 918.9. Samples: 602118. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2023-02-25 14:22:02,804][00869] Avg episode reward: [(0, '17.584')]
+[2023-02-25 14:22:02,809][10866] Saving new best policy, reward=17.584!
+[2023-02-25 14:22:06,621][10880] Updated weights for policy 0, policy_version 590 (0.0013)
+[2023-02-25 14:22:07,797][00869] Fps is (10 sec: 2457.6, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 2416640. Throughput: 0: 909.6. Samples: 604118. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:22:07,805][00869] Avg episode reward: [(0, '17.414')]
+[2023-02-25 14:22:12,797][00869] Fps is (10 sec: 3686.3, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 2441216. Throughput: 0: 944.0. Samples: 610070. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:22:12,799][00869] Avg episode reward: [(0, '16.833')]
+[2023-02-25 14:22:15,887][10880] Updated weights for policy 0, policy_version 600 (0.0014)
+[2023-02-25 14:22:17,797][00869] Fps is (10 sec: 4915.2, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 2465792. Throughput: 0: 957.2. Samples: 617040. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-25 14:22:17,800][00869] Avg episode reward: [(0, '16.683')]
+[2023-02-25 14:22:22,800][00869] Fps is (10 sec: 3685.2, 60 sec: 3686.2, 300 sec: 3735.0). Total num frames: 2478080. Throughput: 0: 930.4. Samples: 619196. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-25 14:22:22,803][00869] Avg episode reward: [(0, '16.506')]
+[2023-02-25 14:22:27,797][00869] Fps is (10 sec: 2867.2, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 2494464. Throughput: 0: 904.8. Samples: 623530. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:22:27,799][00869] Avg episode reward: [(0, '15.938')]
+[2023-02-25 14:22:27,813][10866] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000609_2494464.pth...
+[2023-02-25 14:22:27,935][10866] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000392_1605632.pth
+[2023-02-25 14:22:28,424][10880] Updated weights for policy 0, policy_version 610 (0.0012)
+[2023-02-25 14:22:32,797][00869] Fps is (10 sec: 4097.5, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 2519040. Throughput: 0: 956.4. Samples: 630188. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:22:32,804][00869] Avg episode reward: [(0, '15.992')]
+[2023-02-25 14:22:37,023][10880] Updated weights for policy 0, policy_version 620 (0.0012)
+[2023-02-25 14:22:37,802][00869] Fps is (10 sec: 4503.1, 60 sec: 3754.3, 300 sec: 3734.9). Total num frames: 2539520. Throughput: 0: 959.7. Samples: 633626. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-25 14:22:37,809][00869] Avg episode reward: [(0, '15.959')]
+[2023-02-25 14:22:42,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 2555904. Throughput: 0: 920.1. Samples: 638698. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:22:42,799][00869] Avg episode reward: [(0, '15.717')]
+[2023-02-25 14:22:47,797][00869] Fps is (10 sec: 3278.6, 60 sec: 3754.9, 300 sec: 3721.1). Total num frames: 2572288. Throughput: 0: 919.6. Samples: 643498. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-25 14:22:47,799][00869] Avg episode reward: [(0, '16.029')]
+[2023-02-25 14:22:49,523][10880] Updated weights for policy 0, policy_version 630 (0.0037)
+[2023-02-25 14:22:52,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 2592768. Throughput: 0: 951.6. Samples: 646942. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:22:52,803][00869] Avg episode reward: [(0, '15.959')] +[2023-02-25 14:22:57,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 2613248. Throughput: 0: 974.4. Samples: 653918. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-25 14:22:57,799][00869] Avg episode reward: [(0, '16.744')] +[2023-02-25 14:22:59,165][10880] Updated weights for policy 0, policy_version 640 (0.0013) +[2023-02-25 14:23:02,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 2629632. Throughput: 0: 918.2. Samples: 658360. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:23:02,802][00869] Avg episode reward: [(0, '17.564')] +[2023-02-25 14:23:07,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3707.2). Total num frames: 2646016. Throughput: 0: 919.8. Samples: 660584. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:23:07,799][00869] Avg episode reward: [(0, '18.772')] +[2023-02-25 14:23:07,814][10866] Saving new best policy, reward=18.772! +[2023-02-25 14:23:10,943][10880] Updated weights for policy 0, policy_version 650 (0.0016) +[2023-02-25 14:23:12,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3707.3). Total num frames: 2666496. Throughput: 0: 966.1. Samples: 667004. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:23:12,809][00869] Avg episode reward: [(0, '19.958')] +[2023-02-25 14:23:12,849][10866] Saving new best policy, reward=19.958! +[2023-02-25 14:23:17,797][00869] Fps is (10 sec: 4505.7, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 2691072. Throughput: 0: 963.4. Samples: 673542. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:23:17,800][00869] Avg episode reward: [(0, '20.410')] +[2023-02-25 14:23:17,816][10866] Saving new best policy, reward=20.410! +[2023-02-25 14:23:21,373][10880] Updated weights for policy 0, policy_version 660 (0.0029) +[2023-02-25 14:23:22,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3754.9, 300 sec: 3721.1). Total num frames: 2703360. Throughput: 0: 935.3. Samples: 675708. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-25 14:23:22,800][00869] Avg episode reward: [(0, '19.885')] +[2023-02-25 14:23:27,798][00869] Fps is (10 sec: 2867.0, 60 sec: 3754.6, 300 sec: 3707.2). Total num frames: 2719744. Throughput: 0: 918.7. Samples: 680040. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 14:23:27,805][00869] Avg episode reward: [(0, '20.939')] +[2023-02-25 14:23:27,895][10866] Saving new best policy, reward=20.939! +[2023-02-25 14:23:32,381][10880] Updated weights for policy 0, policy_version 670 (0.0039) +[2023-02-25 14:23:32,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 2744320. Throughput: 0: 967.5. Samples: 687036. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:23:32,798][00869] Avg episode reward: [(0, '19.562')] +[2023-02-25 14:23:37,797][00869] Fps is (10 sec: 4505.9, 60 sec: 3755.0, 300 sec: 3721.2). Total num frames: 2764800. Throughput: 0: 967.7. Samples: 690490. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:23:37,800][00869] Avg episode reward: [(0, '17.542')] +[2023-02-25 14:23:42,802][00869] Fps is (10 sec: 3684.4, 60 sec: 3754.3, 300 sec: 3734.9). Total num frames: 2781184. Throughput: 0: 919.9. Samples: 695320. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:23:42,806][00869] Avg episode reward: [(0, '18.462')] +[2023-02-25 14:23:43,837][10880] Updated weights for policy 0, policy_version 680 (0.0038) +[2023-02-25 14:23:47,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 2797568. Throughput: 0: 929.5. Samples: 700186. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:23:47,799][00869] Avg episode reward: [(0, '18.920')] +[2023-02-25 14:23:52,797][00869] Fps is (10 sec: 4098.3, 60 sec: 3822.9, 300 sec: 3721.1). Total num frames: 2822144. Throughput: 0: 957.2. Samples: 703656. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:23:52,804][00869] Avg episode reward: [(0, '19.371')] +[2023-02-25 14:23:53,776][10880] Updated weights for policy 0, policy_version 690 (0.0013) +[2023-02-25 14:23:57,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 2838528. Throughput: 0: 954.8. Samples: 709970. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-25 14:23:57,804][00869] Avg episode reward: [(0, '19.194')] +[2023-02-25 14:24:02,798][00869] Fps is (10 sec: 2867.0, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 2850816. Throughput: 0: 882.4. Samples: 713252. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 14:24:02,803][00869] Avg episode reward: [(0, '20.239')] +[2023-02-25 14:24:07,797][00869] Fps is (10 sec: 2457.5, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 2863104. Throughput: 0: 872.6. Samples: 714974. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 14:24:07,801][00869] Avg episode reward: [(0, '20.843')] +[2023-02-25 14:24:09,566][10880] Updated weights for policy 0, policy_version 700 (0.0030) +[2023-02-25 14:24:12,797][00869] Fps is (10 sec: 2867.4, 60 sec: 3549.9, 300 sec: 3665.6). Total num frames: 2879488. Throughput: 0: 873.8. Samples: 719360. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 14:24:12,799][00869] Avg episode reward: [(0, '20.546')] +[2023-02-25 14:24:17,797][00869] Fps is (10 sec: 3686.6, 60 sec: 3481.6, 300 sec: 3679.5). Total num frames: 2899968. Throughput: 0: 873.8. Samples: 726356. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:24:17,799][00869] Avg episode reward: [(0, '18.792')] +[2023-02-25 14:24:18,737][10880] Updated weights for policy 0, policy_version 710 (0.0025) +[2023-02-25 14:24:22,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3707.2). Total num frames: 2920448. Throughput: 0: 870.6. Samples: 729666. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:24:22,799][00869] Avg episode reward: [(0, '18.789')] +[2023-02-25 14:24:27,799][00869] Fps is (10 sec: 3685.6, 60 sec: 3618.0, 300 sec: 3679.4). Total num frames: 2936832. Throughput: 0: 860.1. Samples: 734022. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 14:24:27,801][00869] Avg episode reward: [(0, '18.369')] +[2023-02-25 14:24:27,820][10866] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000717_2936832.pth... +[2023-02-25 14:24:27,980][10866] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000499_2043904.pth +[2023-02-25 14:24:31,259][10880] Updated weights for policy 0, policy_version 720 (0.0038) +[2023-02-25 14:24:32,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3693.3). Total num frames: 2953216. Throughput: 0: 873.0. Samples: 739472. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:24:32,799][00869] Avg episode reward: [(0, '18.638')] +[2023-02-25 14:24:37,797][00869] Fps is (10 sec: 4096.7, 60 sec: 3549.8, 300 sec: 3735.0). Total num frames: 2977792. Throughput: 0: 872.2. Samples: 742904. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:24:37,804][00869] Avg episode reward: [(0, '21.388')] +[2023-02-25 14:24:37,816][10866] Saving new best policy, reward=21.388! +[2023-02-25 14:24:40,047][10880] Updated weights for policy 0, policy_version 730 (0.0017) +[2023-02-25 14:24:42,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3618.5, 300 sec: 3748.9). Total num frames: 2998272. Throughput: 0: 874.9. Samples: 749340. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:24:42,799][00869] Avg episode reward: [(0, '21.222')] +[2023-02-25 14:24:47,800][00869] Fps is (10 sec: 3276.0, 60 sec: 3549.7, 300 sec: 3735.0). Total num frames: 3010560. Throughput: 0: 901.4. Samples: 753818. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:24:47,803][00869] Avg episode reward: [(0, '21.830')] +[2023-02-25 14:24:47,815][10866] Saving new best policy, reward=21.830! +[2023-02-25 14:24:52,341][10880] Updated weights for policy 0, policy_version 740 (0.0019) +[2023-02-25 14:24:52,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3721.1). Total num frames: 3031040. Throughput: 0: 917.7. Samples: 756268. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:24:52,804][00869] Avg episode reward: [(0, '23.445')] +[2023-02-25 14:24:52,807][10866] Saving new best policy, reward=23.445! +[2023-02-25 14:24:57,797][00869] Fps is (10 sec: 4097.3, 60 sec: 3549.9, 300 sec: 3721.1). Total num frames: 3051520. Throughput: 0: 972.1. Samples: 763104. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:24:57,799][00869] Avg episode reward: [(0, '22.757')] +[2023-02-25 14:25:01,969][10880] Updated weights for policy 0, policy_version 750 (0.0019) +[2023-02-25 14:25:02,797][00869] Fps is (10 sec: 4095.8, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 3072000. Throughput: 0: 946.8. Samples: 768964. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 14:25:02,799][00869] Avg episode reward: [(0, '20.553')] +[2023-02-25 14:25:07,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3088384. Throughput: 0: 923.2. Samples: 771208. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:25:07,803][00869] Avg episode reward: [(0, '20.456')] +[2023-02-25 14:25:12,797][00869] Fps is (10 sec: 3277.0, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 3104768. Throughput: 0: 941.7. Samples: 776396. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:25:12,805][00869] Avg episode reward: [(0, '20.647')] +[2023-02-25 14:25:13,797][10880] Updated weights for policy 0, policy_version 760 (0.0015) +[2023-02-25 14:25:17,797][00869] Fps is (10 sec: 4095.9, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 3129344. Throughput: 0: 977.2. Samples: 783446. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 14:25:17,800][00869] Avg episode reward: [(0, '19.866')] +[2023-02-25 14:25:22,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3748.9). Total num frames: 3149824. Throughput: 0: 973.0. Samples: 786690. 
Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 14:25:22,801][00869] Avg episode reward: [(0, '19.725')] +[2023-02-25 14:25:23,979][10880] Updated weights for policy 0, policy_version 770 (0.0012) +[2023-02-25 14:25:27,797][00869] Fps is (10 sec: 3276.7, 60 sec: 3754.8, 300 sec: 3721.1). Total num frames: 3162112. Throughput: 0: 927.4. Samples: 791074. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-25 14:25:27,800][00869] Avg episode reward: [(0, '19.844')] +[2023-02-25 14:25:32,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3707.2). Total num frames: 3182592. Throughput: 0: 954.5. Samples: 796768. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-25 14:25:32,806][00869] Avg episode reward: [(0, '21.503')] +[2023-02-25 14:25:34,874][10880] Updated weights for policy 0, policy_version 780 (0.0025) +[2023-02-25 14:25:37,797][00869] Fps is (10 sec: 4505.8, 60 sec: 3823.0, 300 sec: 3735.0). Total num frames: 3207168. Throughput: 0: 976.0. Samples: 800188. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:25:37,806][00869] Avg episode reward: [(0, '21.780')] +[2023-02-25 14:25:42,798][00869] Fps is (10 sec: 4095.4, 60 sec: 3754.6, 300 sec: 3748.9). Total num frames: 3223552. Throughput: 0: 958.7. Samples: 806246. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:25:42,806][00869] Avg episode reward: [(0, '21.694')] +[2023-02-25 14:25:45,846][10880] Updated weights for policy 0, policy_version 790 (0.0013) +[2023-02-25 14:25:47,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3823.1, 300 sec: 3735.0). Total num frames: 3239936. Throughput: 0: 926.7. Samples: 810666. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 14:25:47,806][00869] Avg episode reward: [(0, '21.670')] +[2023-02-25 14:25:52,797][00869] Fps is (10 sec: 3687.0, 60 sec: 3822.9, 300 sec: 3721.1). Total num frames: 3260416. Throughput: 0: 938.7. Samples: 813450. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:25:52,799][00869] Avg episode reward: [(0, '22.763')] +[2023-02-25 14:25:55,984][10880] Updated weights for policy 0, policy_version 800 (0.0013) +[2023-02-25 14:25:57,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3748.9). Total num frames: 3284992. Throughput: 0: 980.7. Samples: 820526. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:25:57,799][00869] Avg episode reward: [(0, '20.548')] +[2023-02-25 14:26:02,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3823.0, 300 sec: 3762.8). Total num frames: 3301376. Throughput: 0: 946.5. Samples: 826040. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:26:02,803][00869] Avg episode reward: [(0, '20.069')] +[2023-02-25 14:26:07,797][00869] Fps is (10 sec: 2867.2, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 3313664. Throughput: 0: 922.4. Samples: 828198. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:26:07,803][00869] Avg episode reward: [(0, '19.562')] +[2023-02-25 14:26:08,164][10880] Updated weights for policy 0, policy_version 810 (0.0018) +[2023-02-25 14:26:12,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3707.2). Total num frames: 3334144. Throughput: 0: 946.9. Samples: 833686. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:26:12,800][00869] Avg episode reward: [(0, '19.652')] +[2023-02-25 14:26:17,151][10880] Updated weights for policy 0, policy_version 820 (0.0015) +[2023-02-25 14:26:17,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 3358720. Throughput: 0: 977.9. 
Samples: 840774. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:26:17,799][00869] Avg episode reward: [(0, '19.835')] +[2023-02-25 14:26:22,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 3375104. Throughput: 0: 965.1. Samples: 843616. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:26:22,802][00869] Avg episode reward: [(0, '19.940')] +[2023-02-25 14:26:27,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3823.0, 300 sec: 3721.1). Total num frames: 3391488. Throughput: 0: 927.9. Samples: 847998. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:26:27,800][00869] Avg episode reward: [(0, '19.515')] +[2023-02-25 14:26:27,815][10866] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000828_3391488.pth... +[2023-02-25 14:26:27,955][10866] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000609_2494464.pth +[2023-02-25 14:26:29,703][10880] Updated weights for policy 0, policy_version 830 (0.0039) +[2023-02-25 14:26:32,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3721.1). Total num frames: 3411968. Throughput: 0: 962.5. Samples: 853980. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:26:32,802][00869] Avg episode reward: [(0, '19.501')] +[2023-02-25 14:26:37,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3748.9). Total num frames: 3436544. Throughput: 0: 978.5. Samples: 857482. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:26:37,799][00869] Avg episode reward: [(0, '20.088')] +[2023-02-25 14:26:38,505][10880] Updated weights for policy 0, policy_version 840 (0.0015) +[2023-02-25 14:26:42,797][00869] Fps is (10 sec: 4095.9, 60 sec: 3823.0, 300 sec: 3748.9). Total num frames: 3452928. Throughput: 0: 950.9. Samples: 863316. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 14:26:42,800][00869] Avg episode reward: [(0, '19.055')] +[2023-02-25 14:26:47,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 3469312. Throughput: 0: 927.7. Samples: 867786. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 14:26:47,800][00869] Avg episode reward: [(0, '18.729')] +[2023-02-25 14:26:50,669][10880] Updated weights for policy 0, policy_version 850 (0.0019) +[2023-02-25 14:26:52,797][00869] Fps is (10 sec: 3686.5, 60 sec: 3822.9, 300 sec: 3721.1). Total num frames: 3489792. Throughput: 0: 948.3. Samples: 870870. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:26:52,800][00869] Avg episode reward: [(0, '18.135')] +[2023-02-25 14:26:57,797][00869] Fps is (10 sec: 4505.7, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 3514368. Throughput: 0: 981.6. Samples: 877860. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:26:57,804][00869] Avg episode reward: [(0, '17.973')] +[2023-02-25 14:27:00,041][10880] Updated weights for policy 0, policy_version 860 (0.0022) +[2023-02-25 14:27:02,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 3526656. Throughput: 0: 938.1. Samples: 882990. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:27:02,803][00869] Avg episode reward: [(0, '18.856')] +[2023-02-25 14:27:07,797][00869] Fps is (10 sec: 2867.2, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 3543040. Throughput: 0: 924.5. Samples: 885218. 
Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 14:27:07,806][00869] Avg episode reward: [(0, '19.888')] +[2023-02-25 14:27:12,025][10880] Updated weights for policy 0, policy_version 870 (0.0034) +[2023-02-25 14:27:12,797][00869] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3735.0). Total num frames: 3567616. Throughput: 0: 954.4. Samples: 890948. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:27:12,798][00869] Avg episode reward: [(0, '20.569')] +[2023-02-25 14:27:17,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 3588096. Throughput: 0: 977.7. Samples: 897978. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-25 14:27:17,799][00869] Avg episode reward: [(0, '22.271')] +[2023-02-25 14:27:22,087][10880] Updated weights for policy 0, policy_version 880 (0.0012) +[2023-02-25 14:27:22,797][00869] Fps is (10 sec: 3686.3, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 3604480. Throughput: 0: 955.7. Samples: 900490. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-25 14:27:22,801][00869] Avg episode reward: [(0, '23.034')] +[2023-02-25 14:27:27,797][00869] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 3620864. Throughput: 0: 924.3. Samples: 904908. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:27:27,806][00869] Avg episode reward: [(0, '22.238')] +[2023-02-25 14:27:32,797][00869] Fps is (10 sec: 3686.5, 60 sec: 3822.9, 300 sec: 3735.1). Total num frames: 3641344. Throughput: 0: 962.7. Samples: 911106. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-25 14:27:32,809][00869] Avg episode reward: [(0, '21.516')] +[2023-02-25 14:27:33,301][10880] Updated weights for policy 0, policy_version 890 (0.0012) +[2023-02-25 14:27:37,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 3665920. Throughput: 0: 971.2. Samples: 914576. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:27:37,799][00869] Avg episode reward: [(0, '20.989')] +[2023-02-25 14:27:42,802][00869] Fps is (10 sec: 3684.6, 60 sec: 3754.4, 300 sec: 3748.8). Total num frames: 3678208. Throughput: 0: 938.7. Samples: 920108. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:27:42,808][00869] Avg episode reward: [(0, '20.617')] +[2023-02-25 14:27:44,265][10880] Updated weights for policy 0, policy_version 900 (0.0012) +[2023-02-25 14:27:47,798][00869] Fps is (10 sec: 2866.8, 60 sec: 3754.6, 300 sec: 3735.0). Total num frames: 3694592. Throughput: 0: 925.4. Samples: 924634. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 14:27:47,806][00869] Avg episode reward: [(0, '20.250')] +[2023-02-25 14:27:52,797][00869] Fps is (10 sec: 4098.0, 60 sec: 3822.9, 300 sec: 3748.9). Total num frames: 3719168. Throughput: 0: 946.6. Samples: 927814. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:27:52,802][00869] Avg episode reward: [(0, '18.669')] +[2023-02-25 14:27:54,455][10880] Updated weights for policy 0, policy_version 910 (0.0024) +[2023-02-25 14:27:57,797][00869] Fps is (10 sec: 4506.2, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 3739648. Throughput: 0: 976.7. Samples: 934900. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:27:57,806][00869] Avg episode reward: [(0, '18.065')] +[2023-02-25 14:28:02,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 3756032. Throughput: 0: 933.3. Samples: 939976. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:28:02,801][00869] Avg episode reward: [(0, '18.055')] +[2023-02-25 14:28:06,240][10880] Updated weights for policy 0, policy_version 920 (0.0030) +[2023-02-25 14:28:07,797][00869] Fps is (10 sec: 3276.7, 60 sec: 3822.9, 300 sec: 3748.9). Total num frames: 3772416. Throughput: 0: 927.2. Samples: 942216. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 14:28:07,802][00869] Avg episode reward: [(0, '18.557')] +[2023-02-25 14:28:12,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3792896. Throughput: 0: 958.4. Samples: 948034. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:28:12,805][00869] Avg episode reward: [(0, '19.716')] +[2023-02-25 14:28:15,930][10880] Updated weights for policy 0, policy_version 930 (0.0026) +[2023-02-25 14:28:17,797][00869] Fps is (10 sec: 4505.7, 60 sec: 3822.9, 300 sec: 3776.6). Total num frames: 3817472. Throughput: 0: 970.8. Samples: 954792. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:28:17,800][00869] Avg episode reward: [(0, '21.107')] +[2023-02-25 14:28:22,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 3829760. Throughput: 0: 946.6. Samples: 957172. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 14:28:22,801][00869] Avg episode reward: [(0, '22.183')] +[2023-02-25 14:28:27,797][00869] Fps is (10 sec: 2867.0, 60 sec: 3754.6, 300 sec: 3735.0). Total num frames: 3846144. Throughput: 0: 916.8. Samples: 961362. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 14:28:27,806][00869] Avg episode reward: [(0, '23.707')] +[2023-02-25 14:28:27,823][10866] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000939_3846144.pth... +[2023-02-25 14:28:27,986][10866] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000717_2936832.pth +[2023-02-25 14:28:28,003][10866] Saving new best policy, reward=23.707! +[2023-02-25 14:28:28,738][10880] Updated weights for policy 0, policy_version 940 (0.0020) +[2023-02-25 14:28:32,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3866624. Throughput: 0: 946.4. Samples: 967220. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 14:28:32,803][00869] Avg episode reward: [(0, '23.187')] +[2023-02-25 14:28:37,797][00869] Fps is (10 sec: 4096.3, 60 sec: 3686.4, 300 sec: 3749.0). Total num frames: 3887104. Throughput: 0: 948.9. Samples: 970516. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:28:37,804][00869] Avg episode reward: [(0, '21.678')] +[2023-02-25 14:28:38,308][10880] Updated weights for policy 0, policy_version 950 (0.0018) +[2023-02-25 14:28:42,797][00869] Fps is (10 sec: 3686.4, 60 sec: 3755.0, 300 sec: 3748.9). Total num frames: 3903488. Throughput: 0: 907.9. Samples: 975756. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 14:28:42,799][00869] Avg episode reward: [(0, '21.196')] +[2023-02-25 14:28:47,797][00869] Fps is (10 sec: 2867.2, 60 sec: 3686.5, 300 sec: 3707.2). Total num frames: 3915776. Throughput: 0: 888.2. Samples: 979946. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-25 14:28:47,802][00869] Avg episode reward: [(0, '21.524')] +[2023-02-25 14:28:51,059][10880] Updated weights for policy 0, policy_version 960 (0.0024) +[2023-02-25 14:28:52,801][00869] Fps is (10 sec: 3275.3, 60 sec: 3617.9, 300 sec: 3721.1). Total num frames: 3936256. Throughput: 0: 909.2. Samples: 983134. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 14:28:52,807][00869] Avg episode reward: [(0, '19.869')] +[2023-02-25 14:28:57,797][00869] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 3960832. Throughput: 0: 934.7. Samples: 990094. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 14:28:57,799][00869] Avg episode reward: [(0, '20.040')] +[2023-02-25 14:29:00,851][10880] Updated weights for policy 0, policy_version 970 (0.0028) +[2023-02-25 14:29:02,797][00869] Fps is (10 sec: 4097.8, 60 sec: 3686.4, 300 sec: 3776.7). Total num frames: 3977216. Throughput: 0: 894.3. Samples: 995036. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 14:29:02,804][00869] Avg episode reward: [(0, '20.145')] +[2023-02-25 14:29:07,797][00869] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3762.8). Total num frames: 3989504. Throughput: 0: 884.8. Samples: 996988. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 14:29:07,799][00869] Avg episode reward: [(0, '22.530')] +[2023-02-25 14:29:12,797][00869] Fps is (10 sec: 2457.6, 60 sec: 3481.6, 300 sec: 3735.0). Total num frames: 4001792. Throughput: 0: 872.2. Samples: 1000610. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 14:29:12,801][00869] Avg episode reward: [(0, '23.037')] +[2023-02-25 14:29:13,366][10866] Stopping Batcher_0... +[2023-02-25 14:29:13,367][10866] Loop batcher_evt_loop terminating... +[2023-02-25 14:29:13,368][00869] Component Batcher_0 stopped! +[2023-02-25 14:29:13,375][10866] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-02-25 14:29:13,462][10880] Weights refcount: 2 0 +[2023-02-25 14:29:13,467][10880] Stopping InferenceWorker_p0-w0... +[2023-02-25 14:29:13,470][00869] Component InferenceWorker_p0-w0 stopped! +[2023-02-25 14:29:13,472][10880] Loop inference_proc0-0_evt_loop terminating... +[2023-02-25 14:29:13,505][10886] Stopping RolloutWorker_w5... +[2023-02-25 14:29:13,509][10886] Loop rollout_proc5_evt_loop terminating... +[2023-02-25 14:29:13,518][10884] Stopping RolloutWorker_w3... +[2023-02-25 14:29:13,519][10884] Loop rollout_proc3_evt_loop terminating... +[2023-02-25 14:29:13,511][00869] Component RolloutWorker_w5 stopped! +[2023-02-25 14:29:13,520][00869] Component RolloutWorker_w3 stopped! +[2023-02-25 14:29:13,524][00869] Component RolloutWorker_w1 stopped! +[2023-02-25 14:29:13,524][10882] Stopping RolloutWorker_w1... +[2023-02-25 14:29:13,529][10882] Loop rollout_proc1_evt_loop terminating... +[2023-02-25 14:29:13,538][00869] Component RolloutWorker_w6 stopped! +[2023-02-25 14:29:13,536][10887] Stopping RolloutWorker_w6... +[2023-02-25 14:29:13,557][10887] Loop rollout_proc6_evt_loop terminating... +[2023-02-25 14:29:13,554][00869] Component RolloutWorker_w7 stopped! +[2023-02-25 14:29:13,560][10888] Stopping RolloutWorker_w7... +[2023-02-25 14:29:13,567][00869] Component RolloutWorker_w0 stopped! +[2023-02-25 14:29:13,572][10888] Loop rollout_proc7_evt_loop terminating... +[2023-02-25 14:29:13,581][10883] Stopping RolloutWorker_w2... +[2023-02-25 14:29:13,568][10881] Stopping RolloutWorker_w0... +[2023-02-25 14:29:13,581][10883] Loop rollout_proc2_evt_loop terminating... +[2023-02-25 14:29:13,580][00869] Component RolloutWorker_w2 stopped! +[2023-02-25 14:29:13,590][10881] Loop rollout_proc0_evt_loop terminating... 
+[2023-02-25 14:29:13,620][10866] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000828_3391488.pth
+[2023-02-25 14:29:13,628][10885] Stopping RolloutWorker_w4...
+[2023-02-25 14:29:13,628][10885] Loop rollout_proc4_evt_loop terminating...
+[2023-02-25 14:29:13,627][00869] Component RolloutWorker_w4 stopped!
+[2023-02-25 14:29:13,641][10866] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
+[2023-02-25 14:29:13,976][00869] Component LearnerWorker_p0 stopped!
+[2023-02-25 14:29:13,981][00869] Waiting for process learner_proc0 to stop...
+[2023-02-25 14:29:13,986][10866] Stopping LearnerWorker_p0...
+[2023-02-25 14:29:13,987][10866] Loop learner_proc0_evt_loop terminating...
+[2023-02-25 14:29:16,286][00869] Waiting for process inference_proc0-0 to join...
+[2023-02-25 14:29:16,637][00869] Waiting for process rollout_proc0 to join...
+[2023-02-25 14:29:17,176][00869] Waiting for process rollout_proc1 to join...
+[2023-02-25 14:29:17,179][00869] Waiting for process rollout_proc2 to join...
+[2023-02-25 14:29:17,181][00869] Waiting for process rollout_proc3 to join...
+[2023-02-25 14:29:17,185][00869] Waiting for process rollout_proc4 to join...
+[2023-02-25 14:29:17,186][00869] Waiting for process rollout_proc5 to join...
+[2023-02-25 14:29:17,187][00869] Waiting for process rollout_proc6 to join...
+[2023-02-25 14:29:17,188][00869] Waiting for process rollout_proc7 to join...
+[2023-02-25 14:29:17,193][00869] Batcher 0 profile tree view:
+batching: 25.8107, releasing_batches: 0.0260
+[2023-02-25 14:29:17,195][00869] InferenceWorker_p0-w0 profile tree view:
+wait_policy: 0.0126
+  wait_policy_total: 539.3566
+update_model: 7.7651
+  weight_update: 0.0026
+one_step: 0.0023
+  handle_policy_step: 505.1338
+    deserialize: 14.6392, stack: 2.9281, obs_to_device_normalize: 114.0070, forward: 239.9325, send_messages: 25.9876
+    prepare_outputs: 82.4325
+      to_cpu: 51.0361
+[2023-02-25 14:29:17,196][00869] Learner 0 profile tree view:
+misc: 0.0063, prepare_batch: 15.5831
+train: 75.1513
+  epoch_init: 0.0098, minibatch_init: 0.0169, losses_postprocess: 0.5101, kl_divergence: 0.5375, after_optimizer: 33.3234
+  calculate_losses: 26.7133
+    losses_init: 0.0033, forward_head: 1.6275, bptt_initial: 17.7678, tail: 1.0881, advantages_returns: 0.2904, losses: 3.3937
+    bptt: 2.2422
+      bptt_forward_core: 2.1779
+  update: 13.4551
+    clip: 1.3399
+[2023-02-25 14:29:17,199][00869] RolloutWorker_w0 profile tree view:
+wait_for_trajectories: 0.2869, enqueue_policy_requests: 148.5227, env_step: 818.1197, overhead: 20.8435, complete_rollouts: 7.0061
+save_policy_outputs: 20.0368
+  split_output_tensors: 9.7787
+[2023-02-25 14:29:17,200][00869] RolloutWorker_w7 profile tree view:
+wait_for_trajectories: 0.3380, enqueue_policy_requests: 146.1220, env_step: 820.6365, overhead: 20.3354, complete_rollouts: 7.0897
+save_policy_outputs: 20.3658
+  split_output_tensors: 9.9400
+[2023-02-25 14:29:17,202][00869] Loop Runner_EvtLoop terminating...
+[2023-02-25 14:29:17,204][00869] Runner profile tree view:
+main_loop: 1122.0795
+[2023-02-25 14:29:17,206][00869] Collected {0: 4005888}, FPS: 3570.1
+[2023-02-25 14:29:17,324][00869] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2023-02-25 14:29:17,326][00869] Overriding arg 'num_workers' with value 1 passed from command line
+[2023-02-25 14:29:17,330][00869] Adding new argument 'no_render'=True that is not in the saved config file!
+[2023-02-25 14:29:17,333][00869] Adding new argument 'save_video'=True that is not in the saved config file!
+[2023-02-25 14:29:17,336][00869] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2023-02-25 14:29:17,337][00869] Adding new argument 'video_name'=None that is not in the saved config file!
+[2023-02-25 14:29:17,341][00869] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+[2023-02-25 14:29:17,342][00869] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2023-02-25 14:29:17,344][00869] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+[2023-02-25 14:29:17,346][00869] Adding new argument 'hf_repository'=None that is not in the saved config file!
+[2023-02-25 14:29:17,348][00869] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2023-02-25 14:29:17,350][00869] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2023-02-25 14:29:17,351][00869] Adding new argument 'train_script'=None that is not in the saved config file!
+[2023-02-25 14:29:17,354][00869] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2023-02-25 14:29:17,355][00869] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2023-02-25 14:29:17,381][00869] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-25 14:29:17,383][00869] RunningMeanStd input shape: (3, 72, 128)
+[2023-02-25 14:29:17,385][00869] RunningMeanStd input shape: (1,)
+[2023-02-25 14:29:17,406][00869] ConvEncoder: input_channels=3
+[2023-02-25 14:29:18,077][00869] Conv encoder output size: 512
+[2023-02-25 14:29:18,079][00869] Policy head output size: 512
+[2023-02-25 14:29:21,060][00869] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
+[2023-02-25 14:29:22,683][00869] Num frames 100...
+[2023-02-25 14:29:22,803][00869] Num frames 200...
+[2023-02-25 14:29:22,917][00869] Num frames 300...
+[2023-02-25 14:29:23,039][00869] Num frames 400...
+[2023-02-25 14:29:23,158][00869] Num frames 500...
+[2023-02-25 14:29:23,290][00869] Num frames 600...
+[2023-02-25 14:29:23,416][00869] Num frames 700...
+[2023-02-25 14:29:23,535][00869] Num frames 800...
+[2023-02-25 14:29:23,624][00869] Avg episode rewards: #0: 19.230, true rewards: #0: 8.230
+[2023-02-25 14:29:23,626][00869] Avg episode reward: 19.230, avg true_objective: 8.230
+[2023-02-25 14:29:23,719][00869] Num frames 900...
+[2023-02-25 14:29:23,837][00869] Num frames 1000...
+[2023-02-25 14:29:23,980][00869] Avg episode rewards: #0: 10.895, true rewards: #0: 5.395
+[2023-02-25 14:29:23,982][00869] Avg episode reward: 10.895, avg true_objective: 5.395
+[2023-02-25 14:29:24,012][00869] Num frames 1100...
+[2023-02-25 14:29:24,140][00869] Num frames 1200...
+[2023-02-25 14:29:24,256][00869] Num frames 1300...
+[2023-02-25 14:29:24,366][00869] Num frames 1400...
+[2023-02-25 14:29:24,480][00869] Num frames 1500...
+[2023-02-25 14:29:24,597][00869] Num frames 1600...
+[2023-02-25 14:29:24,707][00869] Num frames 1700...
+[2023-02-25 14:29:24,825][00869] Num frames 1800...
+[2023-02-25 14:29:24,937][00869] Num frames 1900...
+[2023-02-25 14:29:25,059][00869] Num frames 2000...
+[2023-02-25 14:29:25,124][00869] Avg episode rewards: #0: 13.357, true rewards: #0: 6.690
+[2023-02-25 14:29:25,125][00869] Avg episode reward: 13.357, avg true_objective: 6.690
+[2023-02-25 14:29:25,233][00869] Num frames 2100...
+[2023-02-25 14:29:25,349][00869] Num frames 2200...
+[2023-02-25 14:29:25,467][00869] Num frames 2300...
+[2023-02-25 14:29:25,624][00869] Avg episode rewards: #0: 10.978, true rewards: #0: 5.977
+[2023-02-25 14:29:25,627][00869] Avg episode reward: 10.978, avg true_objective: 5.977
+[2023-02-25 14:29:25,640][00869] Num frames 2400...
+[2023-02-25 14:29:25,758][00869] Num frames 2500...
+[2023-02-25 14:29:25,873][00869] Num frames 2600...
+[2023-02-25 14:29:25,995][00869] Num frames 2700...
+[2023-02-25 14:29:26,115][00869] Num frames 2800...
+[2023-02-25 14:29:26,231][00869] Num frames 2900...
+[2023-02-25 14:29:26,355][00869] Num frames 3000...
+[2023-02-25 14:29:26,466][00869] Num frames 3100...
+[2023-02-25 14:29:26,580][00869] Num frames 3200...
+[2023-02-25 14:29:26,697][00869] Num frames 3300...
+[2023-02-25 14:29:26,810][00869] Num frames 3400...
+[2023-02-25 14:29:26,893][00869] Avg episode rewards: #0: 13.230, true rewards: #0: 6.830
+[2023-02-25 14:29:26,895][00869] Avg episode reward: 13.230, avg true_objective: 6.830
+[2023-02-25 14:29:26,997][00869] Num frames 3500...
+[2023-02-25 14:29:27,128][00869] Num frames 3600...
+[2023-02-25 14:29:27,243][00869] Num frames 3700...
+[2023-02-25 14:29:27,364][00869] Num frames 3800...
+[2023-02-25 14:29:27,477][00869] Num frames 3900...
+[2023-02-25 14:29:27,604][00869] Num frames 4000...
+[2023-02-25 14:29:27,716][00869] Num frames 4100...
+[2023-02-25 14:29:27,836][00869] Num frames 4200...
+[2023-02-25 14:29:27,950][00869] Num frames 4300...
+[2023-02-25 14:29:28,072][00869] Num frames 4400...
+[2023-02-25 14:29:28,195][00869] Num frames 4500...
+[2023-02-25 14:29:28,315][00869] Num frames 4600...
+[2023-02-25 14:29:28,427][00869] Num frames 4700...
+[2023-02-25 14:29:28,550][00869] Num frames 4800...
+[2023-02-25 14:29:28,668][00869] Num frames 4900...
+[2023-02-25 14:29:28,798][00869] Num frames 5000...
+[2023-02-25 14:29:28,861][00869] Avg episode rewards: #0: 17.340, true rewards: #0: 8.340
+[2023-02-25 14:29:28,863][00869] Avg episode reward: 17.340, avg true_objective: 8.340
+[2023-02-25 14:29:28,985][00869] Num frames 5100...
+[2023-02-25 14:29:29,122][00869] Num frames 5200...
+[2023-02-25 14:29:29,252][00869] Num frames 5300...
+[2023-02-25 14:29:29,371][00869] Num frames 5400...
+[2023-02-25 14:29:29,490][00869] Num frames 5500...
+[2023-02-25 14:29:29,611][00869] Num frames 5600...
+[2023-02-25 14:29:29,725][00869] Num frames 5700...
+[2023-02-25 14:29:29,843][00869] Num frames 5800...
+[2023-02-25 14:29:29,954][00869] Num frames 5900...
+[2023-02-25 14:29:30,072][00869] Num frames 6000...
+[2023-02-25 14:29:30,190][00869] Num frames 6100...
+[2023-02-25 14:29:30,309][00869] Num frames 6200...
+[2023-02-25 14:29:30,421][00869] Num frames 6300...
+[2023-02-25 14:29:30,565][00869] Avg episode rewards: #0: 19.533, true rewards: #0: 9.104
+[2023-02-25 14:29:30,570][00869] Avg episode reward: 19.533, avg true_objective: 9.104
+[2023-02-25 14:29:30,603][00869] Num frames 6400...
+[2023-02-25 14:29:30,717][00869] Num frames 6500...
+[2023-02-25 14:29:30,835][00869] Num frames 6600...
+[2023-02-25 14:29:30,949][00869] Num frames 6700...
+[2023-02-25 14:29:31,064][00869] Num frames 6800...
+[2023-02-25 14:29:31,190][00869] Num frames 6900...
+[2023-02-25 14:29:31,304][00869] Num frames 7000...
+[2023-02-25 14:29:31,427][00869] Num frames 7100...
+[2023-02-25 14:29:31,545][00869] Num frames 7200...
+[2023-02-25 14:29:31,659][00869] Num frames 7300...
+[2023-02-25 14:29:31,778][00869] Num frames 7400...
+[2023-02-25 14:29:31,895][00869] Num frames 7500...
+[2023-02-25 14:29:32,008][00869] Num frames 7600...
+[2023-02-25 14:29:32,126][00869] Num frames 7700...
+[2023-02-25 14:29:32,246][00869] Num frames 7800...
+[2023-02-25 14:29:32,378][00869] Num frames 7900...
+[2023-02-25 14:29:32,539][00869] Num frames 8000...
+[2023-02-25 14:29:32,703][00869] Num frames 8100...
+[2023-02-25 14:29:32,865][00869] Num frames 8200...
+[2023-02-25 14:29:33,038][00869] Num frames 8300...
+[2023-02-25 14:29:33,201][00869] Num frames 8400...
+[2023-02-25 14:29:33,391][00869] Avg episode rewards: #0: 23.716, true rewards: #0: 10.591
+[2023-02-25 14:29:33,394][00869] Avg episode reward: 23.716, avg true_objective: 10.591
+[2023-02-25 14:29:33,446][00869] Num frames 8500...
+[2023-02-25 14:29:33,612][00869] Num frames 8600...
+[2023-02-25 14:29:33,769][00869] Num frames 8700...
+[2023-02-25 14:29:33,933][00869] Num frames 8800...
+[2023-02-25 14:29:34,103][00869] Num frames 8900...
+[2023-02-25 14:29:34,290][00869] Num frames 9000...
+[2023-02-25 14:29:34,452][00869] Num frames 9100...
+[2023-02-25 14:29:34,623][00869] Num frames 9200...
+[2023-02-25 14:29:34,792][00869] Num frames 9300...
+[2023-02-25 14:29:34,952][00869] Num frames 9400...
+[2023-02-25 14:29:35,131][00869] Num frames 9500...
+[2023-02-25 14:29:35,289][00869] Num frames 9600...
+[2023-02-25 14:29:35,373][00869] Avg episode rewards: #0: 23.908, true rewards: #0: 10.686
+[2023-02-25 14:29:35,376][00869] Avg episode reward: 23.908, avg true_objective: 10.686
+[2023-02-25 14:29:35,507][00869] Num frames 9700...
+[2023-02-25 14:29:35,674][00869] Num frames 9800...
+[2023-02-25 14:29:35,832][00869] Num frames 9900...
+[2023-02-25 14:29:35,955][00869] Num frames 10000...
+[2023-02-25 14:29:36,069][00869] Num frames 10100...
+[2023-02-25 14:29:36,198][00869] Num frames 10200...
+[2023-02-25 14:29:36,310][00869] Num frames 10300...
+[2023-02-25 14:29:36,435][00869] Num frames 10400...
+[2023-02-25 14:29:36,548][00869] Num frames 10500...
+[2023-02-25 14:29:36,666][00869] Num frames 10600...
+[2023-02-25 14:29:36,784][00869] Num frames 10700...
+[2023-02-25 14:29:36,897][00869] Num frames 10800...
+[2023-02-25 14:29:37,024][00869] Num frames 10900...
+[2023-02-25 14:29:37,143][00869] Avg episode rewards: #0: 24.550, true rewards: #0: 10.950
+[2023-02-25 14:29:37,145][00869] Avg episode reward: 24.550, avg true_objective: 10.950
+[2023-02-25 14:30:40,273][00869] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
+[2023-02-25 14:31:07,217][00869] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2023-02-25 14:31:07,219][00869] Overriding arg 'num_workers' with value 1 passed from command line
+[2023-02-25 14:31:07,222][00869] Adding new argument 'no_render'=True that is not in the saved config file!
+[2023-02-25 14:31:07,223][00869] Adding new argument 'save_video'=True that is not in the saved config file!
+[2023-02-25 14:31:07,226][00869] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2023-02-25 14:31:07,228][00869] Adding new argument 'video_name'=None that is not in the saved config file!
+[2023-02-25 14:31:07,229][00869] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+[2023-02-25 14:31:07,232][00869] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2023-02-25 14:31:07,233][00869] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2023-02-25 14:31:07,234][00869] Adding new argument 'hf_repository'='chist/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2023-02-25 14:31:07,235][00869] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2023-02-25 14:31:07,236][00869] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2023-02-25 14:31:07,238][00869] Adding new argument 'train_script'=None that is not in the saved config file!
+[2023-02-25 14:31:07,239][00869] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2023-02-25 14:31:07,241][00869] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2023-02-25 14:31:07,268][00869] RunningMeanStd input shape: (3, 72, 128)
+[2023-02-25 14:31:07,270][00869] RunningMeanStd input shape: (1,)
+[2023-02-25 14:31:07,283][00869] ConvEncoder: input_channels=3
+[2023-02-25 14:31:07,319][00869] Conv encoder output size: 512
+[2023-02-25 14:31:07,320][00869] Policy head output size: 512
+[2023-02-25 14:31:07,340][00869] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
+[2023-02-25 14:31:07,801][00869] Num frames 100...
+[2023-02-25 14:31:07,922][00869] Num frames 200...
+[2023-02-25 14:31:08,044][00869] Num frames 300...
+[2023-02-25 14:31:08,154][00869] Num frames 400...
+[2023-02-25 14:31:08,272][00869] Num frames 500...
+[2023-02-25 14:31:08,423][00869] Num frames 600...
+[2023-02-25 14:31:08,593][00869] Num frames 700...
+[2023-02-25 14:31:08,755][00869] Num frames 800...
+[2023-02-25 14:31:08,913][00869] Num frames 900...
+[2023-02-25 14:31:09,118][00869] Avg episode rewards: #0: 21.980, true rewards: #0: 9.980
+[2023-02-25 14:31:09,122][00869] Avg episode reward: 21.980, avg true_objective: 9.980
+[2023-02-25 14:31:09,129][00869] Num frames 1000...
+[2023-02-25 14:31:09,285][00869] Num frames 1100...
+[2023-02-25 14:31:09,455][00869] Num frames 1200...
+[2023-02-25 14:31:09,619][00869] Num frames 1300...
+[2023-02-25 14:31:09,775][00869] Num frames 1400...
+[2023-02-25 14:31:09,935][00869] Num frames 1500...
+[2023-02-25 14:31:10,097][00869] Num frames 1600...
+[2023-02-25 14:31:10,262][00869] Avg episode rewards: #0: 16.850, true rewards: #0: 8.350
+[2023-02-25 14:31:10,265][00869] Avg episode reward: 16.850, avg true_objective: 8.350
+[2023-02-25 14:31:10,321][00869] Num frames 1700...
+[2023-02-25 14:31:10,476][00869] Num frames 1800...
+[2023-02-25 14:31:10,639][00869] Num frames 1900...
+[2023-02-25 14:31:10,807][00869] Num frames 2000...
+[2023-02-25 14:31:10,968][00869] Num frames 2100...
+[2023-02-25 14:31:11,169][00869] Avg episode rewards: #0: 13.607, true rewards: #0: 7.273
+[2023-02-25 14:31:11,171][00869] Avg episode reward: 13.607, avg true_objective: 7.273
+[2023-02-25 14:31:11,204][00869] Num frames 2200...
+[2023-02-25 14:31:11,370][00869] Num frames 2300...
+[2023-02-25 14:31:11,534][00869] Num frames 2400...
+[2023-02-25 14:31:11,703][00869] Num frames 2500...
+[2023-02-25 14:31:11,854][00869] Num frames 2600...
+[2023-02-25 14:31:11,967][00869] Num frames 2700...
+[2023-02-25 14:31:12,086][00869] Num frames 2800...
+[2023-02-25 14:31:12,193][00869] Num frames 2900...
+[2023-02-25 14:31:12,310][00869] Avg episode rewards: #0: 14.625, true rewards: #0: 7.375
+[2023-02-25 14:31:12,312][00869] Avg episode reward: 14.625, avg true_objective: 7.375
+[2023-02-25 14:31:12,369][00869] Num frames 3000...
+[2023-02-25 14:31:12,483][00869] Num frames 3100...
+[2023-02-25 14:31:12,599][00869] Num frames 3200...
+[2023-02-25 14:31:12,717][00869] Num frames 3300...
+[2023-02-25 14:31:12,876][00869] Avg episode rewards: #0: 12.796, true rewards: #0: 6.796
+[2023-02-25 14:31:12,881][00869] Avg episode reward: 12.796, avg true_objective: 6.796
+[2023-02-25 14:31:12,886][00869] Num frames 3400...
+[2023-02-25 14:31:13,000][00869] Num frames 3500...
+[2023-02-25 14:31:13,116][00869] Num frames 3600...
+[2023-02-25 14:31:13,226][00869] Num frames 3700...
+[2023-02-25 14:31:13,342][00869] Num frames 3800...
+[2023-02-25 14:31:13,451][00869] Num frames 3900...
+[2023-02-25 14:31:13,566][00869] Num frames 4000...
+[2023-02-25 14:31:13,685][00869] Num frames 4100...
+[2023-02-25 14:31:13,797][00869] Num frames 4200...
+[2023-02-25 14:31:13,961][00869] Avg episode rewards: #0: 13.490, true rewards: #0: 7.157
+[2023-02-25 14:31:13,963][00869] Avg episode reward: 13.490, avg true_objective: 7.157
+[2023-02-25 14:31:13,975][00869] Num frames 4300...
+[2023-02-25 14:31:14,095][00869] Num frames 4400...
+[2023-02-25 14:31:14,208][00869] Num frames 4500...
+[2023-02-25 14:31:14,329][00869] Num frames 4600...
+[2023-02-25 14:31:14,443][00869] Num frames 4700...
+[2023-02-25 14:31:14,562][00869] Num frames 4800...
+[2023-02-25 14:31:14,694][00869] Num frames 4900...
+[2023-02-25 14:31:14,817][00869] Num frames 5000...
+[2023-02-25 14:31:14,937][00869] Num frames 5100...
+[2023-02-25 14:31:15,060][00869] Num frames 5200...
+[2023-02-25 14:31:15,174][00869] Num frames 5300...
+[2023-02-25 14:31:15,296][00869] Num frames 5400...
+[2023-02-25 14:31:15,410][00869] Num frames 5500...
+[2023-02-25 14:31:15,531][00869] Num frames 5600...
+[2023-02-25 14:31:15,657][00869] Num frames 5700...
+[2023-02-25 14:31:15,769][00869] Avg episode rewards: #0: 16.777, true rewards: #0: 8.206
+[2023-02-25 14:31:15,770][00869] Avg episode reward: 16.777, avg true_objective: 8.206
+[2023-02-25 14:31:15,837][00869] Num frames 5800...
+[2023-02-25 14:31:15,954][00869] Num frames 5900...
+[2023-02-25 14:31:16,070][00869] Num frames 6000...
+[2023-02-25 14:31:16,187][00869] Num frames 6100...
+[2023-02-25 14:31:16,301][00869] Num frames 6200...
+[2023-02-25 14:31:16,420][00869] Num frames 6300...
+[2023-02-25 14:31:16,532][00869] Num frames 6400...
+[2023-02-25 14:31:16,657][00869] Num frames 6500...
+[2023-02-25 14:31:16,772][00869] Avg episode rewards: #0: 16.941, true rewards: #0: 8.191
+[2023-02-25 14:31:16,774][00869] Avg episode reward: 16.941, avg true_objective: 8.191
+[2023-02-25 14:31:16,845][00869] Num frames 6600...
+[2023-02-25 14:31:16,963][00869] Num frames 6700...
+[2023-02-25 14:31:17,077][00869] Num frames 6800...
+[2023-02-25 14:31:17,193][00869] Num frames 6900...
+[2023-02-25 14:31:17,304][00869] Num frames 7000...
+[2023-02-25 14:31:17,419][00869] Num frames 7100...
+[2023-02-25 14:31:17,510][00869] Avg episode rewards: #0: 16.143, true rewards: #0: 7.921
+[2023-02-25 14:31:17,513][00869] Avg episode reward: 16.143, avg true_objective: 7.921
+[2023-02-25 14:31:17,591][00869] Num frames 7200...
+[2023-02-25 14:31:17,712][00869] Num frames 7300...
+[2023-02-25 14:31:17,827][00869] Num frames 7400...
+[2023-02-25 14:31:17,943][00869] Num frames 7500...
+[2023-02-25 14:31:18,060][00869] Num frames 7600...
+[2023-02-25 14:31:18,173][00869] Num frames 7700...
+[2023-02-25 14:31:18,287][00869] Num frames 7800...
+[2023-02-25 14:31:18,408][00869] Num frames 7900...
+[2023-02-25 14:31:18,518][00869] Num frames 8000...
+[2023-02-25 14:31:18,644][00869] Num frames 8100...
+[2023-02-25 14:31:18,764][00869] Num frames 8200...
+[2023-02-25 14:31:18,883][00869] Num frames 8300...
+[2023-02-25 14:31:19,004][00869] Num frames 8400...
+[2023-02-25 14:31:19,123][00869] Num frames 8500...
+[2023-02-25 14:31:19,241][00869] Num frames 8600...
+[2023-02-25 14:31:19,358][00869] Num frames 8700...
+[2023-02-25 14:31:19,482][00869] Num frames 8800...
+[2023-02-25 14:31:19,601][00869] Num frames 8900...
+[2023-02-25 14:31:19,731][00869] Num frames 9000...
+[2023-02-25 14:31:19,845][00869] Num frames 9100...
+[2023-02-25 14:31:19,964][00869] Num frames 9200...
+[2023-02-25 14:31:20,054][00869] Avg episode rewards: #0: 20.329, true rewards: #0: 9.229
+[2023-02-25 14:31:20,056][00869] Avg episode reward: 20.329, avg true_objective: 9.229
+[2023-02-25 14:32:13,809][00869] Replay video saved to /content/train_dir/default_experiment/replay.mp4!